# Skip to content
# Snippets Groups Projects
# ConversionSupport.pm 41.3 KiB
# Newer Older
sub sort_chromosomes {
    my ($self, $chr_hashref) = @_;

    # Without an explicit hashref, fall back to whatever
    # $self->get_chrlength returns.
    $chr_hashref ||= $self->get_chrlength;

    # Only a plain hash reference is accepted; its keys are the names to sort.
    my $is_hashref = ($chr_hashref and ref($chr_hashref) eq 'HASH');
    throw("You have to pass a hashref of your chromosomes")
        unless ($is_hashref);

    # Order the names with the _by_chr_num comparator.
    my @chr_names = keys %{ $chr_hashref };
    return (sort _by_chr_num @chr_names);
}

=head2 _by_chr_num

  Example     : my @sorted = sort _by_chr_num qw(X 6-COX 14 7);
  Description : Subroutine to use in sort for sorting chromosomes. Sorts
                numerically, then alphabetically
  Return type : values to be used by sort
  Exceptions  : none
  Caller      : internal ($self->sort_chromosomes)

=cut

sub _by_chr_num {
    # Sort comparator: reads the two names from sort()'s $a and $b and
    # returns -1, 0 or 1. Names are split on '-'; the first word decides
    # numeric vs. alphabetic ordering, the second word breaks ties between
    # names sharing the same number (e.g. 6-COX vs. 6-QBL).
    my @awords = split /-/, $a;
    my @bwords = split /-/, $b;

    my $anum = $awords[0];
    my $bnum = $bwords[0];

    my $a_is_num = ($anum =~ /^[0-9]*$/);
    my $b_is_num = ($bnum =~ /^[0-9]*$/);

    # Non-numeric names sort after all numeric ones, and alphabetically
    # among themselves.
    unless ($a_is_num) {
        return $b_is_num ? 1 : ($anum cmp $bnum);
    }
    return -1 unless ($b_is_num);

    # Both numeric: the number itself decides unless it is a tie.
    my $by_number = ($anum <=> $bnum);
    return $by_number if ($by_number);

    # Same number: a name without suffix sorts before a suffixed one.
    # Two suffix-less names are equal; returning 0 here (instead of -1)
    # keeps the comparator consistent when $a and $b are swapped.
    my $a_plain = ($#awords == 0);
    my $b_plain = ($#bwords == 0);
    return 0  if ($a_plain and $b_plain);
    return -1 if ($a_plain);
    return 1  if ($b_plain);

    return $awords[1] cmp $bwords[1];
}

=head2 split_chromosomes_by_size

  Arg[1]      : (optional) Int $cutoff - the cutoff in bp between small and
                large chromosomes
  Example     : my $chr_slices = $support->split_chromosomes_by_size;
                foreach my $block_size (keys %{ $chr_slices }) {
                    print "Chromosomes with blocksize $block_size: ";
                    print join(", ", map { $_->seq_region_name }
                                        @{ $chr_slices->{$block_size} });
                }
  Description : Determines block sizes for storing DensityFeatures on
                chromosomes, and return slices for each chromosome. The block
                size is determined so that you have 150 bins for the smallest
                chromosome over 5 Mb in length. For chromosomes smaller than 5
                Mb, an additional smaller block size is used to yield 150 bins
                for the overall smallest chromosome. This will result in
                reasonable resolution for small chromosomes and high
                performance for big ones.
  Return type : Hashref (key: block size; value: Arrayref of chromosome
                Bio::EnsEMBL::Slices)
  Exceptions  : none
  Caller      : density scripts

=cut

sub split_chromosomes_by_size {
    my $self = shift;
    my $cutoff = shift || 5000000;

    # Collect the slices to work on: either the chromosomes named by the
    # 'chromosomes' parameter, or every toplevel slice.
    my $slice_adaptor = $self->dba->get_SliceAdaptor;
    my $top_slices;
    if ($self->param('chromosomes')) {
        foreach my $chr ($self->param('chromosomes')) {
            push @{ $top_slices }, $slice_adaptor->fetch_by_region('chromosome', $chr);
        }
    } else {
        $top_slices = $slice_adaptor->fetch_all("toplevel");
    }

    my ($big_chr, $small_chr, $min_big_chr, $min_small_chr);
    foreach my $slice (@{ $top_slices }) {
        my $length = $slice->length;

        if ($length < $cutoff) {
            # track the overall smallest chromosome below the cutoff
            if (! $min_small_chr or ($min_small_chr > $length)) {
                $min_small_chr = $length;
            }
            # push small chromosomes onto $small_chr
            push @{ $small_chr }, $slice;
        } elsif (! $min_big_chr or ($min_big_chr > $length)) {
            # Track the smallest chromosome at or above the cutoff. Only
            # slices that are not "small" may set this value; previously the
            # first slice always set it (operator precedence of or/&&), even
            # when it was below the cutoff.
            $min_big_chr = $length;
        }

        # push _all_ chromosomes onto $big_chr
        push @{ $big_chr }, $slice;
    }

    # Block size is chosen so the smallest chromosome of each class gets
    # 150 bins.
    my $chr_slices;
    $chr_slices->{int($min_big_chr/150)} = $big_chr if $min_big_chr;
    $chr_slices->{int($min_small_chr/150)} = $small_chr if $min_small_chr;

    return $chr_slices;
}

=head2 log

  Arg[1]      : String $txt - the text to log
  Arg[2]      : Int $indent - indentation level for log message
  Example     : my $log = $support->log_filehandle;
                $support->log('Log foo.\n', 1);
  Description : Logs a message to the filehandle initialised by calling
                $self->log_filehandle(). You can supply an indentation level
                to get nice hierarchical log messages.
  Return type : true on success
  Exceptions  : thrown when no filehandle can be obtained
  Caller      : general

=cut

sub log {
    my ($self, $txt, $indent) = @_;
    $indent = 0 unless ($indent);

    # Peel leading linebreaks off the message so the indentation ends up
    # directly in front of the text, then put the linebreaks back in front.
    $txt =~ s/^(\n*)//;
    my $leading_breaks = $1;
    my $padding = "    " x $indent;
    $txt = $leading_breaks . $padding . $txt;

    # The filehandle must have been stored on the object beforehand.
    my $fh = $self->{'_log_filehandle'};
    throw("Unable to obtain log filehandle") unless $fh;

    print {$fh} "$txt";
    return(1);
}

=head2 log_warning

  Arg[1]      : String $txt - the warning text to log
  Arg[2]      : Int $indent - indentation level for log message
  Example     : my $log = $support->log_filehandle;
                $support->log_warning('Log foo.\n', 1);
  Description : Logs a message via $self->log and increases the warning counter.
  Return type : true on success
  Exceptions  : none
  Caller      : general

=cut

sub log_warning {
    my ($self, $txt, $indent) = @_;

    # Prefix the message, hand it to the regular logger, then bump the
    # warning counter kept on the object.
    $self->log("WARNING: " . $txt, $indent);
    ++$self->{'_warnings'};

    return(1);
}

=head2 log_error

  Arg[1]      : String $txt - the error text to log
  Arg[2]      : Int $indent - indentation level for log message
  Example     : my $log = $support->log_filehandle;
                $support->log_error('Log foo.\n', 1);
  Description : Logs a message via $self->log and exits the script.
  Return type : none
  Exceptions  : none
  Caller      : general

=cut

sub log_error {
    my ($self, $txt, $indent) = @_;

    # Log the prefixed error message followed by a final notice.
    $txt = "ERROR: ".$txt;
    $self->log($txt, $indent);
    $self->log("Exiting.\n");

    # Terminate with a non-zero status: a bare exit reports success (0) to
    # the calling shell or pipeline even though the script failed.
    exit(1);
}

=head2 log_verbose

  Arg[1]      : String $txt - the warning text to log
  Arg[2]      : Int $indent - indentation level for log message
  Example     : my $log = $support->log_filehandle;
                $support->log_verbose('Log this verbose message.\n', 1);
  Description : Logs a message via $self->log if --verbose option was used
  Return type : TRUE on success, FALSE if not verbose
  Exceptions  : none
  Caller      : general

=cut

sub log_verbose {
    my ($self, $txt, $indent) = @_;

    # Only pass the message on when the 'verbose' parameter is set.
    if ($self->param('verbose')) {
        $self->log($txt, $indent);
        return(1);
    }

    return(0);
}

=head2 log_stamped

  Arg[1]      : String $txt - the warning text to log
  Arg[2]      : Int $indent - indentation level for log message
  Example     : my $log = $support->log_filehandle;
                $support->log_stamped('Log this stamped message.\n', 1);
  Description : Appends timestamp and memory usage to a message and logs it via
                $self->log
  Return type : TRUE on success
  Exceptions  : none
  Caller      : general

=cut

sub log_stamped {
    my ($self, $txt, $indent) = @_;

    # Detach trailing linebreaks so the stamp lands on the same line as the
    # message, then re-attach them after the stamp.
    $txt =~ s/(\n*)$//;
    my $trailing_breaks = $1;
    my $stamp = $self->date_and_mem;

    $self->log($txt . " " . $stamp . $trailing_breaks, $indent);
    return(1);
}

=head2 log_filehandle

  Arg[1]      : (optional) String $mode - file access mode
  Example     : my $log = $support->log_filehandle;
                # print to the filehandle
                print $log 'Lets start logging...\n';
                # log via the wrapper $self->log()
                $support->log('Another log message.\n');
  Description : Returns a filehandle for logging (STDERR by default, logfile if
                set from config or commandline). You can use the filehandle
                directly to print to, or use the smart wrapper $self->log().
                Logging mode (truncate or append) can be set by passing the
                mode as an argument to log_filehandle(), or with the
                --logappend commandline option (default: truncate)
  Return type : Filehandle - the filehandle to log to
  Exceptions  : thrown if logfile can't be opened
  Caller      : general

=cut

sub log_filehandle {
    my ($self, $mode) = @_;

    # Truncate by default; the 'logappend' parameter forces append mode.
    $mode ||= '>';
    $mode = '>>' if ($self->param('logappend'));

    # Default log target is STDERR. NOTE: because $fh already refers to the
    # STDERR glob, the open() below re-opens STDERR itself onto the logfile;
    # this is the original behaviour and is kept as-is.
    my $fh = \*STDERR;
    if (my $logfile = $self->param('logfile')) {
        if (my $logpath = $self->param('logpath')) {
            # Create the log directory if needed. The builtin mkdir avoids
            # passing the path through a shell and sets $! on failure.
            unless (-e $logpath) {
                mkdir($logpath) or
                    $self->log_error("Can't create log dir $logpath: $!\n");
            }
            # the logfile lives inside the log directory
            $logfile = "$logpath/$logfile";
        }
        open($fh, "$mode", $logfile) or throw(
            "Unable to open $logfile for writing: $!");
    }

    # Remember the handle on the object and hand it back.
    $self->{'_log_filehandle'} = $fh;
    return $self->{'_log_filehandle'};
}

=head2 filehandle

  Arg[1]      : String $mode - file access mode
  Arg[2]      : String $file - input or output file
  Example     : my $fh = $support->filehandle('>>', '/path/to/file');
                # print to the filehandle
                print $fh 'Your text goes here...\n';
  Description : Returns a filehandle (*STDOUT for writing, *STDIN for reading
                by default) to print to or read from.
  Return type : Filehandle - the filehandle
  Exceptions  : thrown if file can't be opened
  Caller      : general

=cut

sub filehandle {
    my ($self, $mode, $file) = @_;
    $mode = ">" unless ($mode);

    my $handle;
    if (! $file) {
        # No file given: pick a standard stream by the direction of $mode.
        # A mode with neither '>' nor '<' leaves the handle undefined.
        $handle = ($mode =~ />/) ? \*STDOUT
                : ($mode =~ /</) ? \*STDIN
                :                  undef;
    } else {
        # A file was given: open it in the requested mode.
        open($handle, "$mode", $file) or throw(
            "Unable to open $file for writing: $!");
    }

    return $handle;
}

=head2 init_log

  Example     : $support->init_log;
  Description : Opens a filehandle to the logfile and prints some header
                information to this file. This includes script name, date, user
                running the script and parameters the script will be running
                with.
  Return type : Filehandle - the log filehandle
  Exceptions  : none
  Caller      : general

=cut

sub init_log {
    my $self = shift;

    # get a log filehandle
    my $log = $self->log_filehandle;

    # print script name, date, user who is running it
    # (hostname and user are taken from the external `hostname` and `whoami`
    # commands)
    my $hostname = `hostname`;
    chomp $hostname;
    my $script = "$hostname:$Bin/$Script";
    my $user = `whoami`;
    chomp $user;
    $self->log("Script: $script\nDate: ".$self->date."\nUser: $user\n");

    # print parameters the script is running with
    $self->log("Parameters:\n\n");
    $self->log($self->list_all_params);

    # remember start time
    $self->{'_start_time'} = time;

    return $log;
}

=head2 finish_log

  Example     : $support->finish_log;
  Description : Writes footer information to a logfile. This includes the
                number of logged warnings, timestamp and memory footprint.
  Return type : TRUE on success
  Exceptions  : none
  Caller      : general

=cut

sub finish_log {
    my $self = shift;

    # summary line with the number of warnings
    $self->log("\nAll done. ".$self->warnings." warnings. ");

    # Runtime is only reported when a start time was stored on the object.
    if ($self->{'_start_time'}) {
        $self->log("Runtime ");
        # break the elapsed seconds down into hours, minutes and seconds
        my $diff = time - $self->{'_start_time'};
        my $sec = $diff % 60;
        $diff = ($diff - $sec) / 60;
        my $min = $diff % 60;
        my $hours = ($diff - $min) / 60;
        $self->log("${hours}h ${min}min ${sec}sec ");
    }

    # finish with timestamp and memory usage
    $self->log($self->date_and_mem."\n\n");
    return(1);
}

=head2 date_and_mem

  Example     : print LOG "Time, memory usage: ".$support->date_and_mem."\n";
  Description : Prints a timestamp and the memory usage of your script.
  Return type : String - timestamp and memory usage
  Exceptions  : none
  Caller      : general

=cut

sub date_and_mem {
    # current local time, formatted by strftime
    my $timestamp = strftime("%Y-%m-%d %T", localtime);

    # last line of the `ps` vsz column for this process ($$)
    chomp(my $vsz = `ps -p $$ -o vsz |tail -1`);

    return "[$timestamp, mem $vsz]";
}

=head2 date

  Example     : print "Date: " . $support->date . "\n";
  Description : Returns a nicely formatted timestamp (YYYY-MM-DD hh:mm:ss)
  Return type : String - the timestamp
  Exceptions  : none
  Caller      : general

=cut

sub date {
    # localtime in list context supplies the broken-down time for strftime
    my @now = localtime;
    return strftime("%Y-%m-%d %T", @now);
}

=head2 mem

  Example     : print "Memory usage: " . $support->mem . "\n";
  Description : Prints the memory used by your script. Not sure about platform
                dependence of this call ...
  Return type : String - memory usage
  Exceptions  : none
  Caller      : general

=cut

sub mem {
    my $mem = `ps -p $$ -o vsz |tail -1`;