From ed2f3ec07faa0909937c5bfea9c595eeb0e74aaf Mon Sep 17 00:00:00 2001
From: Patrick Meidl <pm2@sanger.ac.uk>
Date: Tue, 4 Jul 2006 14:14:51 +0000
Subject: [PATCH] some script utilities (configuration parsing, logging and
 various other stuff)

---
 modules/Bio/EnsEMBL/Utils/ConfParser.pm  | 586 ++++++++++++++++++++++
 modules/Bio/EnsEMBL/Utils/Logger.pm      | 611 +++++++++++++++++++++++
 modules/Bio/EnsEMBL/Utils/ScriptUtils.pm | 184 +++++++
 3 files changed, 1381 insertions(+)
 create mode 100644 modules/Bio/EnsEMBL/Utils/ConfParser.pm
 create mode 100644 modules/Bio/EnsEMBL/Utils/Logger.pm
 create mode 100644 modules/Bio/EnsEMBL/Utils/ScriptUtils.pm

diff --git a/modules/Bio/EnsEMBL/Utils/ConfParser.pm b/modules/Bio/EnsEMBL/Utils/ConfParser.pm
new file mode 100644
index 0000000000..619a99d564
--- /dev/null
+++ b/modules/Bio/EnsEMBL/Utils/ConfParser.pm
@@ -0,0 +1,586 @@
+package Bio::EnsEMBL::Utils::ConfParser;
+
+=head1 NAME
+
+Bio::EnsEMBL::Utils::ConversionSupport - Utility module for Vega release and
+schema conversion scripts
+
+=head1 SYNOPSIS
+
+    my $serverroot = '/path/to/ensembl';
+    my $suport = new Bio::EnsEMBL::Utils::ConversionSupport($serverroot);
+
+    # parse common options
+    $support->parse_common_options;
+
+    # parse extra options for your script
+    $support->parse_extra_options('string_opt=s', 'numeric_opt=n');
+
+    # ask user if he wants to run script with these parameters
+    $support->confirm_params;
+
+    # see individual method documentation for more stuff
+
+=head1 DESCRIPTION
+
+This module is a collection of common methods and provides helper functions 
+for the Vega release and schema conversion scripts. Amongst others, it reads
+options from a config file, parses commandline options and does logging.
+
+=head1 LICENCE
+
+This code is distributed under an Apache style licence:
+Please see http://www.ensembl.org/code_licence.html for details
+
+=head1 AUTHOR
+
+Patrick Meidl <pm2@sanger.ac.uk>
+
+=head1 CONTACT
+
+Post questions to the EnsEMBL development list ensembl-dev@ebi.ac.uk
+
+=cut
+
+use strict;
+use warnings;
+no warnings 'uninitialized';
+
+use Getopt::Long;
+use Text::Wrap;
+use Cwd qw(abs_path);
+use Bio::EnsEMBL::Utils::Argument qw(rearrange);
+use Bio::EnsEMBL::Utils::Exception qw(throw warning);
+use Bio::EnsEMBL::Utils::ScriptUtils qw(user_proceed);
+use Bio::EnsEMBL::Utils::Logger;
+
+
+=head2 new
+
+  Arg[SERVERROOT] : String $serverroot
+                root directory of your ensembl code
+  Example     : my $support = new Bio::EnsEMBL::Utils::ConversionSupport(
+                                        '/path/to/ensembl');
+  Description : constructor
+  Return type : Bio::EnsEMBL::Utils::ConversionSupport object
+  Exceptions  : thrown if no serverroot is provided
+  Caller      : general
+
+=cut
+
+sub new {
+  my $class = shift;
+
+  my ($serverroot) = rearrange(['SERVERROOT'], @_);
+
+  throw("You must supply a serverroot.") unless ($serverroot);
+
+  my $self = {
+      '_serverroot'   => $serverroot,
+      '_param'        => { interactive => 1 },
+  };
+
+  bless ($self, $class);
+
+  return $self;
+}
+
+
+=head2 parse_common_options
+
+  Example     : $support->parse_common_options;
+  Description : This method reads options from a configuration file and parses
+                some commandline options that are common to all scripts (like
+                db connection settings, help, dry-run). Commandline options
+                will override config file settings. 
+
+                All options will be accessible via $self->param('name').
+  Return type : true on success 
+  Exceptions  : thrown if configuration file can't be opened
+  Caller      : general
+
+=cut
+
+sub parse_common_options {
+  my $self = shift;
+
+  # read commandline options
+  my %h;
+  Getopt::Long::Configure("pass_through");
+  &GetOptions( \%h,
+      'conffile|conf=s',
+      'logfile|log=s',
+      'logpath=s',
+      'logappend|log_append=s',
+      'is_component=s',
+      'verbose|v=s',
+      'interactive|i=s',
+      'dry_run|dry|n=s',
+      'help|h|?',
+      );
+
+  # reads config file
+  my $conffile = $h{'conffile'} || $self->param('default_conf') ||
+    "$ENV{HOME}/.ensembl_script.conf";
+  $conffile = abs_path($conffile);
+
+  if (-e $conffile) {
+    open(CONF, $conffile) or throw( 
+        "Unable to open configuration file $conffile for reading: $!");
+
+    my $serverroot = $self->serverroot;
+
+    while (<CONF>) {
+      chomp;
+
+      # remove comments
+      s/^[#;].*//;
+      s/\s+[;].*$//;
+
+      # read options into internal parameter datastructure
+      next unless (/(\w\S*)\s*=\s*(.*)/);
+      my $name = $1;
+      my $val = $2;
+      if ($val =~ /\$SERVERROOT/) {
+        $val =~ s/\$SERVERROOT/$serverroot/g;
+        $val = abs_path($val);
+      }
+      $self->param($name, $val);
+    }
+
+    $self->param('conffile', $conffile);
+  }
+
+  # override configured parameter with commandline options
+  map { $self->param($_, $h{$_}) } keys %h;
+  return(1);
+}
+
+
+=head2 parse_extra_options
+
+  Arg[1-N]    : option descriptors that will be passed on to Getopt::Long
+  Example     : $support->parse_extra_options('string_opt=s', 'numeric_opt=n');
+  Description : Parse extra commandline options by passing them on to
+                Getopt::Long and storing parameters in $self->param('name).
+  Return type : true on success
+  Exceptions  : none (caugth by $self->error)
+  Caller      : general
+
+=cut
+
+sub parse_extra_options {
+  my ($self, @params) = @_;
+  
+  Getopt::Long::Configure("no_pass_through");
+  
+  eval {
+    # catch warnings to pass to $self->error
+    local $SIG{__WARN__} = sub { die @_; };
+    &GetOptions(\%{ $self->{'_param'} }, @params);
+  };
+
+  # comma to list
+  foreach my $param (@params) {
+    if ($param =~ /\@$/) {
+      $param =~ s/(^\w+).*/$1/;
+      $self->comma_to_list($param);
+    }
+  }
+  
+  $self->error($@) if $@;
+  
+  return(1);
+}
+
+
+=head2 allowed_params
+
+  Arg[1-N]    : (optional) List of allowed parameters to set
+  Example     : my @allowed = $self->allowed_params(qw(param1 param2));
+  Description : Getter/setter for allowed parameters. This is used by
+                $self->confirm_params() to avoid cluttering of output with
+                conffile entries not relevant for a given script. You can use
+                $self->get_common_params() as a shortcut to set them.
+  Return type : Array - list of allowed parameters
+  Exceptions  : none
+  Caller      : general
+
+=cut
+
+sub allowed_params {
+  my $self = shift;
+
+  # setter
+  if (@_) {
+    @{ $self->{'_allowed_params'} } = @_;
+  }
+
+  # getter
+  if (ref($self->{'_allowed_params'}) eq 'ARRAY') {
+    return @{ $self->{'_allowed_params'} };
+  } else {
+    return ();
+  }
+}
+
+
+=head2 get_common_params
+
+  Example     : my @allowed_params = $self->get_common_params, 'extra_param';
+  Description : Returns a list of commonly used parameters in the conversion
+                scripts. Shortcut for setting allowed parameters with
+                $self->allowed_params().
+  Return type : Array - list of common parameters
+  Exceptions  : none
+  Caller      : general
+
+=cut
+
+sub get_common_params {
+  return qw(
+      conffile
+      logpath
+      logfile
+      logappend
+      is_component
+      verbose
+      interactive
+      dry_run
+  );
+}
+
+
+=head2 confirm_params
+
+  Example     : $support->confirm_params;
+  Description : Prints a table of parameters that were collected from config
+                file and commandline and asks user to confirm if he wants
+                to proceed.
+  Return type : true on success
+  Exceptions  : none
+  Caller      : general
+
+=cut
+
+sub confirm_params {
+  my $self = shift;
+
+  if ($self->param('interactive')) {
+    # print parameter table
+    print "Running script with these parameters:\n\n";
+    print $self->list_all_params;
+
+    # ask user if he wants to proceed
+    exit unless user_proceed("Continue?");
+  }
+  
+  return(1);
+}
+
+
+=head2 list_all_params
+
+  Example     : print LOG $support->list_all_params;
+  Description : prints a table of the parameters used in the script
+  Return type : String - the table to print
+  Exceptions  : none
+  Caller      : general
+
+=cut
+
+sub list_all_params {
+  my $self = shift;
+
+  $Text::Wrap::colums = 72;
+
+  my $txt = sprintf "    %-20s%-40s\n", qw(PARAMETER VALUE);
+  $txt .= "    " . "-"x70 . "\n";
+
+  foreach my $key ($self->allowed_params) {
+    my $val;
+    if (defined($self->param($key))) {
+      $txt .= Text::Wrap::wrap(sprintf('    %-20s', $key), ' 'x24,
+        join(", ", $self->param($key)))."\n";
+    }
+  }
+
+  $txt .= "\n";
+
+  return $txt;
+}
+
+
+=head2 create_commandline_options
+
+  Arg[1]      : Hashref $settings - hashref describing what to do
+                Allowed keys:
+                    allowed_params => 0|1   # use all allowed parameters
+                    exclude => []           # listref of parameters to exclude
+                    replace => {param => newval} # replace value of param with
+                                                 # newval
+  Example     : $support->create_commandline_options({
+                    allowed_params => 1,
+                    exclude => ['verbose'],
+                    replace => { 'dbname' => 'homo_sapiens_vega_33_35e' }
+                });
+  Description : Creates a commandline options string that can be passed to any
+                other script using ConversionSupport.
+  Return type : String - commandline options string
+  Exceptions  : none
+  Caller      : general
+
+=cut
+
+sub create_commandline_options {
+  my $self = shift;
+
+  my ($allowed_params, $exclude, $replace) = rearrange(
+    ['ALLOWED_PARAMS', 'EXCLUDE', 'REPLACE'], @_);
+
+  my %param_hash;
+
+  # get all allowed parameters
+  if ($allowed_params) {
+    # exclude params explicitly stated
+    my %exclude = map { $_ => 1 } @{ $exclude || [] };
+
+    foreach my $param ($self->allowed_params) {
+      unless ($exclude{$param}) {
+        my ($first, @rest) = $self->param($param);
+        next unless (defined($first));
+
+        if (@rest) {
+          $first = join(",", $first, @rest);
+        }
+        $param_hash{$param} = $first;
+      }
+    }
+
+  }
+
+  # replace values
+  foreach my $key (keys %{ $replace || {} }) {
+    $param_hash{$key} = $replace->{$key};
+  }
+
+  # create the commandline options string
+  my $options_string;
+  foreach my $param (keys %param_hash) {
+    $options_string .= sprintf("--%s %s ", $param, $param_hash{$param});
+  }
+
+  return $options_string;
+}
+
+
+=head2 check_required_params
+
+  Arg[1-N]    : List @params - parameters to check
+  Example     : $self->check_required_params(qw(dbname host port));
+  Description : Checks $self->param to make sure the requested parameters
+                have been set. Dies if parameters are missing.
+  Return type : true on success
+  Exceptions  : none
+  Caller      : general
+
+=cut
+
+sub check_required_params {
+  my ($self, @params) = @_;
+  
+  my @missing = ();
+  
+  foreach my $param (@params) {
+    push @missing, $param unless $self->param($param);
+  }
+  
+  if (@missing) {
+    throw("Missing parameters: @missing.\nYou must specify them on the commandline or in your conffile.\n");
+  }
+  
+  return(1);
+}
+
+
+=head2 comma_to_list
+
+  Arg[1-N]    : list of parameter names to parse
+  Example     : $support->comma_to_list('chromosomes');
+  Description : Transparently converts comma-separated lists into arrays (to
+                allow different styles of commandline options, see perldoc
+                Getopt::Long for details). Parameters are converted in place
+                (accessible through $self->param('name')).
+  Return type : true on success
+  Exceptions  : none
+  Caller      : general
+
+=cut
+
+sub comma_to_list {
+  my $self = shift;
+  
+  foreach my $param (@_) {
+    $self->param($param, split (/,/, join (',', $self->param($param))));
+  }
+  
+  return(1);
+}
+
+
+=head2 list_or_file
+
+  Arg[1]      : Name of parameter to parse
+  Example     : $support->list_or_file('gene_stable_id');
+  Description : Determines whether a parameter holds a list or it is a filename
+                to read the list entries from.
+  Return type : true on success
+  Exceptions  : thrown if list file can't be opened
+  Caller      : general
+
+=cut
+
+sub list_or_file {
+  my ($self, $param) = @_;
+  
+  my @vals = $self->param($param);
+  return unless (@vals);
+
+  my $firstval = $vals[0];
+  
+  if (scalar(@vals) == 1 && -e $firstval) {
+    # we didn't get a list of values, but a file to read values from
+    @vals = ();
+    
+    open(IN, $firstval) or throw("Cannot open $firstval for reading: $!");
+    
+    while(<IN>){
+      chomp;
+      push(@vals, $_);
+    }
+    
+    close(IN);
+    
+    $self->param($param, @vals);
+  }
+  
+  $self->comma_to_list($param);
+  
+  return(1);
+}
+
+
+=head2 param
+
+  Arg[1]      : Parameter name
+  Arg[2-N]    : (optional) List of values to set
+  Example     : my $dbname = $support->param('dbname');
+                $support->param('port', 3306);
+                $support->param('chromosomes', 1, 6, 'X');
+  Description : Getter/setter for parameters. Accepts single-value params and
+                list params.
+  Return type : Scalar value for single-value parameters, array of values for
+                list parameters
+  Exceptions  : thrown if no parameter name is supplied
+  Caller      : general
+
+=cut
+
+sub param {
+  my $self = shift;
+  my $name = shift or throw("You must supply a parameter name");
+
+  # setter
+  if (@_) {
+    if (scalar(@_) == 1) {
+      # single value
+      $self->{'_param'}->{$name} = shift;
+    } else {
+      # list of values
+      undef $self->{'_param'}->{$name};
+      @{ $self->{'_param'}->{$name} } = @_;
+    }
+  }
+
+  # getter
+  if (ref($self->{'_param'}->{$name}) eq 'ARRAY') {
+    # list parameter
+    return @{ $self->{'_param'}->{$name} };
+  } elsif (defined($self->{'_param'}->{$name})) {
+    # single-value parameter
+    return $self->{'_param'}->{$name};
+  } else {
+    return undef;
+  }
+}
+
+
+=head2 serverroot
+
+  Arg[1]      : (optional) String - root directory of your ensembl sandbox
+  Example     : my $serverroot = $support->serverroot;
+  Description : Getter/setter for the root directory of your ensembl sandbox.
+                This is set when ConversionSupport object is created, so
+                usually only used as a getter.
+  Return type : String - the server root directory
+  Exceptions  : none
+  Caller      : general
+
+=cut
+
+sub serverroot {
+  my $self = shift;
+  $self->{'_serverroot'} = shift if (@_);
+  return $self->{'_serverroot'};
+}
+
+
+=head2 error
+
+  Arg[1]      : (optional) String - error message
+  Example     : $support->error("An error occurred: $@");
+                exit(0) if $support->error;
+  Description : Getter/setter for error messages
+  Return type : String - error message
+  Exceptions  : none
+  Caller      : general
+
+=cut
+
+sub error {
+    my $self = shift;
+    $self->{'_error'} = shift if (@_);
+    return $self->{'_error'};
+}
+
+
+=head2 
+
+  Arg[1]      : 
+  Example     : 
+  Description : 
+  Return type : 
+  Exceptions  : 
+  Caller      : 
+  Status      :
+
+=cut
+
+sub logger {
+  my $self = shift;
+
+  if (@_) {
+    $self->{'_logger'} = shift;
+
+  } elsif (! $self->{'_logger'}) {
+    # log to STDERR if no logger supplied
+    $self->{'_logger'} = new Bio::EnsEMBL::Utils::Logger(
+      -VERBOSE => $self->param('verbose'),
+    );
+  }
+
+  return $self->{'_logger'};
+}
+
+1;
+
diff --git a/modules/Bio/EnsEMBL/Utils/Logger.pm b/modules/Bio/EnsEMBL/Utils/Logger.pm
new file mode 100644
index 0000000000..8cdde2995c
--- /dev/null
+++ b/modules/Bio/EnsEMBL/Utils/Logger.pm
@@ -0,0 +1,611 @@
+package Bio::EnsEMBL::Utils::Logger;
+
+=head1 NAME
+
+Bio::EnsEMBL::Utils::ConversionSupport - Utility module for Vega release and
+schema conversion scripts
+
+=head1 SYNOPSIS
+
+    my $serverroot = '/path/to/ensembl';
+    my $suport = new Bio::EnsEMBL::Utils::ConversionSupport($serverroot);
+
+    # parse common options
+    $support->parse_common_options;
+
+    # parse extra options for your script
+    $support->parse_extra_options('string_opt=s', 'numeric_opt=n');
+
+    # ask user if he wants to run script with these parameters
+    $support->confirm_params;
+
+    # see individual method documentation for more stuff
+
+=head1 DESCRIPTION
+
+This module is a collection of common methods and provides helper functions 
+for the Vega release and schema conversion scripts. Amongst others, it reads
+options from a config file, parses commandline options and does logging.
+
+=head1 LICENCE
+
+This code is distributed under an Apache style licence:
+Please see http://www.ensembl.org/code_licence.html for details
+
+=head1 AUTHOR
+
+Patrick Meidl <pm2@sanger.ac.uk>
+
+=head1 CONTACT
+
+Post questions to the EnsEMBL development list ensembl-dev@ebi.ac.uk
+
+=cut
+
+use strict;
+use warnings;
+no warnings 'uninitialized';
+
+use FindBin qw($Bin $Script);
+use POSIX qw(strftime);
+use Bio::EnsEMBL::Utils::Argument qw(rearrange);
+use Bio::EnsEMBL::Utils::Exception qw(throw warning);
+
+
+=head2 new
+
+  Arg[1]      : String $serverroot - root directory of your ensembl sandbox
+  Example     : my $support = new Bio::EnsEMBL::Utils::ConversionSupport(
+                                        '/path/to/ensembl');
+  Description : constructor
+  Return type : Bio::EnsEMBL::Utils::ConversionSupport object
+  Exceptions  : thrown if no serverroot is provided
+  Caller      : general
+
+=cut
+
+sub new {
+  my $class = shift;
+
+  my ($logfile, $logpath, $logappend, $verbose, $is_component) = rearrange(
+    ['LOGFILE', 'LOGPATH', 'LOGAPPEND', 'VERBOSE', 'IS_COMPONENT'], @_);
+  
+  my $self = { '_warnings'     => 0, };
+  bless ($self, $class);
+
+  # initialise
+  $self->logfile($logfile);
+  $self->logpath($logpath);
+  $self->logappend($logappend);
+  $self->verbose($verbose);
+  $self->is_component($is_component);
+  
+  return $self;
+}
+
+
+=head2 log
+
+  Arg[1]      : String $txt - the text to log
+  Arg[2]      : Int $indent - indentation level for log message
+  Example     : my $log = $support->log_filehandle;
+                $support->log('Log foo.\n', 1);
+  Description : Logs a message to the filehandle initialised by calling
+                $self->log_filehandle(). You can supply an indentation level
+                to get nice hierarchical log messages.
+  Return type : true on success
+  Exceptions  : thrown when no filehandle can be obtained
+  Caller      : general
+
+=cut
+
+sub log {
+  my ($self, $txt, $indent) = @_;
+
+  $indent ||= 0;
+  my $fh = $self->log_filehandle;
+  
+  # strip off leading linebreaks so that indenting doesn't break
+  $txt =~ s/^(\n*)//;
+  
+  # indent
+  $txt = $1."  "x$indent . $txt;
+  
+  print $fh "$txt";
+  
+  return(1);
+}
+
+
+=head2 log_warning
+
+  Arg[1]      : String $txt - the warning text to log
+  Arg[2]      : Int $indent - indentation level for log message
+  Example     : my $log = $support->log_filehandle;
+                $support->log_warning('Log foo.\n', 1);
+  Description : Logs a message via $self->log and increases the warning counter.
+  Return type : true on success
+  Exceptions  : none
+  Caller      : general
+
+=cut
+
+sub log_warning {
+  my ($self, $txt, $indent) = @_;
+  
+  $txt = "WARNING: " . $txt;
+  $self->log($txt, $indent);
+  
+  $self->{'_warnings'}++;
+  
+  return(1);
+}
+
+
+=head2 log_error
+
+  Arg[1]      : String $txt - the error text to log
+  Arg[2]      : Int $indent - indentation level for log message
+  Example     : my $log = $support->log_filehandle;
+                $support->log_error('Log foo.\n', 1);
+  Description : Logs a message via $self->log and exits the script.
+  Return type : none
+  Exceptions  : none
+  Caller      : general
+
+=cut
+
+sub log_error {
+  my ($self, $txt, $indent) = @_;
+  
+  $txt = "ERROR: ".$txt;
+  $self->log($txt, $indent);
+  
+  $self->log("Exiting prematurely.\n\n");
+  $self->log("Runtime: ".$self->runtime." ".$self->date_and_mem."\n\n");
+  
+  exit;
+}
+
+
+=head2 log_verbose
+
+  Arg[1]      : String $txt - the warning text to log
+  Arg[2]      : Int $indent - indentation level for log message
+  Example     : my $log = $support->log_filehandle;
+                $support->log_verbose('Log this verbose message.\n', 1);
+  Description : Logs a message via $self->log if --verbose option was used
+  Return type : TRUE on success, FALSE if not verbose
+  Exceptions  : none
+  Caller      : general
+
+=cut
+
+sub log_verbose {
+  my ($self, $txt, $indent) = @_;
+
+  return(0) unless $self->verbose;
+
+  $self->log($txt, $indent);
+  return(1);
+}
+
+
+=head2 log_stamped
+
+  Arg[1]      : String $txt - the warning text to log
+  Arg[2]      : Int $indent - indentation level for log message
+  Example     : my $log = $support->log_filehandle;
+                $support->log_stamped('Log this stamped message.\n', 1);
+  Description : Appends timestamp and memory usage to a message and logs it via
+                $self->log
+  Return type : TRUE on success
+  Exceptions  : none
+  Caller      : general
+
+=cut
+
+sub log_stamped {
+  my ($self, $txt, $indent) = @_;
+  
+  # append timestamp and memory usage to log text
+  $txt =~ s/(\n*)$//;
+  $txt .= " ".$self->date_and_mem.$1;
+  
+  $self->log($txt, $indent);
+  return(1);
+}
+
+
+sub log_progress {
+  my $self = shift;
+  my $max = shift;
+  my $curr = shift;
+  my $incr = shift || 20;
+  my $indent = shift;
+  my $show_mem = shift;
+
+  throw("You must provide a maximum and current value to log progress.")
+    unless ($max and $curr);
+
+  if (($curr % $incr) == 0 or $curr < 20 or $curr == $max) {
+    my $mem;
+    $mem = ", mem ".$self->mem if ($show_mem);
+    my $log_str = "\r".('  'x$indent)."$curr/$max (".int($curr/$max*100)."\%$mem)";
+    $log_str .= "\n" if ($curr == $max);
+    
+    $self->log($log_str);
+  }
+
+}
+
+
+sub log_progressbar {
+  my $self = shift;
+  my $name = shift;
+  my $curr = shift;
+  my $indent = shift;
+  
+  throw("You must provide a name and the current value for your progress bar")
+    unless ($name and $curr);
+
+  # return if we haven't reached the next increment
+  return if ($curr < int($self->{'_progress'}->{$name}->{'next'}));
+
+  my $index = $self->{'_progress'}->{$name}->{'index'};
+  my $percent = $index*5;
+
+  my $log_str = "\r".('  'x$indent)."[".('='x$index).(' 'x(20-$index))."] ${percent}\%";
+  $log_str .= "\n" if ($curr == $self->{'_progress'}->{$name}->{'max_val'});
+
+  $self->log($log_str);
+
+  # increment counters
+  $self->{'_progress'}->{$name}->{'index'}++;
+  $self->{'_progress'}->{$name}->{'next'} +=
+    $self->{'_progress'}->{$name}->{'binsize'};
+}
+
+
+sub init_progressbar {
+  my $self = shift;
+  my $name = shift;
+  my $max = shift;
+
+  throw("You must provide a name and the maximum value for your progress bar")
+    unless ($name and $max);
+
+  # calculate bin size; we will use 20 bins (5% increments)
+  my $binsize = $max/20;
+
+  $self->{'_progress'}->{$name}->{'max_val'} = $max;
+  $self->{'_progress'}->{$name}->{'binsize'} = $binsize;
+  $self->{'_progress'}->{$name}->{'next'} = 0;
+  $self->{'_progress'}->{$name}->{'index'} = 0;
+}
+
+
+=head2 log_filehandle
+
+  Arg[1]      : (optional) String $mode - file access mode
+  Example     : my $log = $support->log_filehandle;
+                # print to the filehandle
+                print $log 'Lets start logging...\n';
+                # log via the wrapper $self->log()
+                $support->log('Another log message.\n');
+  Description : Returns a filehandle for logging (STDERR by default, logfile if
+                set from config or commandline). You can use the filehandle
+                directly to print to, or use the smart wrapper $self->log().
+                Logging mode (truncate or append) can be set by passing the
+                mode as an argument to log_filehandle(), or with the
+                --logappend commandline option (default: truncate)
+  Return type : Filehandle - the filehandle to log to
+  Exceptions  : thrown if logfile can't be opened
+  Caller      : general
+
+=cut
+
+sub log_filehandle {
+  my ($self, $mode) = @_;
+  
+  unless ($self->{'_log_filehandle'}) {
+    $mode ||= '>';
+    $mode = '>>' if ($self->logappend);
+    
+    my $fh = \*STDERR;
+    
+    if (my $logfile = $self->logfile) {
+      if (my $logpath = $self->logpath) {
+        unless (-e $logpath) {
+          system("mkdir $logpath") == 0 or
+            throw("Can't create log dir $logpath: $!\n");
+        }
+        
+        $logfile = "$logpath/".$self->logfile;
+      }
+      
+      open($fh, "$mode", $logfile) or
+        throw("Unable to open $logfile for writing: $!");
+    }
+
+    $self->{'_log_filehandle'} = $fh;
+  }
+
+  return $self->{'_log_filehandle'};
+}
+
+
+=head2 
+
+  Arg[1]      : 
+  Example     : 
+  Description : 
+  Return type : 
+  Exceptions  : 
+  Caller      : 
+  Status      :
+
+=cut
+
+sub extract_log_identifier {
+  my $self = shift;
+
+  if (my $logfile = $self->logfile) {
+    $logfile =~ /.+\.([^\.]+)\.log/;
+    return $1;
+  } else {
+    return undef;
+  }
+}
+
+
+=head2 init_log
+
+  Example     : $support->init_log;
+  Description : Opens a filehandle to the logfile and prints some header
+                information to this file. This includes script name, date, user
+                running the script and parameters the script will be running
+                with.
+  Return type : Filehandle - the log filehandle
+  Exceptions  : none
+  Caller      : general
+
+=cut
+
+sub init_log {
+  my $self = shift;
+  my $params = shift;
+
+  # get a log filehandle
+  my $log = $self->log_filehandle;
+
+  # remember start time
+  $self->{'_start_time'} = time;
+
+  # don't log parameters if this script is run by another one
+  unless ($self->is_component) {
+    # print script name, date, user who is running it
+    my $hostname = `hostname`;
+    chomp $hostname;
+    my $script = "$hostname:$Bin/$Script";
+    my $user = `whoami`;
+    chomp $user;
+    $self->log("Script: $script\nDate: ".$self->date."\nUser: $user\n");
+
+    # print parameters the script is running with
+    if ($params) {
+      $self->log("Parameters:\n\n");
+      $self->log($params);
+    }
+  }
+
+  return $log;
+}
+
+
+=head2 finish_log
+
+  Example     : $support->finish_log;
+  Description : Writes footer information to a logfile. This includes the
+                number of logged warnings, timestamp and memory footprint.
+  Return type : TRUE on success
+  Exceptions  : none
+  Caller      : general
+
+=cut
+
+sub finish_log {
+  my $self = shift;
+  
+  $self->log("\nAll done for $Script.\n");
+  $self->log($self->warnings." warnings. ");
+  $self->log("Runtime: ".$self->runtime." ".$self->date_and_mem."\n\n");
+  
+  return(1);
+}
+
+
+sub runtime {
+  my $self = shift;
+
+  my $runtime = "n/a";
+
+  if ($self->{'_start_time'}) {
+    my $diff = time - $self->{'_start_time'};
+    my $sec = $diff % 60;
+    $diff = ($diff - $sec) / 60;
+    my $min = $diff % 60;
+    my $hours = ($diff - $min) / 60;
+    
+    $runtime = "${hours}h ${min}min ${sec}sec";
+  }
+
+  return $runtime;
+}
+
+
+=head2 date_and_mem
+
+  Example     : print LOG "Time, memory usage: ".$support->date_and_mem."\n";
+  Description : Prints a timestamp and the memory usage of your script.
+  Return type : String - timestamp and memory usage
+  Exceptions  : none
+  Caller      : general
+
+=cut
+
+sub date_and_mem {
+  my $date = strftime "%Y-%m-%d %T", localtime;
+  my $mem = `ps -p $$ -o vsz |tail -1`;
+  chomp $mem;
+  return "[$date, mem $mem]";
+}
+
+
+=head2 date
+
+  Example     : print "Date: " . $support->date . "\n";
+  Description : Prints a nicely formatted timestamp (YYYY-DD-MM hh:mm:ss)
+  Return type : String - the timestamp
+  Exceptions  : none
+  Caller      : general
+
+=cut
+
+sub date {
+  return strftime "%Y-%m-%d %T", localtime;
+}
+
+
+=head2 mem
+
+  Example     : print "Memory usage: " . $support->mem . "\n";
+  Description : Prints the memory used by your script. Not sure about platform
+                dependence of this call ...
+  Return type : String - memory usage
+  Exceptions  : none
+  Caller      : general
+
+=cut
+
+sub mem {
+  my $mem = `ps -p $$ -o vsz |tail -1`;
+  chomp $mem;
+  return $mem;
+}
+
+
+=head2 warnings
+
+  Example     : print LOG "There were ".$support->warnings." warnings.\n";
+  Description : Returns the number of warnings encountered while running the
+                script (the warning counter is increased by $self->log_warning).
+  Return type : Int - number of warnings
+  Exceptions  : none
+  Caller      : general
+
+=cut
+
+sub warnings {
+  my $self = shift;
+  return $self->{'_warnings'};
+}
+
+
+=head2 
+
+  Arg[1]      : 
+  Example     : 
+  Description : 
+  Return type : 
+  Exceptions  : 
+  Caller      : 
+  Status      :
+
+=cut
+
+sub logfile {
+  my $self = shift;
+  $self->{'_logfile'} = shift if (@_);
+  return $self->{'_logfile'};
+}
+
+
+=head2 
+
+  Arg[1]      : 
+  Example     : 
+  Description : 
+  Return type : 
+  Exceptions  : 
+  Caller      : 
+  Status      :
+
+=cut
+
+sub logpath {
+  my $self = shift;
+  $self->{'_logpath'} = shift if (@_);
+  return $self->{'_logpath'};
+}
+
+
+=head2 
+
+  Arg[1]      : 
+  Example     : 
+  Description : 
+  Return type : 
+  Exceptions  : 
+  Caller      : 
+  Status      :
+
+=cut
+
+sub logappend {
+  my $self = shift;
+  $self->{'_logappend'} = shift if (@_);
+  return $self->{'_logappend'};
+}
+
+
+=head2 
+
+  Arg[1]      : 
+  Example     : 
+  Description : 
+  Return type : 
+  Exceptions  : 
+  Caller      : 
+  Status      :
+
+=cut
+
+sub verbose {
+  my $self = shift;
+  $self->{'_verbose'} = shift if (@_);
+  return $self->{'_verbose'};
+}
+
+
+=head2 
+
+  Arg[1]      : 
+  Example     : 
+  Description : 
+  Return type : 
+  Exceptions  : 
+  Caller      : 
+  Status      :
+
+=cut
+
+sub is_component {
+  my $self = shift;
+  $self->{'_is_component'} = shift if (@_);
+  return $self->{'_is_component'};
+}
+
+
+1;
+
diff --git a/modules/Bio/EnsEMBL/Utils/ScriptUtils.pm b/modules/Bio/EnsEMBL/Utils/ScriptUtils.pm
new file mode 100644
index 0000000000..24b2308d64
--- /dev/null
+++ b/modules/Bio/EnsEMBL/Utils/ScriptUtils.pm
@@ -0,0 +1,184 @@
+package Bio::EnsEMBL::Utils::ScriptUtils;
+
+=head1 NAME
+
+
+=head1 SYNOPSIS
+
+
+=head1 DESCRIPTION
+
+
+=head1 METHODS
+
+
+=head1 LICENCE
+
+This code is distributed under an Apache style licence. Please see
+http://www.ensembl.org/info/about/code_licence.html for details.
+
+=head1 AUTHOR
+
+Patrick Meidl <meidl@ebi.ac.uk>, Ensembl core API team
+
+=head1 CONTACT
+
+Please post comments/questions to the Ensembl development list
+<ensembl-dev@ebi.ac.uk>
+
+=cut
+
+use strict;
+use warnings;
+no warnings 'uninitialized';
+
+use Exporter;
+our @ISA = qw(Exporter);
+
+our @EXPORT_OK = qw(
+  user_proceed
+  commify
+  sort_chromosomes
+  parse_bytes
+);
+
+
+=head2 user_proceed
+
+  Arg[1]      : (optional) String $text - notification text to present to user
+  Example     : # run a code snipped conditionally
+                if ($support->user_proceed("Run the next code snipped?")) {
+                    # run some code
+                }
+
+                # exit if requested by user
+                exit unless ($support->user_proceed("Want to continue?"));
+  Description : If running interactively, the user is asked if he wants to
+                perform a script action. If he doesn't, this section is skipped
+                and the script proceeds with the code. When running
+                non-interactively, the section is run by default.
+  Return type : TRUE to proceed, FALSE to skip.
+  Exceptions  : none
+  Caller      : general
+
+=cut
+
+sub user_proceed {
+  my $text = shift;
+
+  print "$text\n" if $text;
+  print "[y/N] ";
+  
+  my $input = lc(<>);
+  chomp $input;
+  
+  if ($input eq 'y') {
+    return(1);
+  } else {
+    print "Skipping.\n";
+    return(0);
+  }
+}
+
+
+=head2 sort_chromosomes
+
+  Arg[1]      : (optional) Hashref $chr_hashref - Hashref with chr_name as keys
+  Example     : my $chr = { '6-COX' => 1, '1' => 1, 'X' => 1 };
+                my @sorted = $support->sort_chromosomes($chr);
+  Description : Sorts chromosomes in an intuitive way (numerically, then
+                alphabetically). If no chromosome hashref is passed, it's
+                retrieve by calling $self->get_chrlength()
+  Return type : List - sorted chromosome names
+  Exceptions  : thrown if no hashref is provided
+  Caller      : general
+
+=cut
+
+sub sort_chromosomes {
+    my @chromosomes = @_;
+    
+    return (sort _by_chr_num @chromosomes);
+}
+
+
+=head2 _by_chr_num
+
+  Example     : my @sorted = sort _by_chr_num qw(X, 6-COX, 14, 7);
+  Description : Subroutine to use in sort for sorting chromosomes. Sorts
+                numerically, then alphabetically
+  Return type : values to be used by sort
+  Exceptions  : none
+  Caller      : internal ($self->sort_chromosomes)
+
+=cut
+
+sub _by_chr_num {
+    my @awords = split /-/, $a;
+    my @bwords = split /-/, $b;
+
+    my $anum = $awords[0];
+    my $bnum = $bwords[0];
+
+    if ($anum !~ /^[0-9]*$/) {
+        if ($bnum !~ /^[0-9]*$/) {
+            return $anum cmp $bnum;
+        } else {
+            return 1;
+        }
+    }
+    if ($bnum !~ /^[0-9]*$/) {
+        return -1;
+    }
+
+    if ($anum <=> $bnum) {
+        return $anum <=> $bnum;
+    } else {
+        if ($#awords == 0) {
+            return -1;
+        } elsif ($#bwords == 0) {
+            return 1;
+        } else {
+            return $awords[1] cmp $bwords[1];
+        }
+    }
+}
+
+
+=head2 commify
+
+  Arg[1]      : Int $num - a number to commify
+  Example     : print "An easy to read number: ".$self->commify(100000000);
+                # will print 100,000,000
+  Description : put commas into a number to make it easier to read
+  Return type : a string representing the commified number
+  Exceptions  : none
+  Caller      : general
+  Status      : stable
+
+=cut
+
+sub commify {
+  my $num = shift;
+
+  $num = reverse($num);
+  $num =~ s/(\d\d\d)(?=\d)(?!\d*\.)/$1,/g;
+
+  return scalar reverse $num;
+}
+
+
+sub parse_bytes {
+  my $bytes = shift;
+
+  my @suffixes = qw(bytes kb Mb Gb Tb);
+
+  my $length = length($bytes);
+  my $order = int(($length-1)/3);
+
+  my $parsed = sprintf('%.0f', $bytes/10**(3*$order));
+
+  return "$parsed ".$suffixes[$order];
+}
+
+1;
-- 
GitLab