From 340a6b9f6f7a4eb66e6f24a732a78b5cb111856c Mon Sep 17 00:00:00 2001
From: Patrick Meidl <pm2@sanger.ac.uk>
Date: Fri, 29 Jun 2007 14:36:56 +0000
Subject: [PATCH] considerable rewrite to make it easier to use

---
 modules/Bio/EnsEMBL/Utils/ConfParser.pm | 568 +++++++++++-------------
 1 file changed, 265 insertions(+), 303 deletions(-)

diff --git a/modules/Bio/EnsEMBL/Utils/ConfParser.pm b/modules/Bio/EnsEMBL/Utils/ConfParser.pm
index 7b5906ebc0..6895c7b922 100644
--- a/modules/Bio/EnsEMBL/Utils/ConfParser.pm
+++ b/modules/Bio/EnsEMBL/Utils/ConfParser.pm
@@ -2,30 +2,34 @@ package Bio::EnsEMBL::Utils::ConfParser;
 
 =head1 NAME
 
-Bio::EnsEMBL::Utils::ConversionSupport - Utility module for Vega release and
-schema conversion scripts
+Bio::EnsEMBL::Utils::ConfParser - configuration parser for perl scripts
 
 =head1 SYNOPSIS
 
-    my $serverroot = '/path/to/ensembl';
-    my $suport = new Bio::EnsEMBL::Utils::ConversionSupport($serverroot);
-
-    # parse common options
-    $support->parse_common_options;
-
-    # parse extra options for your script
-    $support->parse_extra_options('string_opt=s', 'numeric_opt=n');
+    my $conf = new Bio::EnsEMBL::Utils::ConfParser(
+      -SERVERROOT => "/path/to/ensembl",
+      -DEFAULT_CONF => "my.default.conf"
+    );
 
-    # ask user if he wants to run script with these parameters
-    $support->confirm_params;
+    # parse options from configuration file and commandline
+    $conf->parse_options(
+      'mandatory_string_opt=s' => 1,
+      'optional_numeric_opt=n' => 0,
+    );
 
-    # see individual method documentation for more stuff
+    # get a paramter value
+    my $val = $conf->param('manadatory_string_op');
 
 =head1 DESCRIPTION
 
-This module is a collection of common methods and provides helper functions 
-for the Vega release and schema conversion scripts. Amongst others, it reads
-options from a config file, parses commandline options and does logging.
+This module parses a configuration file and the commandline options passed to a
+script (the latter superseed the former). Configuration files contain ini-file
+style name-value pairs, and the commandline options are passed to Getopt::Long
+for parsing.
+
+The parameter values are consequently accessible via the param() method. You
+can also create a commandline string of all current parameters and their values
+to pass to another script.
 
 =head1 LICENCE
 
@@ -34,11 +38,12 @@ Please see http://www.ensembl.org/code_licence.html for details
 
 =head1 AUTHOR
 
-Patrick Meidl <pm2@sanger.ac.uk>
+Patrick Meidl <meidl@ebi.ac.uk>, Ensembl core API team
 
 =head1 CONTACT
 
-Post questions to the EnsEMBL development list ensembl-dev@ebi.ac.uk
+Please post comments/questions to the Ensembl development list
+<ensembl-dev@ebi.ac.uk>
 
 =cut
 
@@ -49,22 +54,28 @@ no warnings 'uninitialized';
 use Getopt::Long;
 use Text::Wrap;
 use Cwd qw(abs_path);
+use Pod::Usage qw(pod2usage);
 use Bio::EnsEMBL::Utils::Argument qw(rearrange);
 use Bio::EnsEMBL::Utils::Exception qw(throw warning);
 use Bio::EnsEMBL::Utils::ScriptUtils qw(user_proceed);
-use Bio::EnsEMBL::Utils::Logger;
 
 
 =head2 new
 
-  Arg[SERVERROOT] : String $serverroot
-                root directory of your ensembl code
-  Example     : my $support = new Bio::EnsEMBL::Utils::ConversionSupport(
-                                        '/path/to/ensembl');
-  Description : constructor
-  Return type : Bio::EnsEMBL::Utils::ConversionSupport object
+  Arg [SERVERROOT] :
+                String $serverroot - root directory of your ensembl code
+  Arg [DEFAULT_CONF] :
+                String $default_conf - default configuration file
+  Example     : my $conf = new Bio::EnsEMBL::Utils::ConfParser(
+                  -SERVERROOT => '/path/to/ensembl',
+                  -DEFAULT_CONF => 'my.default.conf'
+                );
+  Description : object constructor
+  Return type : Bio::EnsEMBL::Utils::ConfParser object
   Exceptions  : thrown if no serverroot is provided
   Caller      : general
+  Status      : At Risk
+              : under development
 
 =cut
 
@@ -72,57 +83,71 @@ sub new {
   my $caller = shift;
   my $class = ref($caller) || $caller;
 
-  my ($serverroot) = rearrange(['SERVERROOT'], @_);
+  my ($serverroot, $default_conf) =
+    rearrange([qw(SERVERROOT DEFAULT_CONF)], @_);
 
   throw("You must supply a serverroot.") unless ($serverroot);
 
-  my $self = {
-      '_serverroot'   => $serverroot,
-      '_param'        => { interactive => 1 },
-  };
-
+  my $self = {};
   bless ($self, $class);
 
+  $self->serverroot($serverroot);
+  $self->default_conf($default_conf || "$ENV{HOME}/.ensembl_script.conf");
+
   return $self;
 }
 
 
-=head2 parse_common_options
-
-  Example     : $support->parse_common_options;
-  Description : This method reads options from a configuration file and parses
-                some commandline options that are common to all scripts (like
-                db connection settings, help, dry-run). Commandline options
-                will override config file settings. 
-
-                All options will be accessible via $self->param('name').
+=head2 parse_options
+
+  Arg[1..n]   : pairs of option definitions and mandatory flag (see below for
+                details)
+  Example     : $conf->parse_options(
+                  'mandatory_string_opt=s' => 1,
+                  'optional_numeric_opt=n' => 0,
+                );
+  Description : This method reads options from an (optional) configuration file
+                and parses the commandline options supplied by the user.
+                Commandline options will superseed config file settings. The
+                string "$SERVERROOT" in the configuration entries will be
+                replaced by  the appropriate value.
+
+                The arguments passed to this method are pairs of a Getopt::Long
+                style option definition (in fact it will be passed to
+                GetOptions() directly) and a flag indicating whether this
+                option is mandatory (1) or optional (0).
+
+                In addition to these user-defined options, a set of common
+                options is always parsed. See _common_options() for details.
+                
+                If you run your script with --interactive the user will be
+                asked to confirm the parameters after parsing.
+                
+                All parameters will then be accessible via $self->param('name').
   Return type : true on success 
   Exceptions  : thrown if configuration file can't be opened
+                thrown on missing mandatory parameters
   Caller      : general
+  Status      : At Risk
+              : under development
 
 =cut
 
-sub parse_common_options {
-  my $self = shift;
+sub parse_options {
+  my ($self, @params) = @_;
+
+  # add common options to user supplied list
+  push @params, $self->_common_options;
 
-  # read commandline options
+  # read common commandline options
   my %h;
-  Getopt::Long::Configure("pass_through");
-  &GetOptions( \%h,
-      'conffile|conf=s',
-      'logfile|log=s',
-      'logpath=s',
-      'logappend|log_append=s',
-      'is_component=s',
-      'verbose|v=s',
-      'interactive|i=s',
-      'dry_run|dry|n=s',
-      'help|h|?',
-      );
+  my %params = @params;
+
+  Getopt::Long::Configure('pass_through');
+  &GetOptions(\%h, keys %params);
 
   # reads config file
-  my $conffile = $h{'conffile'} || $self->param('default_conf') ||
-    "$ENV{HOME}/.ensembl_script.conf";
+  my $conffile = $h{'conffile'} || $self->default_conf;
   $conffile = abs_path($conffile);
 
   if (-e $conffile) {
@@ -142,6 +167,8 @@ sub parse_common_options {
       next unless (/(\w\S*)\s*=\s*(.*)/);
       my $name = $1;
       my $val = $2;
+      
+      # replace $SERVERROOT with value
       if ($val =~ /\$SERVERROOT/) {
         $val =~ s/\$SERVERROOT/$serverroot/g;
         $val = abs_path($val);
@@ -154,143 +181,177 @@ sub parse_common_options {
 
   # override configured parameter with commandline options
   map { $self->param($_, $h{$_}) } keys %h;
+
+  # check for required params, convert comma to list, maintain an ordered
+  # list of parameters and list of 'flag' type params
+  my @missing = ();
+  my $i = 0;
+
+  foreach my $param (@params) {
+    next if ($i++ % 2);
+
+    my $required = $params{$param};
+    my $list = 1 if ($param =~ /\@$/);
+    my $flag = 1 if ($param =~ /!$/);
+    $param =~ s/(^\w+).*/$1/;
+    
+    $self->comma_to_list($param) if ($list);
+
+    push @missing, $param if ($required and !$self->param($param));
+    push @{ $self->{'_ordered_params'} }, $param;
+    $self->{'_flag_params'}->{$param} = 1 if ($flag);
+  }
+  
+  if (@missing) {
+    throw("Missing parameters: @missing.\nYou must specify them on the commandline or in your conffile.\n");
+  }
+
+  # error handling and --help
+  pod2usage(1) if ($self->param('help'));
+
+  # ask user to confirm parameters to proceed
+  $self->confirm_params;
+
   return(1);
 }
 
 
-=head2 parse_extra_options
+#
+# Commonly used options. These are parsed by default even if they are not
+# passed to parse_options() explicitely.
+#
+sub _common_options {
+  my $self = shift;
+  return (
+    'conffile|conf=s' => 0,
+    'logfile|log=s' => 0,
+    'logpath=s' => 0,
+    'logappend|log_append|log-append!' => 0,
+    'is_component|is-component!' => 0,
+    'verbose|v!' => 0,
+    'interactive|i!' => 0,
+    'dry_run|dry-run|dry|n!' => 0,
+    'help|h|?' => 0,
+  );
+}
+
+
+=head2 confirm_params
 
-  Arg[1-N]    : option descriptors that will be passed on to Getopt::Long
-  Example     : $support->parse_extra_options('string_opt=s', 'numeric_opt=n');
-  Description : Parse extra commandline options by passing them on to
-                Getopt::Long and storing parameters in $self->param('name).
+  Example     : $conf->confirm_params;
+  Description : If the script is run with the --interactive switch, this method
+                prints a table of all parameters and their values and asks user
+                to confirm if he wants to proceed.
   Return type : true on success
-  Exceptions  : none (caugth by $self->error)
-  Caller      : general
+  Exceptions  : none
+  Caller      : parse_options()
+  Status      : At Risk
+              : under development
 
 =cut
 
-sub parse_extra_options {
-  my ($self, @params) = @_;
-  
-  Getopt::Long::Configure("no_pass_through");
-  
-  eval {
-    # catch warnings to pass to $self->error
-    local $SIG{__WARN__} = sub { die @_; };
-    &GetOptions(\%{ $self->{'_param'} }, @params);
-  };
+sub confirm_params {
+  my $self = shift;
 
-  # comma to list
-  foreach my $param (@params) {
-    if ($param =~ /\@$/) {
-      $param =~ s/(^\w+).*/$1/;
-      $self->comma_to_list($param);
-    }
+  if ($self->param('interactive')) {
+    # print parameter table
+    print "Running script with these parameters:\n\n";
+    print $self->list_param_values;
+
+    # ask user if he wants to proceed
+    exit unless user_proceed("Continue?");
   }
   
-  $self->error($@) if $@;
-  
   return(1);
 }
 
 
-=head2 allowed_params
+=head2 param
 
-  Arg[1-N]    : (optional) List of allowed parameters to set
-  Example     : my @allowed = $self->allowed_params(qw(param1 param2));
-  Description : Getter/setter for allowed parameters. This is used by
-                $self->confirm_params() to avoid cluttering of output with
-                conffile entries not relevant for a given script. You can use
-                $self->get_common_params() as a shortcut to set them.
-  Return type : Array - list of allowed parameters
-  Exceptions  : none
+  Arg[1]      : Parameter name
+  Arg[2..n]   : (optional) List of values to set
+  Example     : # getter
+                my $dbname = $conf->param('dbname');
+
+                # setter
+                $conf->param('port', 3306);
+                $conf->param('chromosomes', 1, 6, 'X');
+  Description : Getter/setter for parameters. Accepts single-value params and
+                list params.
+  Return type : Scalar value for single-value parameters, array of values for
+                list parameters
+  Exceptions  : thrown if no parameter name is supplied
   Caller      : general
+  Status      : At Risk
+              : under development
 
 =cut
 
-sub allowed_params {
+sub param {
   my $self = shift;
+  my $name = shift or throw("You must supply a parameter name");
 
   # setter
   if (@_) {
-    @{ $self->{'_allowed_params'} } = @_;
+    if (scalar(@_) == 1) {
+      # single value
+      $self->{'_param'}->{$name} = shift;
+    } else {
+      # list of values
+      undef $self->{'_param'}->{$name};
+      @{ $self->{'_param'}->{$name} } = @_;
+    }
   }
 
   # getter
-  if (ref($self->{'_allowed_params'}) eq 'ARRAY') {
-    return @{ $self->{'_allowed_params'} };
+  if (ref($self->{'_param'}->{$name}) eq 'ARRAY') {
+    # list parameter
+    return @{ $self->{'_param'}->{$name} };
+  } elsif (defined($self->{'_param'}->{$name})) {
+    # single-value parameter
+    return $self->{'_param'}->{$name};
   } else {
-    return ();
+    return undef;
   }
 }
 
 
-=head2 get_common_params
+=head2 list_params
 
-  Example     : my @allowed_params = $self->get_common_params, 'extra_param';
-  Description : Returns a list of commonly used parameters in the conversion
-                scripts. Shortcut for setting allowed parameters with
-                $self->allowed_params().
-  Return type : Array - list of common parameters
+  Example     : print "Current parameter names:\n";
+                foreach my $param (@{ $conf->list_params }) {
+                  print "  $param\n";
+                }
+  Description : Returns a list of the currently available parameter names. The
+                list will be in the same order as option definitions were
+                passed to the new() method.
+  Return type : Arrayref of parameter names
   Exceptions  : none
-  Caller      : general
+  Caller      : list_param_values(), create_commandline_options()
+  Status      : At Risk
+              : under development
 
 =cut
 
-sub get_common_params {
-  return qw(
-      conffile
-      logpath
-      logfile
-      logappend
-      is_component
-      verbose
-      interactive
-      dry_run
-  );
-}
-
-
-=head2 confirm_params
-
-  Example     : $support->confirm_params;
-  Description : Prints a table of parameters that were collected from config
-                file and commandline and asks user to confirm if he wants
-                to proceed.
-  Return type : true on success
-  Exceptions  : none
-  Caller      : general
-
-=cut
-
-sub confirm_params {
+sub list_params {
   my $self = shift;
-
-  if ($self->param('interactive')) {
-    # print parameter table
-    print "Running script with these parameters:\n\n";
-    print $self->list_all_params;
-
-    # ask user if he wants to proceed
-    exit unless user_proceed("Continue?");
-  }
-  
-  return(1);
+  return $self->{'_ordered_params'} || [];
 }
 
 
-=head2 list_all_params
+=head2 list_param_values
 
-  Example     : print LOG $support->list_all_params;
+  Example     : print LOG $conf->list_param_values;
   Description : prints a table of the parameters used in the script
   Return type : String - the table to print
   Exceptions  : none
   Caller      : general
+  Status      : At Risk
+              : under development
 
 =cut
 
-sub list_all_params {
+sub list_param_values {
   my $self = shift;
 
   $Text::Wrap::colums = 72;
@@ -298,10 +359,10 @@ sub list_all_params {
   my $txt = sprintf "    %-20s%-40s\n", qw(PARAMETER VALUE);
   $txt .= "    " . "-"x70 . "\n";
 
-  foreach my $key ($self->allowed_params) {
+  foreach my $key (@{ $self->list_params }) {
     my $val;
     if (defined($self->param($key))) {
-      $txt .= Text::Wrap::wrap(sprintf('    %-20s', $key), ' 'x24,
+      $txt .= Text::Wrap::wrap(sprintf('    %-19s ', $key), ' 'x24,
         join(", ", $self->param($key)))."\n";
     }
   }
@@ -314,100 +375,69 @@ sub list_all_params {
 
 =head2 create_commandline_options
 
-  Arg[1]      : Hashref $settings - hashref describing what to do
-                Allowed keys:
-                    allowed_params => 0|1   # use all allowed parameters
-                    exclude => []           # listref of parameters to exclude
-                    replace => {param => newval} # replace value of param with
-                                                 # newval
-  Example     : $support->create_commandline_options({
-                    allowed_params => 1,
-                    exclude => ['verbose'],
-                    replace => { 'dbname' => 'homo_sapiens_vega_33_35e' }
-                });
-  Description : Creates a commandline options string that can be passed to any
-                other script using ConversionSupport.
+  Arg[1..n]   : param/value pairs which should be added to or override the
+                currently defined parameters
+  Example     : $conf->create_commandline_options(
+                    'dbname' => 'homo_sapiens_vega_33_35e',
+                    'interactive' => 0
+                );
+  Description : Creates a commandline options string of all current paramters
+                that can be passed to another script.
   Return type : String - commandline options string
   Exceptions  : none
   Caller      : general
+  Status      : At Risk
+              : under development
 
 =cut
 
 sub create_commandline_options {
-  my $self = shift;
-
-  my ($allowed_params, $exclude, $replace) = rearrange(
-    ['ALLOWED_PARAMS', 'EXCLUDE', 'REPLACE'], @_);
+  my ($self, %replace) = @_;
 
   my %param_hash;
 
-  # get all allowed parameters
-  if ($allowed_params) {
-    # exclude params explicitly stated
-    my %exclude = map { $_ => 1 } @{ $exclude || [] };
-
-    foreach my $param ($self->allowed_params) {
-      unless ($exclude{$param}) {
-        my ($first, @rest) = $self->param($param);
-        next unless (defined($first));
+  # deal with list values
+  foreach my $param (@{ $self->list_params }) {
+    my ($first, @rest) = $self->param($param);
+    next unless (defined($first));
 
-        if (@rest) {
-          $first = join(",", $first, @rest);
-        }
-        $param_hash{$param} = $first;
-      }
+    if (@rest) {
+      $first = join(",", $first, @rest);
     }
-
+    $param_hash{$param} = $first;
   }
 
   # replace values
-  foreach my $key (keys %{ $replace || {} }) {
-    $param_hash{$key} = $replace->{$key};
+  foreach my $key (keys %replace) {
+    $param_hash{$key} = $replace{$key};
   }
 
   # create the commandline options string
   my $options_string;
   foreach my $param (keys %param_hash) {
-    $options_string .= sprintf("--%s %s ", $param, $param_hash{$param});
-  }
 
-  return $options_string;
-}
-
-
-=head2 check_required_params
-
-  Arg[1-N]    : List @params - parameters to check
-  Example     : $self->check_required_params(qw(dbname host port));
-  Description : Checks $self->param to make sure the requested parameters
-                have been set. Dies if parameters are missing.
-  Return type : true on success
-  Exceptions  : none
-  Caller      : general
+    my $val = $param_hash{$param};
 
-=cut
+    # deal with 'flag' type params correctly
+    if ($self->{'_flag_params'}->{$param}) {
+      # change 'myparam' to 'nomyparam' if no value set
+      $param = 'no'.$param unless ($val);
 
-sub check_required_params {
-  my ($self, @params) = @_;
-  
-  my @missing = ();
-  
-  foreach my $param (@params) {
-    push @missing, $param unless $self->param($param);
-  }
-  
-  if (@missing) {
-    throw("Missing parameters: @missing.\nYou must specify them on the commandline or in your conffile.\n");
+      # unset value (this is how flags behave)
+      $val = undef;
+    }
+    
+    $options_string .= sprintf("--%s %s ", $param, $val);
   }
-  
-  return(1);
+
+  return $options_string;
 }
 
 
 =head2 comma_to_list
 
-  Arg[1-N]    : list of parameter names to parse
-  Example     : $support->comma_to_list('chromosomes');
+  Arg[1..n]   : list of parameter names to parse
+  Example     : $conf->comma_to_list('chromosomes');
   Description : Transparently converts comma-separated lists into arrays (to
                 allow different styles of commandline options, see perldoc
                 Getopt::Long for details). Parameters are converted in place
@@ -415,6 +445,8 @@ sub check_required_params {
   Return type : true on success
   Exceptions  : none
   Caller      : general
+  Status      : At Risk
+              : under development
 
 =cut
 
@@ -432,12 +464,14 @@ sub comma_to_list {
 =head2 list_or_file
 
   Arg[1]      : Name of parameter to parse
-  Example     : $support->list_or_file('gene_stable_id');
+  Example     : $conf->list_or_file('gene_stable_id');
   Description : Determines whether a parameter holds a list or it is a filename
                 to read the list entries from.
   Return type : true on success
   Exceptions  : thrown if list file can't be opened
   Caller      : general
+  Status      : At Risk
+              : under development
 
 =cut
 
@@ -471,61 +505,16 @@ sub list_or_file {
 }
 
 
-=head2 param
-
-  Arg[1]      : Parameter name
-  Arg[2-N]    : (optional) List of values to set
-  Example     : my $dbname = $support->param('dbname');
-                $support->param('port', 3306);
-                $support->param('chromosomes', 1, 6, 'X');
-  Description : Getter/setter for parameters. Accepts single-value params and
-                list params.
-  Return type : Scalar value for single-value parameters, array of values for
-                list parameters
-  Exceptions  : thrown if no parameter name is supplied
-  Caller      : general
-
-=cut
-
-sub param {
-  my $self = shift;
-  my $name = shift or throw("You must supply a parameter name");
-
-  # setter
-  if (@_) {
-    if (scalar(@_) == 1) {
-      # single value
-      $self->{'_param'}->{$name} = shift;
-    } else {
-      # list of values
-      undef $self->{'_param'}->{$name};
-      @{ $self->{'_param'}->{$name} } = @_;
-    }
-  }
-
-  # getter
-  if (ref($self->{'_param'}->{$name}) eq 'ARRAY') {
-    # list parameter
-    return @{ $self->{'_param'}->{$name} };
-  } elsif (defined($self->{'_param'}->{$name})) {
-    # single-value parameter
-    return $self->{'_param'}->{$name};
-  } else {
-    return undef;
-  }
-}
-
-
 =head2 serverroot
 
-  Arg[1]      : (optional) String - root directory of your ensembl sandbox
-  Example     : my $serverroot = $support->serverroot;
-  Description : Getter/setter for the root directory of your ensembl sandbox.
-                This is set when ConversionSupport object is created, so
-                usually only used as a getter.
-  Return type : String - the server root directory
+  Arg[1]      : (optional) String - root directory of your ensembl checkout
+  Example     : my $serverroot = $conf->serverroot;
+  Description : Getter/setter for the root directory of your ensembl checkout.
+  Return type : String
   Exceptions  : none
-  Caller      : general
+  Caller      : new(), general
+  Status      : At Risk
+              : under development
 
 =cut
 
@@ -536,52 +525,25 @@ sub serverroot {
 }
 
 
-=head2 error
+=head2 default_conf
 
-  Arg[1]      : (optional) String - error message
-  Example     : $support->error("An error occurred: $@");
-                exit(0) if $support->error;
-  Description : Getter/setter for error messages
-  Return type : String - error message
+  Arg[1]      : (optional) String - default configuration file
+  Example     : $conf->default_conf('my.default.conf');
+  Description : Getter/setter for the default configuration file.
+  Return type : String
   Exceptions  : none
-  Caller      : general
+  Caller      : new(), general
+  Status      : At Risk
+              : under development
 
 =cut
 
-sub error {
-    my $self = shift;
-    $self->{'_error'} = shift if (@_);
-    return $self->{'_error'};
-}
-
-
-=head2 
-
-  Arg[1]      : 
-  Example     : 
-  Description : 
-  Return type : 
-  Exceptions  : 
-  Caller      : 
-  Status      :
-
-=cut
-
-sub logger {
+sub default_conf {
   my $self = shift;
-
-  if (@_) {
-    $self->{'_logger'} = shift;
-
-  } elsif (! $self->{'_logger'}) {
-    # log to STDERR if no logger supplied
-    $self->{'_logger'} = new Bio::EnsEMBL::Utils::Logger(
-      -VERBOSE => $self->param('verbose'),
-    );
-  }
-
-  return $self->{'_logger'};
+  $self->{'_default_conf'} = shift if (@_);
+  return $self->{'_default_conf'};
 }
 
+
 1;
 
-- 
GitLab