Params.pm 12.6 KB
Newer Older
1 2 3 4
=pod 

=head1 NAME

5
    Bio::EnsEMBL::Hive::Params
6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37

=head1 SYNOPSIS

By inheriting from this module you make your module able to deal with parameters:

    1) parsing of parameters in the order of precedence, starting with the lowest:
            #
            ## general usage:
            # $self->param_init( $lowest_precedence_hashref, $middle_precedence_hashref, $highest_precedence_hashref );
            #
            ## typical usage:
            # $job->param_init( 
            #       $runObj->param_defaults(),                      # module-wide built-in defaults have the lowest precedence (will always be the same for this module)
            #       $self->db->get_MetaContainer->get_param_hash(), # then come the pipeline-wide parameters from the 'meta' table (define things common to all modules in this pipeline)
            #       $self->analysis->parameters(),                  # analysis-wide 'parameters' are even more specific (can be defined differently for several occurence of the same module)
            #       $job->input_id(),                               # job-specific 'input_id' parameters have the highest precedence
            # );
          

    2) reading a parameter's value
            #
            #  my $source = $self->param('source'); )

    3) dynamically setting a parameter's value
            #
            #  $self->param('binpath', '/software/ensembl/compara');
            #
        Note: It proved to be a convenient mechanism to exchange params
              between fetch_input(), run(), write_output() and other methods.

=head1 DESCRIPTION

38 39 40 41 42 43 44 45 46 47 48 49
    Most of Compara RunnableDB methods work under assumption
    that both analysis.parameters and job.input_id fields contain a Perl-style parameter hashref as a string.

    This module implements a generic param() method that allows to set parameters according to the following parameter precedence rules:

        (1) Job-Specific parameters defined in job.input_id hash, they have the highest priority and override everything else.

        (2) Analysis-Wide parameters defined in analysis.parameters hash. Can be overridden by (1).

        (3) Pipeline-Wide parameters defined in the 'meta' table. Can be overridden by (1) and (2).

        (4) Module_Defaults that are hard-coded into modules have the lowest precedence. Can be overridden by (1), (2) and (3).
50

51
=head1 LICENSE
52

53
    Copyright [1999-2014] Wellcome Trust Sanger Institute and the EMBL-European Bioinformatics Institute
54

55 56
    Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License.
    You may obtain a copy of the License at
57

58
         http://www.apache.org/licenses/LICENSE-2.0
59

60 61 62
    Unless required by applicable law or agreed to in writing, software distributed under the License
    is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    See the License for the specific language governing permissions and limitations under the License.
63 64 65

=head1 CONTACT

66
    Please subscribe to the Hive mailing list:  http://listserver.ebi.ac.uk/mailman/listinfo/ehive-users  to discuss Hive-related questions or to be notified of our updates
67 68 69 70 71 72 73

=cut


package Bio::EnsEMBL::Hive::Params;

use strict;
74 75
use warnings;

76 77
use List::Util qw(first min max minstr maxstr reduce sum shuffle);              # make them available for substituted expressions
use Bio::EnsEMBL::Hive::Utils ('stringify', 'dir_revhash', 'go_figure_dbc');    # NB: dir_revhash() is used by some substituted expressions, do not remove!
78 79


80 81 82 83 84 85 86 87 88 89 90 91 92
=head2 new

    Description: a trivial constructor, mostly for testing a Params object

=cut

sub new {
    my $class = shift @_;

    return bless {}, $class;
}


93 94
=head2 param_init

95 96 97
    Description: First parses the parameters from all sources in the reverse precedence order (supply the lowest precedence hash first),
                 then preforms "total" parameter substitution.
                 Will fail on detecting a substitution loop.
98 99 100 101 102 103 104 105

=cut

sub param_init {
                    
    my $self                = shift @_;
    my $strict_hash_format  = shift @_;

106
    my %unsubstituted_param_hash = ();
107

108 109 110 111 112 113
    foreach my $source (@_) {
        if(ref($source) ne 'HASH') {
            if($strict_hash_format or $source=~/^\{.*\}$/) {
                my $param_hash = eval($source) || {};
                if($@ or (ref($param_hash) ne 'HASH')) {
                    die "Expected a {'param'=>'value'} hashref, but got the following string instead: '$source'\n";
114
                }
115 116 117
                $source = $param_hash;
            } else {
                $source = {};
118
            }
119 120 121
        }
        while(my ($k,$v) = each %$source ) {
            $unsubstituted_param_hash{$k} = $v;
122 123
        }
    }
124 125

    $self->{'_unsubstituted_param_hash'} = \%unsubstituted_param_hash;
126 127 128
}


129 130 131 132 133 134 135 136 137 138
sub _param_possibly_overridden {
    my ($self, $param_name, $overriding_hash) = @_;

    return ( ( (ref($overriding_hash) eq 'HASH') && exists($overriding_hash->{ $param_name }) )
                    ? $overriding_hash->{ $param_name }
                    : $self->_param_silent($param_name)
           );
}


139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182
sub _param_silent {
    my $self        = shift @_;
    my $param_name  = shift @_
        or die "ParamError: calling param() without arguments\n";

    if(@_) { # If there is a value (even if undef), then set it!
        $self->{'_param_hash'}{$param_name} = shift @_;
    } elsif( !exists( $self->{'_param_hash'}{$param_name} )
       and    exists( $self->{'_unsubstituted_param_hash'}{$param_name} ) ) {
        my $unsubstituted = $self->{'_unsubstituted_param_hash'}{$param_name};

        $self->{'_param_hash'}{$param_name} = $self->param_substitute( $unsubstituted );
    }

    return exists( $self->{'_param_hash'}{$param_name} )
                ? $self->{'_param_hash'}{$param_name}
                : undef;
}


=head2 param_required

    Arg [1]    : string $param_name

    Description: A strict getter method for a job's parameter; will die if the parameter was not set or is undefined

    Example    : my $source = $self->param_required('source');

    Returntype : any Perl structure or object that you dared to store

=cut

sub param_required {
    my $self        = shift @_;
    my $param_name  = shift @_;

    my $value = $self->_param_silent($param_name);

    return defined( $value )
            ? $value
            : die "ParamError: value for param_required('$param_name') is required and has to be defined\n";
}


183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203
=head2 param_exists

    Arg [1]    : string $param_name

    Description: A predicate tester for whether the parameter has been initialized (even to undef)

    Example    : if( $self->param_exists('source') ) { print "'source' exists\n"; } else { print "never heard of 'source'\n"; }

    Returntype : boolean

=cut

sub param_exists {
    my $self        = shift @_;
    my $param_name  = shift @_;

    return exists( $self->{'_param_hash'}{$param_name} )
            ? 1
            : 0;
}

204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225
=head2 param_is_defined

    Arg [1]    : string $param_name

    Description: A predicate tester for definedness of a parameter

    Example    : if( $self->param_is_defined('source') ) { print "defined, possibly zero"; } else { print "undefined"; }

    Returntype : boolean

=cut

sub param_is_defined {
    my $self        = shift @_;
    my $param_name  = shift @_;

    return defined( $self->_param_silent($param_name) )
            ? 1
            : 0;
}


226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242
=head2 param

    Arg [1]    : string $param_name

    Arg [2]    : (optional) $param_value

    Description: A getter/setter method for a job's parameters that are initialized through 4 levels of precedence (see param_init() )

    Example 1  : my $source = $self->param('source'); # acting as a getter

    Example 2  : $self->param('binpath', '/software/ensembl/compara');  # acting as a setter

    Returntype : any Perl structure or object that you dared to store

=cut

sub param {
243 244 245 246
    my $self        = shift @_;
    my $param_name  = shift @_
        or die "ParamError: calling param() without arguments\n";

247 248
    my $value = $self->_param_silent( $param_name, @_ );
    
249
    unless( $self->param_exists( $param_name ) ) {
250
        warn "ParamWarning: value for param('$param_name') is used before having been initialized!\n";
251
    }
252

253
    return $value;
254 255
}

256

257 258 259 260 261 262 263 264 265 266 267
=head2 param_substitute

    Arg [1]    : Perl structure $string_with_templates

    Description: Performs parameter substitution on strings that contain templates like " #param_name# followed by #another_param_name# " .

    Returntype : *another* Perl structure with matching topology (may be more complex as a result of substituting a substructure for a term)

=cut

sub param_substitute {
268
    my ($self, $structure, $overriding_hash) = @_;
269

270
    my $ref_type = ref($structure);
271

272
    if(!$ref_type) {
273

274 275 276 277
        if(!$structure) {

            return $structure;

278
        } elsif($structure=~/^(?:#(expr\(.+?\)expr|[\w:]+)#)$/) {   # if the given string is one complete substitution, we don't want to force the output into a string
279

280
            return $self->_subst_one_hashpair($1, $overriding_hash);
281 282

        } else {
283
            my $scalar_defined  = 1;
284

285
            $structure=~s/(?:#(expr\(.+?\)expr|[\w:]+)#)/my $value = $self->_subst_one_hashpair($1, $overriding_hash); $scalar_defined &&= defined($value); $value/eg;
286 287

            return $scalar_defined ? $structure : undef;
288 289
        }

290
    } elsif($ref_type eq 'ARRAY') {
291 292
        my @substituted_array = ();
        foreach my $element (@$structure) {
293
            push @substituted_array, $self->param_substitute($element, $overriding_hash);
294 295
        }
        return \@substituted_array;
296
    } elsif($ref_type eq 'HASH') {
297 298
        my %substituted_hash = ();
        while(my($key,$value) = each %$structure) {
299
            $substituted_hash{$self->param_substitute($key, $overriding_hash)} = $self->param_substitute($value, $overriding_hash);
300 301 302
        }
        return \%substituted_hash;
    } else {
303 304
        warn "Could not substitute parameters in '$structure' - unsupported data type '$ref_type'\n";
        return $structure;
305 306 307 308 309 310 311
    }
}


sub mysql_conn { # an example stringification formatter (others can be defined here or in a descendent of Params)
    my ($self, $db_conn) = @_;

312
    if(ref($db_conn) eq 'HASH') {
313
        return "--host=$db_conn->{-host} --port=$db_conn->{-port} --user='$db_conn->{-user}' --password='$db_conn->{-pass}' $db_conn->{-dbname}";
314 315
    } else {
        my $dbc = go_figure_dbc( $db_conn );
316
        return '--host='.$dbc->host.' --port='.$dbc->port." --user='".$dbc->username."' --password='".$dbc->password."' ".$dbc->dbname;
317
    }
318 319 320 321 322
}

sub mysql_dbname { # another example stringification formatter
    my ($self, $db_conn) = @_;

323 324 325 326 327 328
    if(ref($db_conn) eq 'HASH') {
        return $db_conn->{-dbname};
    } else {
        my $dbc = go_figure_dbc( $db_conn );
        return $dbc->dbname;
    }
329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345
}

sub csvq { # another example stringification formatter
    my ($self, $list) = @_;

    return join(',', map { "'$_'" } @$list);
}

#--------------------------------------------[private methods]----------------------------------------------

=head2 _subst_one_hashpair
    
    Description: this is a private method that performs one substitution. Called by param_substitute().

=cut

sub _subst_one_hashpair {
346
    my ($self, $inside_hashes, $overriding_hash) = @_;
347

348 349 350 351 352 353
    if($self->{'_substitution_in_progress'}{$inside_hashes}++) {
        die "ParamError: substitution loop among {".join(', ', map {"'$_'"} keys %{$self->{'_substitution_in_progress'}})."} has been detected\n";
    }

    my $value;

354 355
    if($inside_hashes=~/^\w+$/) {

356
        $value =  $self->_param_possibly_overridden($inside_hashes, $overriding_hash);
357 358 359

    } elsif($inside_hashes=~/^(\w+):(\w+)$/) {

360
        $value = $self->$1($self->_param_possibly_overridden($2, $overriding_hash));
361 362 363 364

    } elsif($inside_hashes=~/^expr\((.*)\)expr$/) {

        my $expression = $1;
365 366 367
            # FIXME: the following two lines will have to be switched to drop support for $old_substitution_syntax and stay with #new_substitution_syntax#
        $expression=~s{(?:\$(\w+)|#(\w+)#)}{stringify($self->_param_possibly_overridden($1 // $2, $overriding_hash))}eg;    # substitute-by-value (bulky, but supports old syntax)
#        $expression=~s{(?:#(\w+)#)}{\$self->_param_possibly_overridden('$1', \$overriding_hash)}g;                         # substitute-by-call (no longer supports old syntax)
368

369 370
        $value = eval "return $expression";     # NB: 'return' is needed to protect the hashrefs from being interpreted as scoping blocks
# warn "SOH: #$inside_hashes# becomes $expression and is then evaluated into ".stringify($value)."\n";
371
    }
372 373 374 375 376

    warn "ParamWarning: substituting an undefined value of #$inside_hashes#\n" unless(defined($value));

    delete $self->{'_substitution_in_progress'}{$inside_hashes};
    return $value;
377 378 379
}

1;