Utils.pm 12.7 KB
Newer Older
1

Leo Gordon's avatar
Leo Gordon committed
2
=pod
3 4 5

=head1 NAME

6
    Bio::EnsEMBL::Hive::Utils
7

Leo Gordon's avatar
Leo Gordon committed
8
=head1 SYNOPSIS
9

Leo Gordon's avatar
Leo Gordon committed
10 11 12
        # Example of an import:
    use Bio::EnsEMBL::Hive::Utils 'stringify';
    my $input_id_string = stringify($input_id_hash);
13

Leo Gordon's avatar
Leo Gordon committed
14 15 16
        # Example of inheritance:
    use base ('Bio::EnsEMBL::Hive::Utils', ...);
    my $input_id_string = $self->stringify($input_id_hash);
17

Leo Gordon's avatar
Leo Gordon committed
18 19 20
        # Example of a direct call:
    use Bio::EnsEMBL::Hive::Utils;
    my $input_id_string = Bio::EnsEMBL::Hive::Utils::stringify($input_id_hash);
21

Leo Gordon's avatar
Leo Gordon committed
22
=head1 DESCRIPTION
23

24 25 26 27 28 29 30 31 32 33
    This module provides general utility functions that can be used in different contexts through three different calling mechanisms:

        * import:  another module/script can selectively import methods from this module into its namespace

        * inheritance:  another module can inherit from this one and so implicitly acquire the methods into its namespace
        
        * direct call to a module's method:  another module/script can directly call a method from this module prefixed with this module's name

=head1 LICENSE

34
    Copyright [1999-2015] Wellcome Trust Sanger Institute and the EMBL-European Bioinformatics Institute
Matthieu Muffato's avatar
Matthieu Muffato committed
35
    Copyright [2016-2018] EMBL-European Bioinformatics Institute
36 37 38

    Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License.
    You may obtain a copy of the License at
39

40
         http://www.apache.org/licenses/LICENSE-2.0
41

42 43 44
    Unless required by applicable law or agreed to in writing, software distributed under the License
    is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    See the License for the specific language governing permissions and limitations under the License.
45

Leo Gordon's avatar
Leo Gordon committed
46
=head1 CONTACT
47

48
    Please subscribe to the Hive mailing list:  http://listserver.ebi.ac.uk/mailman/listinfo/ehive-users  to discuss Hive-related questions or to be notified of our updates
Leo Gordon's avatar
Leo Gordon committed
49 50
  
=cut
51 52


Leo Gordon's avatar
Leo Gordon committed
53
package Bio::EnsEMBL::Hive::Utils;
54

Leo Gordon's avatar
Leo Gordon committed
55 56 57
use strict;
use warnings;
use Data::Dumper;
58 59
use Bio::EnsEMBL::Hive::Meadow;
use Bio::EnsEMBL::Hive::Valley;
60 61
use Bio::EnsEMBL::Hive::Version;
use Bio::EnsEMBL::Hive::DBSQL::SqlSchemaAdaptor;
62
use Bio::EnsEMBL::Hive::DBSQL::DBConnection;
63

Leo Gordon's avatar
Leo Gordon committed
64
use Exporter 'import';
65
our @EXPORT_OK = qw(stringify destringify dir_revhash parse_cmdline_options find_submodules load_file_or_module script_usage split_for_bash url2dbconn_hash go_figure_dbc report_versions);
66 67


Leo Gordon's avatar
Leo Gordon committed
68
=head2 stringify
69

Leo Gordon's avatar
Leo Gordon committed
70 71
    Description: This function takes in a Perl data structure and stringifies it using specific configuration
                 that allows us to store/recreate this data structure according to our specific storage/communication requirements.
72

Leo Gordon's avatar
Leo Gordon committed
73 74
    Callers    : Bio::EnsEMBL::Hive::DBSQL::AnalysisJobAdaptor      # stringification of input_id() hash
                 Bio::EnsEMBL::Hive::PipeConfig::HiveGeneric_conf   # stringification of parameters() hash
75 76 77 78 79 80

=cut

sub stringify {
    my $structure = pop @_;

81 82 83 84 85 86
    local $Data::Dumper::Indent    = 0;         # we want everything on one line
    local $Data::Dumper::Terse     = 1;         # and we want it without dummy variable names
    local $Data::Dumper::Sortkeys  = 1;         # make stringification more deterministic
    local $Data::Dumper::Quotekeys = 1;         # conserve some space
    local $Data::Dumper::Useqq     = 1;         # escape the \n and \t correctly
    local $Data::Dumper::Pair      = ' => ';    # make sure we always produce Perl-parsable structures, no matter what is set externally
87
    local $Data::Dumper::Maxdepth  = 0;         # make sure nobody can mess up stringification by setting a lower Maxdepth
88
    local $Data::Dumper::Deepcopy  = 1;         # avoid self-references in case the same structure is reused within params
89 90 91 92

    return Dumper($structure);
}

Leo Gordon's avatar
Leo Gordon committed
93 94 95 96 97 98
=head2 destringify

    Description: This function takes in a string that may or may not contain a stingified Perl structure.
                 If it seems to contain a hash/array/quoted_string, the contents is evaluated, otherwise it is returned "as is".
                 This function is mainly used to read values from 'meta' table that may represent Perl structures, but generally don't have to.

99
    Callers    : Bio::EnsEMBL::Hive::DBSQL::MetaContainer           # destringification of general 'meta' params
Leo Gordon's avatar
Leo Gordon committed
100 101 102 103 104
                 beekeeper.pl script                                # destringification of the 'pipeline_name' meta param

=cut

sub destringify {
105 106 107
    my $value = pop @_;

    if($value) {
108 109 110 111
        if($value=~/^'.*'$/s
        or $value=~/^".*"$/s
        or $value=~/^{.*}$/s
        or $value=~/^\[.*\]$/s
112
        or $value eq 'undef') {
113 114 115 116 117 118 119 120

            $value = eval($value);
        }
    }

    return $value;
}

121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139
=head2 dir_revhash

    Description: This function takes in a string (which is usually a numeric id) and turns its reverse into a multilevel directory hash.
                 Please note that no directory is created at this step - it is purely a string conversion function.

    Callers    : Bio::EnsEMBL::Hive::Worker                 # hashing of the worker output directories
                 Bio::EnsEMBL::Hive::RunnableDB::JobFactory # hashing of an arbitrary id

=cut

sub dir_revhash {
    my $id = pop @_;

    my @dirs = reverse(split(//, $id));
    pop @dirs;  # do not use the first digit for hashing

    return join('/', @dirs);
}

Leo Gordon's avatar
Leo Gordon committed
140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158

=head2 parse_cmdline_options

    Description: This function reads all options from command line into a key-value hash
                (keys must be prefixed with a single or double dash, the following term becomes the value).
                The rest of the terms go into the list.
                Command line options are not removed from @ARGV, so the same or another parser can be run again if needed.

    Callers    : scripts

=cut

sub parse_cmdline_options {
    my %pairs = ();
    my @list  = ();

    my $temp_key;

    foreach my $arg (@ARGV) {
159 160
        if($temp_key) {                     # only the value, get the key from buffer
            $pairs{$temp_key} = destringify($arg);
Leo Gordon's avatar
Leo Gordon committed
161
            $temp_key = '';
162 163 164
        } elsif($arg=~/^--?(\w+)=(.+)$/) {  # both the key and the value
            $pairs{$1} = destringify($2);
        } elsif($arg=~/^--?(\w+)$/) {       # only the key, buffer it and expect the value on the next round
Leo Gordon's avatar
Leo Gordon committed
165 166 167 168 169 170 171 172 173
            $temp_key = $1;
        } else {
            push @list, $arg;
        }
    }
    return (\%pairs, \@list);
}


174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204
=head2 find_submodules

    Description: This function takes one argument ("prefix" of a module name),
                transforms it into a directory name from the filesystem's point of view
                and finds all module names in these "directories".
                Each module_name found is reported only once,
                even if there are multiple matching files in different directories.

    Callers    : scripts

=cut

sub find_submodules {
    my $prefix = shift @_;

    $prefix=~s{::}{/}g;

    my %seen_module_name = ();

    foreach my $inc (@INC) {
        foreach my $full_module_path (<$inc/$prefix/*.pm>) {
            my $module_name = substr($full_module_path, length($inc)+1, -3);    # remove leading "$inc/" and trailing '.pm'
            $module_name=~s{/}{::}g;                                            # transform back to module_name space

            $seen_module_name{$module_name}++;
        }
    }
    return [ keys %seen_module_name ];
}


Leo Gordon's avatar
Leo Gordon committed
205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278
=head2 load_file_or_module

    Description: This function takes one argument, tries to determine whether it is a module name ('::'-separated)
                or a path to the module ('/'-separated), finds the module_name and dynamically loads it.

    Callers    : scripts

=cut

sub load_file_or_module {
    my $file_or_module = pop @_;

    my $module_name;

    if( $file_or_module=~/^(\w|::)+$/ ) {

        $module_name = $file_or_module;

    } elsif(-r $file_or_module) {

        if(my $package_line = `grep ^package $file_or_module`) {
            if($package_line=~/^\s*package\s+((?:\w|::)+)\s*;/) {

                $module_name = $1;

            } else {
                warn "Package line format unrecognized:\n$package_line\n";
                script_usage(1);
            }
        } else {
            warn "Could not find the package definition line in '$file_or_module'\n";
            script_usage(1);
        }

    } else {
        warn "The parameter '$file_or_module' neither seems to be a valid module nor a valid readable file\n";
        script_usage(1);
    }

    eval "require $module_name;";
    die $@ if ($@);

    return $module_name;
}


=head2 script_usage

    Description: This function takes one argument (return value).
                It attempts to run perldoc on the current script, and if perldoc is not present, emulates its behaviour.
                Then it exits with the return value given.

    Callers    : scripts

=cut

sub script_usage {
    my $retvalue = pop @_;

    if(`which perldoc`) {
        system('perldoc', $0);
    } else {
        foreach my $line (<main::DATA>) {
            if($line!~s/\=\w+\s?//) {
                $line = "\t$line";
            }
            print $line;
        }
        <main::DATA>;   # this is just to stop the 'used once' warnings
    }
    exit($retvalue);
}


279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304
=head2 split_for_bash

    Description: This function takes one argument (String) and splits it assuming it represents bash command line parameters.
                 It mainly splits on whitespace, except for cases when spaces are trapped between quotes or apostrophes.
                 In the latter case the outer quotes are removed.
    Returntype : list of Strings

=cut

sub split_for_bash {
    my $cmd = pop @_;

    my @cmd = ($cmd =~ /((?:".*?"|'.*?'|\S)+)/g);   # split on space except for quoted strings

    foreach my $syll (@cmd) {                       # remove the outer quotes or apostrophes
        if($syll=~/^(\S*?)"(.*?)"(\S*?)$/) {
            $syll = $1 . $2 . $3;
        } elsif($syll=~/^(\S*?)'(.*?)'(\S*?)$/) {
            $syll = $1 . $2 . $3;
        }
    }

    return @cmd;
}


305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323
sub url2dbconn_hash {
    my $url = pop @_;

    if( my ($driver, $user, $pass, $host, $port, $dbname) =
        $url =~ m{^(\w*)://(?:(\w+)(?:\:([^/\@]*))?\@)?(?:([\w\-\.]+)(?:\:(\d+))?)?/(\w*)} ) {

        return {
            '-driver' => $driver    || 'mysql',
            '-host'   => $host      || 'localhost',
            '-port'   => $port      || 3306,
            '-user'   => $user      || '',
            '-pass'   => $pass      || '',
            '-dbname' => $dbname,
        };
    } else {
        return 0;
    }
}

324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341

sub go_figure_dbc {
    my ($foo, $schema_type) = @_;

    if(UNIVERSAL::isa($foo, 'Bio::EnsEMBL::DBSQL::DBConnection')) { # already a DBConnection, return it:

        return $foo;

    } elsif(UNIVERSAL::can($foo, 'dbc') and UNIVERSAL::isa($foo->dbc, 'Bio::EnsEMBL::DBSQL::DBConnection')) {

        return $foo->dbc;

    } elsif(UNIVERSAL::can($foo, 'db') and UNIVERSAL::can($foo->db, 'dbc') and UNIVERSAL::isa($foo->db->dbc, 'Bio::EnsEMBL::DBSQL::DBConnection')) { # another data adaptor or Runnable:

        return $foo->db->dbc;

    } elsif(my $db_conn = (ref($foo) eq 'HASH') ? $foo : url2dbconn_hash( $foo ) ) {  # either a hash or a URL that translates into a hash

342
        return Bio::EnsEMBL::Hive::DBSQL::DBConnection->new( %$db_conn );
343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358

    } else {
        unless(ref($foo)) {    # maybe it is simply a registry key?
            my $dba;
            eval {
                $schema_type ||= 'hive';
                $dba = Bio::EnsEMBL::Registry->get_DBAdaptor($foo, $schema_type);
            };
            if(UNIVERSAL::can($dba, 'dbc')) {
                return $dba->dbc;
            }
        }
        die "Sorry, could not figure out how to make a DBConnection object out of '$foo'";
    }
}

359 360 361 362

sub report_versions {
    print "CodeVersion\t".Bio::EnsEMBL::Hive::Version->get_code_version()."\n";
    print "CompatibleHiveDatabaseSchemaVersion\t".Bio::EnsEMBL::Hive::DBSQL::SqlSchemaAdaptor->get_code_sql_schema_version()."\n";
363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378

    print "MeadowInterfaceVersion\t".Bio::EnsEMBL::Hive::Meadow->get_meadow_major_version()."\n";
    my $meadow_class_path = Bio::EnsEMBL::Hive::Valley->meadow_class_path;
    foreach my $meadow_class (@{ Bio::EnsEMBL::Hive::Valley->loaded_meadow_drivers }) {
        $meadow_class=~/^${meadow_class_path}::(.+)$/;
        my $meadow_driver   = $1;
        my $meadow_version  = $meadow_class->get_meadow_version;
        my $compatible      = $meadow_class->check_version_compatibility;
        my $status          = $compatible
                                ? ( $meadow_class->name
                                    ? 'available'
                                    : 'unavailable'
                                   )
                                : 'incompatible';
        print '',join("\t", 'Meadow::'.$meadow_driver, $meadow_version, $status)."\n";
    }
379 380
}

381 382
1;