Utils.pm 10.3 KB
Newer Older
Leo Gordon's avatar
Leo Gordon committed
1
=pod
2 3 4

=head1 NAME

5
    Bio::EnsEMBL::Hive::Utils
6

Leo Gordon's avatar
Leo Gordon committed
7
=head1 SYNOPSIS
8

Leo Gordon's avatar
Leo Gordon committed
9 10 11
        # Example of an import:
    use Bio::EnsEMBL::Hive::Utils 'stringify';
    my $input_id_string = stringify($input_id_hash);
12

Leo Gordon's avatar
Leo Gordon committed
13 14 15
        # Example of inheritance:
    use base ('Bio::EnsEMBL::Hive::Utils', ...);
    my $input_id_string = $self->stringify($input_id_hash);
16

Leo Gordon's avatar
Leo Gordon committed
17 18 19
        # Example of a direct call:
    use Bio::EnsEMBL::Hive::Utils;
    my $input_id_string = Bio::EnsEMBL::Hive::Utils::stringify($input_id_hash);
20

Leo Gordon's avatar
Leo Gordon committed
21
=head1 DESCRIPTION
22

23 24 25 26 27 28 29 30 31 32
    This module provides general utility functions that can be used in different contexts through three different calling mechanisms:

        * import:  another module/script can selectively import methods from this module into its namespace

        * inheritance:  another module can inherit from this one and so implicitly acquire the methods into its namespace
        
        * direct call to a module's method:  another module/script can directly call a method from this module prefixed with this module's name

=head1 LICENSE

33
    Copyright [1999-2014] Wellcome Trust Sanger Institute and the EMBL-European Bioinformatics Institute
34 35 36

    Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License.
    You may obtain a copy of the License at
37

38
         http://www.apache.org/licenses/LICENSE-2.0
39

40 41 42
    Unless required by applicable law or agreed to in writing, software distributed under the License
    is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    See the License for the specific language governing permissions and limitations under the License.
43

Leo Gordon's avatar
Leo Gordon committed
44
=head1 CONTACT
45

46
    Please contact ehive-users@ebi.ac.uk mailing list with questions/suggestions.
Leo Gordon's avatar
Leo Gordon committed
47 48
  
=cut
49 50


Leo Gordon's avatar
Leo Gordon committed
51
package Bio::EnsEMBL::Hive::Utils;
52

Leo Gordon's avatar
Leo Gordon committed
53 54 55
use strict;
use warnings;
use Data::Dumper;
56
use Bio::EnsEMBL::Hive::DBSQL::DBConnection;
57

Leo Gordon's avatar
Leo Gordon committed
58
use Exporter 'import';
59
our @EXPORT_OK = qw(stringify destringify dir_revhash parse_cmdline_options find_submodules load_file_or_module script_usage url2dbconn_hash go_figure_dbc);
60 61


Leo Gordon's avatar
Leo Gordon committed
62
=head2 stringify
63

Leo Gordon's avatar
Leo Gordon committed
64 65
    Description: This function takes in a Perl data structure and stringifies it using specific configuration
                 that allows us to store/recreate this data structure according to our specific storage/communication requirements.
66

Leo Gordon's avatar
Leo Gordon committed
67 68
    Callers    : Bio::EnsEMBL::Hive::DBSQL::AnalysisJobAdaptor      # stringification of input_id() hash
                 Bio::EnsEMBL::Hive::PipeConfig::HiveGeneric_conf   # stringification of parameters() hash
69 70 71 72 73 74

=cut

sub stringify {
    my $structure = pop @_;

75 76 77 78 79 80
    local $Data::Dumper::Indent    = 0;         # we want everything on one line
    local $Data::Dumper::Terse     = 1;         # and we want it without dummy variable names
    local $Data::Dumper::Sortkeys  = 1;         # make stringification more deterministic
    local $Data::Dumper::Quotekeys = 1;         # conserve some space
    local $Data::Dumper::Useqq     = 1;         # escape the \n and \t correctly
    local $Data::Dumper::Pair      = ' => ';    # make sure we always produce Perl-parsable structures, no matter what is set externally
81
    local $Data::Dumper::Maxdepth  = undef;     # make sure nobody can mess up stringification by setting a lower Maxdepth
82 83 84 85

    return Dumper($structure);
}

Leo Gordon's avatar
Leo Gordon committed
86 87 88 89 90 91
=head2 destringify

    Description: This function takes in a string that may or may not contain a stingified Perl structure.
                 If it seems to contain a hash/array/quoted_string, the contents is evaluated, otherwise it is returned "as is".
                 This function is mainly used to read values from 'meta' table that may represent Perl structures, but generally don't have to.

92
    Callers    : Bio::EnsEMBL::Hive::DBSQL::MetaContainer           # destringification of general 'meta' params
Leo Gordon's avatar
Leo Gordon committed
93 94 95 96 97
                 beekeeper.pl script                                # destringification of the 'pipeline_name' meta param

=cut

sub destringify {
98 99 100 101 102 103
    my $value = pop @_;

    if($value) {
        if($value=~/^'.*'$/
        or $value=~/^".*"$/
        or $value=~/^{.*}$/
104 105
        or $value=~/^\[.*\]$/
        or $value eq 'undef') {
106 107 108 109 110 111 112 113

            $value = eval($value);
        }
    }

    return $value;
}

114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132
=head2 dir_revhash

    Description: This function takes in a string (which is usually a numeric id) and turns its reverse into a multilevel directory hash.
                 Please note that no directory is created at this step - it is purely a string conversion function.

    Callers    : Bio::EnsEMBL::Hive::Worker                 # hashing of the worker output directories
                 Bio::EnsEMBL::Hive::RunnableDB::JobFactory # hashing of an arbitrary id

=cut

sub dir_revhash {
    my $id = pop @_;

    my @dirs = reverse(split(//, $id));
    pop @dirs;  # do not use the first digit for hashing

    return join('/', @dirs);
}

Leo Gordon's avatar
Leo Gordon committed
133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151

=head2 parse_cmdline_options

    Description: This function reads all options from command line into a key-value hash
                (keys must be prefixed with a single or double dash, the following term becomes the value).
                The rest of the terms go into the list.
                Command line options are not removed from @ARGV, so the same or another parser can be run again if needed.

    Callers    : scripts

=cut

sub parse_cmdline_options {
    my %pairs = ();
    my @list  = ();

    my $temp_key;

    foreach my $arg (@ARGV) {
152 153
        if($temp_key) {                     # only the value, get the key from buffer
            $pairs{$temp_key} = destringify($arg);
Leo Gordon's avatar
Leo Gordon committed
154
            $temp_key = '';
155 156 157
        } elsif($arg=~/^--?(\w+)=(.+)$/) {  # both the key and the value
            $pairs{$1} = destringify($2);
        } elsif($arg=~/^--?(\w+)$/) {       # only the key, buffer it and expect the value on the next round
Leo Gordon's avatar
Leo Gordon committed
158 159 160 161 162 163 164 165 166
            $temp_key = $1;
        } else {
            push @list, $arg;
        }
    }
    return (\%pairs, \@list);
}


167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197
=head2 find_submodules

    Description: This function takes one argument ("prefix" of a module name),
                transforms it into a directory name from the filesystem's point of view
                and finds all module names in these "directories".
                Each module_name found is reported only once,
                even if there are multiple matching files in different directories.

    Callers    : scripts

=cut

sub find_submodules {
    my $prefix = shift @_;

    $prefix=~s{::}{/}g;

    my %seen_module_name = ();

    foreach my $inc (@INC) {
        foreach my $full_module_path (<$inc/$prefix/*.pm>) {
            my $module_name = substr($full_module_path, length($inc)+1, -3);    # remove leading "$inc/" and trailing '.pm'
            $module_name=~s{/}{::}g;                                            # transform back to module_name space

            $seen_module_name{$module_name}++;
        }
    }
    return [ keys %seen_module_name ];
}


Leo Gordon's avatar
Leo Gordon committed
198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271
=head2 load_file_or_module

    Description: This function takes one argument, tries to determine whether it is a module name ('::'-separated)
                or a path to the module ('/'-separated), finds the module_name and dynamically loads it.

    Callers    : scripts

=cut

sub load_file_or_module {
    my $file_or_module = pop @_;

    my $module_name;

    if( $file_or_module=~/^(\w|::)+$/ ) {

        $module_name = $file_or_module;

    } elsif(-r $file_or_module) {

        if(my $package_line = `grep ^package $file_or_module`) {
            if($package_line=~/^\s*package\s+((?:\w|::)+)\s*;/) {

                $module_name = $1;

            } else {
                warn "Package line format unrecognized:\n$package_line\n";
                script_usage(1);
            }
        } else {
            warn "Could not find the package definition line in '$file_or_module'\n";
            script_usage(1);
        }

    } else {
        warn "The parameter '$file_or_module' neither seems to be a valid module nor a valid readable file\n";
        script_usage(1);
    }

    eval "require $module_name;";
    die $@ if ($@);

    return $module_name;
}


=head2 script_usage

    Description: This function takes one argument (return value).
                It attempts to run perldoc on the current script, and if perldoc is not present, emulates its behaviour.
                Then it exits with the return value given.

    Callers    : scripts

=cut

sub script_usage {
    my $retvalue = pop @_;

    if(`which perldoc`) {
        system('perldoc', $0);
    } else {
        foreach my $line (<main::DATA>) {
            if($line!~s/\=\w+\s?//) {
                $line = "\t$line";
            }
            print $line;
        }
        <main::DATA>;   # this is just to stop the 'used once' warnings
    }
    exit($retvalue);
}


272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290
sub url2dbconn_hash {
    my $url = pop @_;

    if( my ($driver, $user, $pass, $host, $port, $dbname) =
        $url =~ m{^(\w*)://(?:(\w+)(?:\:([^/\@]*))?\@)?(?:([\w\-\.]+)(?:\:(\d+))?)?/(\w*)} ) {

        return {
            '-driver' => $driver    || 'mysql',
            '-host'   => $host      || 'localhost',
            '-port'   => $port      || 3306,
            '-user'   => $user      || '',
            '-pass'   => $pass      || '',
            '-dbname' => $dbname,
        };
    } else {
        return 0;
    }
}

291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308

sub go_figure_dbc {
    my ($foo, $schema_type) = @_;

    if(UNIVERSAL::isa($foo, 'Bio::EnsEMBL::DBSQL::DBConnection')) { # already a DBConnection, return it:

        return $foo;

    } elsif(UNIVERSAL::can($foo, 'dbc') and UNIVERSAL::isa($foo->dbc, 'Bio::EnsEMBL::DBSQL::DBConnection')) {

        return $foo->dbc;

    } elsif(UNIVERSAL::can($foo, 'db') and UNIVERSAL::can($foo->db, 'dbc') and UNIVERSAL::isa($foo->db->dbc, 'Bio::EnsEMBL::DBSQL::DBConnection')) { # another data adaptor or Runnable:

        return $foo->db->dbc;

    } elsif(my $db_conn = (ref($foo) eq 'HASH') ? $foo : url2dbconn_hash( $foo ) ) {  # either a hash or a URL that translates into a hash

309
        return Bio::EnsEMBL::Hive::DBSQL::DBConnection->new( %$db_conn );
310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325

    } else {
        unless(ref($foo)) {    # maybe it is simply a registry key?
            my $dba;
            eval {
                $schema_type ||= 'hive';
                $dba = Bio::EnsEMBL::Registry->get_DBAdaptor($foo, $schema_type);
            };
            if(UNIVERSAL::can($dba, 'dbc')) {
                return $dba->dbc;
            }
        }
        die "Sorry, could not figure out how to make a DBConnection object out of '$foo'";
    }
}

326 327
1;