=pod

=head1 NAME

    Bio::EnsEMBL::Hive::RunnableDB::SystemCmd

Leo Gordon's avatar
Leo Gordon committed
7 8
=head1 SYNOPSIS

9
    standaloneJob.pl Bio::EnsEMBL::Hive::RunnableDB::SystemCmd --cmd 'ls -1 ${ENSEMBL_CVS_ROOT_DIR}/ensembl-hive/modules/Bio/EnsEMBL/Hive/RunnableDB/*.pm >building_blocks.list'
Leo Gordon's avatar
Leo Gordon committed
10

11 12
=head1 DESCRIPTION

13
    This RunnableDB module acts as a wrapper for shell-level command lines. If you behave you may also use parameter substitution.
14

15 16
    The command line must be stored in the parameters() as the value corresponding to the 'cmd' key.
    It also lets you pass in other parameters and use the parameter substitution mechanism in its full glory.
17 18 19 20 21 22 23 24 25 26 27 28 29

=head1 CONFIGURATION EXAMPLE

    # The following example shows how to configure SystemCmd in a PipeConfig module
    # to create a MySQL snapshot of the Hive database before executing a critical operation.
    #
    # It is a useful incantation when debugging pipelines, similar to setting a breakpoint/savepoint.
    # You will be able to reset your pipeline to the saved point by un-dumping this file.

        {   -logic_name => 'db_snapshot_before_critical_A',
            -module     => 'Bio::EnsEMBL::Hive::RunnableDB::SystemCmd',
            -parameters => {
                'filename'  => $ENV{'HOME'}.'/db_snapshot_before_critical_A',
                'cmd'       => $self->db_cmd().' --executable mysqldump > #filename#',
            },
        },

34 35
=head1 LICENSE

36
    Copyright [1999-2015] Wellcome Trust Sanger Institute and the EMBL-European Bioinformatics Institute
Brandon Walts's avatar
Brandon Walts committed
37
    Copyright [2016-2020] EMBL-European Bioinformatics Institute
38 39 40 41 42 43 44 45 46 47

    Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License.
    You may obtain a copy of the License at

         http://www.apache.org/licenses/LICENSE-2.0

    Unless required by applicable law or agreed to in writing, software distributed under the License
    is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    See the License for the specific language governing permissions and limitations under the License.

48
=head1 CONTACT
49

50
    Please subscribe to the Hive mailing list:  http://listserver.ebi.ac.uk/mailman/listinfo/ehive-users  to discuss Hive-related questions or to be notified of our updates
51 52 53

=cut

54

55 56 57
package Bio::EnsEMBL::Hive::RunnableDB::SystemCmd;

use strict;
58
use warnings;
59

60 61
use Bio::EnsEMBL::Hive::Utils qw(join_command_args);

62 63
use Capture::Tiny ':all';

64
use base ('Bio::EnsEMBL::Hive::Process');
65

Leo Gordon's avatar
Leo Gordon committed
66

67 68 69
# param_defaults
#
# Returns a fresh hash-ref holding the default values of the parameters
# this module declares:
#
#   return_codes_2_branches : hash-ref mapping some of the command's exit
#                             codes to dataflow branch numbers (empty by
#                             default, i.e. no exit code is mapped).
#   use_bash_pipefail       : boolean; when true, the command is run as
#                             "bash -o pipefail -c $cmd", which is useful to
#                             capture errors in a command that contains pipes.
sub param_defaults {
    return {
        'return_codes_2_branches' => {},    # Hash that maps some of the command return codes to branch numbers
        'use_bash_pipefail'       => 0,     # Boolean. When true, the command will be run with "bash -o pipefail -c $cmd". Useful to capture errors in a command that contains pipes
    };
}


Leo Gordon's avatar
Leo Gordon committed
75 76 77 78 79
=head2 run

    Description : Implements run() interface method of Bio::EnsEMBL::Hive::Process that is used to perform the main bulk of the job (minus input and output).
                  Here it actually runs the command line.

80 81 82 83 84
    param('cmd'): The recommended way of passing in the command line. It can be either a string, or an array-ref of strings. The latter is safer if some of the
                  arguments contain whitespace.

    param('*'):   Any other parameters can be freely used for parameter substitution.

Leo Gordon's avatar
Leo Gordon committed
85 86
=cut

87 88
# Runs the command stored in param('cmd') and records its outcome in the
# 'return_value', 'stderr' and 'flat_cmd' parameters, which write_output()
# reads afterwards.
sub run {
    my $self = shift;

    my $cmd = $self->param_required('cmd');

    # join_command_args() hands back a flag and a single-string form of the
    # command. NOTE(review): presumably the flag is true when the arguments
    # can only be run as one joined string -- confirm against
    # Bio::EnsEMBL::Hive::Utils.
    my ($join_needed, $flat_cmd) = join_command_args($cmd);

    # Let's use the array if possible, it saves us from running a shell
    my @cmd_to_run;
    if ($self->param('use_bash_pipefail')) {
        # Explicitly go through bash so that a failure anywhere in a pipe is reported
        @cmd_to_run = ('bash', '-o', 'pipefail', '-c', $flat_cmd);
    } elsif ($join_needed) {
        @cmd_to_run = ($flat_cmd);
    } elsif (ref($cmd)) {
        @cmd_to_run = @$cmd;
    } else {
        @cmd_to_run = ($cmd);
    }

    if ($self->debug()) {
        use Data::Dumper;       # NB: "use" is resolved at compile time, whatever the value of debug()
        local $Data::Dumper::Terse = 1;
        local $Data::Dumper::Indent = 0;
        warn "Command given: ", Dumper($cmd), "\n";
        warn "Command to run: ", Dumper(\@cmd_to_run), "\n";
    }

    $self->dbc and $self->dbc->disconnect_when_inactive(1);    # release this connection for the duration of system() call
    my $return_value;
    # tee_stderr lets the command's stderr through while also capturing a copy of it
    my $stderr = tee_stderr {
        $return_value = system(@cmd_to_run);
    };
    $self->dbc and $self->dbc->disconnect_when_inactive(0);    # allow the worker to keep the connection open again

    # To be used in write_output()
    $self->param('return_value', $return_value);
    $self->param('stderr', $stderr);
    $self->param('flat_cmd', $flat_cmd);
}

Leo Gordon's avatar
Leo Gordon committed
116

Leo Gordon's avatar
Leo Gordon committed
117 118 119
=head2 write_output

    Description : Implements write_output() interface method of Bio::EnsEMBL::Hive::Process that is used to deal with job's output after the execution.
120
                  Here we take actions based on the command's exit status.
121

Leo Gordon's avatar
Leo Gordon committed
122 123 124
=cut

# Interprets the status recorded by run() in the 'return_value', 'stderr'
# and 'flat_cmd' parameters:
#   * negative value          : the command could not be started -> die
#   * lower 8 bits set        : the command was killed           -> wait, then die
#   * non-zero exit code      : dataflow if the code is listed in
#                               'return_codes_2_branches', or if stderr shows
#                               a Java heap-space error; otherwise die
#   * zero                    : success, nothing to do
sub write_output {
    my $self = shift;

    my $return_value = $self->param('return_value');
    my $stderr       = $self->param('stderr');
    my $flat_cmd     = $self->param('flat_cmd');

    if (defined($return_value) and $return_value < 0) {
        # system() returns -1 when the command could not be started at all.
        # Without this branch (-1 & 255) is true, so the failure would be
        # misreported as a kill after a pointless 30-second wait.
        die sprintf( "'%s' could not be started (system() returned %d)\nstderr is: %s\n", $flat_cmd, $return_value, $stderr);

    # Lower 8 bits indicate the process has been killed and did not complete.
    } elsif ($return_value & 255) {
        # This can be caused by a MEMLIMIT / RUNLIMIT, which we know are not
        # enforced atomically. The best option is to wait a little, so that
        # LSF gets the chance to kill the worker too.
        sleep 30;
        # If we reach this point, it was killed for another reason.
        die sprintf( "'%s' was killed with code=%d\nstderr is: %s\n", $flat_cmd, $return_value, $stderr);

    } elsif ($return_value) {
        # "Normal" process exit with a non-zero code (in the upper 8 bits)
        $return_value >>= 8;

        # We create a dataflow event depending on the exit code of the process.
        # NOTE(review): complete_early() is presumably expected not to return
        # -- confirm. If it does return, the checks below still run.
        if (exists $self->param('return_codes_2_branches')->{$return_value}) {
            my $branch_number = $self->param('return_codes_2_branches')->{$return_value};
            $self->dataflow_output_id( $self->input_id, $branch_number );
            $self->input_job->autoflow(0);
            $self->complete_early(sprintf("The command exited with code %d, which is mapped to a dataflow on branch #%d.\n", $return_value, $branch_number));
        }

        # A Java heap-space error is dataflown to branch -1. If that dataflow
        # creates no job, the captured stderr becomes the error message.
        # NOTE(review): the pattern expects " at" to follow "Java heap space"
        # after a single space -- confirm it matches the stderr actually captured.
        if ($stderr =~ /Exception in thread ".*" java.lang.OutOfMemoryError: Java heap space at/) {
            my $job_ids = $self->dataflow_output_id( $self->input_id, -1 );
            if (scalar(@$job_ids)) {
                $self->input_job->autoflow(0);
                $self->complete_early("Java heap space is out of memory. A job has been dataflown to the -1 branch.\n");
            } else {
                die $stderr;
            }
        }

        die sprintf( "'%s' resulted in an error code=%d\nstderr is: %s\n", $flat_cmd, $return_value, $stderr);
    }
}

1;