Commit d1c1aa95 authored by Matthieu Muffato
Browse files

Added an option in SystemCmd to parse a file in order to generate dataflow events

parent 789dff18
......@@ -101,6 +101,7 @@ package Bio::EnsEMBL::Hive::Process;
use strict;
use warnings;
use JSON;
use Scalar::Util qw(looks_like_number);
use Bio::EnsEMBL::Hive::Utils ('stringify', 'go_figure_dbc', 'join_command_args');
......@@ -546,6 +547,39 @@ sub dataflow_output_id {
}
=head2 dataflow_output_ids_from_json

    Title   :  dataflow_output_ids_from_json
    Arg[1]  :  File name
    Arg[2]  :  (optional) Branch number used for the lines that do not carry their own
               branch prefix. It is handed over unchanged (possibly undef) to
               L<dataflow_output_id>, which applies its own default
               (see L<AnalysisJob::dataflow_output_id>)
    Function:  Wrapper around L<dataflow_output_id> that takes the output_ids from a JSON file.
               Each non-blank line in the file is expected to be a complete JSON structure,
               which may be prefixed with a branch number (an optionally negative integer
               followed by whitespace). Blank lines are ignored.
    Returns  :  Array-ref holding, in file order, everything that the successive
               calls to L<dataflow_output_id> returned
    Throws   :  If the file cannot be opened, or if a line cannot be decoded as JSON
               (the message then names the file and the line number)

=cut

sub dataflow_output_ids_from_json {
    my ($self, $filename, $default_branch) = @_;

    my $json_formatter = JSON->new()->indent(0);
    my @output_job_ids;

    open(my $fh, '<', $filename) or die "Could not open '$filename' because: $!";

    # The readline operator gives the loop an implicit defined() test, which a
    # method call such as $fh->getline() does not: a last line made of a single
    # "0" (without a trailing newline) must not be mistaken for end-of-file.
    while (my $line = <$fh>) {
        chomp $line;

        # Tolerate blank lines, e.g. an extra newline at the end of the file
        next unless $line =~ /\S/;

        # A leading integer followed by whitespace overrides the default branch
        my $branch = $default_branch;
        my $json   = $line;
        if ($line =~ /^(-?\d+)\s+(.*)$/) {
            $branch = $1;
            $json   = $2;
        }

        # Report which line of which file is malformed instead of the bare parser error
        my $output_id;
        eval { $output_id = $json_formatter->decode($json); 1 }
            or die "Could not parse line " . $. . " of '$filename' as JSON: $@";

        push @output_job_ids, @{ $self->dataflow_output_id($output_id, $branch) };
    }
    close($fh);

    return \@output_job_ids;
}
sub throw {
my $msg = pop @_;
......
......@@ -65,6 +65,8 @@ sub param_defaults {
return_codes_2_branches => {}, # Hash that maps some of the command return codes to branch numbers
'use_bash_pipefail' => 0, # Boolean. When true, the command will be run with "bash -o pipefail -c $cmd". Useful to capture errors in a command that contains pipes
'use_bash_errexit' => 0, # When the command is composed of multiple commands (concatenated with a semi-colon), use "bash -o errexit" so that a failure will interrupt the whole script
'dataflow_file' => undef, # The path to a file that contains 1 line per dataflow event, in the form of a JSON object
'dataflow_branch' => undef, # The default branch for JSON dataflows
}
}
......@@ -105,8 +107,15 @@ sub write_output {
my $self = shift;
my $return_value = $self->param('return_value');
return unless $return_value;
## Success
unless ($return_value) {
# FIXME branch number
$self->dataflow_output_ids_from_json($self->param('dataflow_file'), $self->param('dataflow_branch')) if $self->param('dataflow_file');
return;
}
## Error processing
my $stderr = $self->param('stderr');
my $flat_cmd = $self->param('flat_cmd');
......
......@@ -23,8 +23,10 @@ use Cwd ();
use File::Basename ();
$ENV{'EHIVE_ROOT_DIR'} ||= File::Basename::dirname( File::Basename::dirname( File::Basename::dirname( Cwd::realpath($0) ) ) );
use JSON;
use Test::More;
use Data::Dumper;
use File::Temp qw/tempfile/;
use Bio::EnsEMBL::Hive::Utils qw(stringify);
use Bio::EnsEMBL::Hive::Utils::Test qw(standaloneJob);
......@@ -138,4 +140,40 @@ standaloneJob('Bio::EnsEMBL::Hive::RunnableDB::SystemCmd',
);
# Fixture for the 'dataflow_file' parameter of SystemCmd: one dataflow event per
# line, written either as bare JSON or as JSON prefixed with a branch number.
my $json_formatter  = JSON->new()->indent(0);
my $array_of_hashes = [{'key1' => 1}, {"funny\nkey2" => [2,2]}];
my ($fh, $filename) = tempfile(UNLINK => 1);

my @dataflow_lines = (
          $json_formatter->encode($array_of_hashes->[0]),   # no prefix
    '3 '. $json_formatter->encode($array_of_hashes->[0]),   # positive branch prefix
    '-1 '.$json_formatter->encode($array_of_hashes->[1]),   # negative branch prefix
    '1 '. $json_formatter->encode($array_of_hashes),        # an array as the payload
);
print {$fh} map { "$_\n" } @dataflow_lines
    or die "Could not write to '$filename' because: $!";

# Buffered write errors only surface at close(): fail here rather than let the
# job read a truncated fixture.
close($fh) or die "Could not close '$filename' because: $!";
# Run a no-op command with 'dataflow_file' set and expect one DATAFLOW event per
# line of the file, in file order. Each row is [ output_id, branch ].
my @expected_dataflows = (
    [ $array_of_hashes->[0], undef ],
    [ $array_of_hashes->[0], 3     ],
    [ $array_of_hashes->[1], -1    ],
    [ $array_of_hashes,      1     ],
);
standaloneJob(
    'Bio::EnsEMBL::Hive::RunnableDB::SystemCmd',
    { 'cmd' => 'sleep 0', 'dataflow_file' => $filename },
    [ map { [ 'DATAFLOW', @{$_} ] } @expected_dataflows ],
);
done_testing();
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment