Commit fb0ab74a authored by Leo Gordon

initial implementation of conditional dataflow

parent 4f0433b8
......@@ -97,6 +97,7 @@ You can toggle the display of individual columns using [Show/Hide columns] butto
<li style="margin-right:0px"><a href="#analysis_base" style="text-decoration:none;font-weight:bold">analysis_base</a></li>
<li style="margin-right:0px"><a href="#analysis_stats" style="text-decoration:none;font-weight:bold">analysis_stats</a></li>
<li style="margin-right:0px"><a href="#dataflow_rule" style="text-decoration:none;font-weight:bold">dataflow_rule</a></li>
<li style="margin-right:0px"><a href="#dataflow_target" style="text-decoration:none;font-weight:bold">dataflow_target</a></li>
<li style="margin-right:0px"><a href="#analysis_ctrl_rule" style="text-decoration:none;font-weight:bold">analysis_ctrl_rule</a></li>
</ul>
......@@ -299,17 +300,40 @@ You can toggle the display of individual columns using [Show/Hide columns] butto
</div>
<div style="clear:both"></div>
</div>
<p style="padding:5px 0px;margin-bottom:0px;width:800px">Extension of simple_rule design except that goal(to) is now in extended URL format e.g. mysql://ensadmin:<pass>@ecs2:3361/compara_hive_test?analysis.logic_name='blast_NCBI34' (full network address of an analysis). The only requirement is that there are rows in the job, analysis, dataflow_rule, and worker tables so that the following join works on the same database WHERE analysis.analysis_id = dataflow_rule.from_analysis_id AND analysis.analysis_id = job.analysis_id AND analysis.analysis_id = worker.analysis_id These are the rules used to create entries in the job table where the input_id (control data) is passed from one analysis to the next to define work. The analysis table will be extended so that it can specify different read and write databases, with the default being the database the analysis is on</p>
<p style="padding:5px 0px;margin-bottom:0px;width:800px">Each entry of this table defines a starting point for dataflow (via from_analysis_id and branch_code) to which point a group of dataflow_target entries can be linked. This grouping is used in two ways: (1) dataflow_target entries that link into the same dataflow_rule share the same from_analysis, branch_code and funnel_dataflow_rule (2) to define the conditions for DEFAULT or ELSE case (via excluding all conditions explicitly listed in the group)</p>
<div id="div_dataflow_rule" style="display:none">
<table style="border:1px solid #667aa6;padding:0px;min-width:1000px;max-width:1200px">
<tr class="center" style="color:#FFFFFF;background-color:#667aa6"><th style="color:#FFF;padding:2px">Column</th><th style="color:#FFF;padding:2px">Type</th><th style="color:#FFF;padding:2px;min-width:80px">Default value</th><th style="color:#FFF;padding:2px;min-width:500px">Description</th><th style="color:#FFF;padding:2px;min-width:100px">Index</th></tr>
<tr class="bg1"><td><b>dataflow_rule_id</b></td><td>INTEGER</td><td>-</td><td>internal ID</td><td></td></tr>
<tr class="bg2"><td><b>from_analysis_id</b></td><td>INTEGER</td><td>-</td><td>foreign key to analysis table analysis_id</td><td>unique: key</td></tr>
<tr class="bg1"><td><b>branch_code</b></td><td>INTEGER</td><td>1</td><td>branch_code of the fan</td><td>unique: key</td></tr>
<tr class="bg2"><td><b>funnel_dataflow_rule_id</b></td><td>INTEGER</td><td>NULL</td><td>dataflow_rule_id of the semaphored funnel (is NULL by default, which means dataflow is not semaphored)</td><td>unique: key</td></tr>
<tr class="bg1"><td><b>to_analysis_url</b></td><td>VARCHAR(255)</td><td>''</td><td>foreign key to net distributed analysis logic_name reference</td><td>unique: key</td></tr>
<tr class="bg2"><td><b>input_id_template</b></td><td>MEDIUMTEXT</td><td>NULL</td><td>a template for generating a new input_id (not necessarily a hashref) in this dataflow; if undefined is kept original</td><td>unique: key</td></tr>
<tr class="bg2"><td><b>from_analysis_id</b></td><td>INTEGER</td><td>-</td><td>foreign key to analysis table analysis_id</td><td></td></tr>
<tr class="bg1"><td><b>branch_code</b></td><td>INTEGER</td><td>1</td><td>branch_code of the fan</td><td></td></tr>
<tr class="bg2"><td><b>funnel_dataflow_rule_id</b></td><td>INTEGER</td><td>NULL</td><td>dataflow_rule_id of the semaphored funnel (is NULL by default, which means dataflow is not semaphored)</td><td></td></tr>
</table>
</div>
<div id="dataflow_target" style="width:850px;background-color:#F4F4F4;border-bottom:1px solid #BBB;margin-top:60px;margin-bottom:2px;padding:4px;border-top:1px solid #C70C09">
<div style="float:left;text-align:left;font-size:11pt;font-weight:bold;color:#000;padding:2px 1px">
<span style="display:inline-block;height:10px;width:10px;border-radius:5px;margin-right:5px;background-color:#C70C09;box-shadow:1px 1px 2px #888;vertical-align:middle"></span>dataflow_target</div>
<div style="float:right;text-align:right;padding:2px 1px">
<a id="a_dataflow_target" class="help-header" style="cursor:pointer;font-weight:bold;border-radius:5px;background-color:#FFF;border:1px solid #667aa6;padding:1px 2px;margin-right:5px" onclick="show_hide('dataflow_target','columns')">
<img src="http://www.ensembl.org/i/16/plus-button.png" style="width:12px;height:12px;position:relative;top:2px" alt="show"/> Show columns
</a>
<span style="margin-right:5px;border-right:1px solid #000"> </span> <a href="#top" style="text-decoration:none">[Back to top]</a>
</div>
<div style="clear:both"></div>
</div>
<p style="padding:5px 0px;margin-bottom:0px;width:800px">This table links specific conditions with the target object (Analysis/Table/Accu) and optional input_id_template.</p>
<div id="div_dataflow_target" style="display:none">
<table style="border:1px solid #667aa6;padding:0px;min-width:1000px;max-width:1200px">
<tr class="center" style="color:#FFFFFF;background-color:#667aa6"><th style="color:#FFF;padding:2px">Column</th><th style="color:#FFF;padding:2px">Type</th><th style="color:#FFF;padding:2px;min-width:80px">Default value</th><th style="color:#FFF;padding:2px;min-width:500px">Description</th><th style="color:#FFF;padding:2px;min-width:100px">Index</th></tr>
<tr class="bg1"><td><b>source_dataflow_rule_id</b></td><td>INTEGER</td><td>-</td><td>foreign key to the dataflow_rule object that defines grouping (see description of dataflow_rule table)</td><td>unique: key</td></tr>
<tr class="bg2"><td><b>on_condition</b></td><td>VARCHAR(255)</td><td>NULL</td><td>param-substitutable string evaluated at the moment of dataflow event that defines whether or not this case produces any dataflow; NULL means DEFAULT or ELSE</td><td>unique: key</td></tr>
<tr class="bg1"><td><b>input_id_template</b></td><td>MEDIUMTEXT</td><td>NULL</td><td>a template for generating a new input_id (not necessarily a hashref) in this dataflow; if undefined is kept original</td><td>unique: key</td></tr>
<tr class="bg2"><td><b>to_analysis_url</b></td><td>VARCHAR(255)</td><td>''</td><td>the URL of the dataflow target object (Analysis/Table/Accu)</td><td>unique: key</td></tr>
</table>
</div>
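To make the link between dataflow_rule and dataflow_target concrete, here is a minimal sketch (not part of this commit) of how one conditional fan could be expressed on the object layer introduced below. The $pipeline and analysis objects, the condition strings and the target names are invented for illustration; the key names follow the DataflowTarget accessors and unikey defined further down in this diff.

# A hypothetical fan on branch #1 of a 'split_files' analysis: jobs flow to
# 'align_long' or 'align_short' depending on a parameter, with 'report' as the
# DEFAULT fallback.  One dataflow_rule row groups three dataflow_target rows.
my $df_rule = $pipeline->add_new_or_update( 'DataflowRule',
    'from_analysis' => $split_files_analysis,
    'branch_code'   => 1,
);

foreach my $case ( [ '#is_long_read#',  'align_long'  ],
                   [ '#is_short_read#', 'align_short' ],
                   [ undef,             'report'      ] ) {     # undef condition == DEFAULT/ELSE
    my ($on_condition, $to_url) = @$case;

    $pipeline->add_new_or_update( 'DataflowTarget',
        'source_dataflow_rule' => $df_rule,
        'on_condition'         => $on_condition,
        'input_id_template'    => undef,
        'to_analysis_url'      => $to_url,
    );
}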
......@@ -421,7 +445,7 @@ You can toggle the display of individual columns using [Show/Hide columns] butto
<tr class="bg1"><td><b>param_id_stack</b></td><td>CHAR(64)</td><td>''</td><td>a CSV of job_ids whose input_ids contribute to the stack of local variables for the job</td><td>unique key: input_id_stacks_analysis</td></tr>
<tr class="bg2"><td><b>accu_id_stack</b></td><td>CHAR(64)</td><td>''</td><td>a CSV of job_ids whose accu's contribute to the stack of local variables for the job</td><td>unique key: input_id_stacks_analysis</td></tr>
<tr class="bg1"><td><b>role_id</b></td><td>INTEGER</td><td>NULL</td><td>links to the Role that claimed this job (NULL means it has never been claimed)</td><td>key: role_status</td></tr>
<tr class="bg2"><td><b>status</b></td><td>ENUM('SEMAPHORED','READY','CLAIMED','COMPILATION','PRE_CLEANUP','FETCH_INPUT','RUN','WRITE_OUTPUT','POST_CLEANUP','DONE','FAILED','PASSED_ON')</td><td>'READY'</td><td>state the job is in</td><td>key: analysis_status_retry<br />key: role_status</td></tr>
<tr class="bg2"><td><b>status</b></td><td>CHAR(32)</td><td>'READY'</td><td>state the job is in</td><td>key: analysis_status_retry<br />key: role_status</td></tr>
<tr class="bg1"><td><b>retry_count</b></td><td>INTEGER</td><td>0</td><td>number times job had to be reset when worker failed to run it</td><td>key: analysis_status_retry</td></tr>
<tr class="bg2"><td><b>when_completed</b></td><td>TIMESTAMP</td><td>NULL</td><td>when the job was completed</td><td></td></tr>
<tr class="bg1"><td><b>runtime_msec</b></td><td>INTEGER</td><td>NULL</td><td>how long did it take to execute the job (or until the moment it failed)</td><td></td></tr>
......@@ -538,7 +562,7 @@ You can toggle the display of individual columns using [Show/Hide columns] butto
<tr class="bg2"><td><b>process_id</b></td><td>VARCHAR(255)</td><td>-</td><td>identifies the Worker process on the Meadow (for 'LOCAL' is the OS PID)</td><td>key: meadow_process</td></tr>
<tr class="bg1"><td><b>resource_class_id</b></td><td>INTEGER</td><td>NULL</td><td>links to Worker's resource class</td><td></td></tr>
<tr class="bg2"><td><b>work_done</b></td><td>INTEGER</td><td>0</td><td>how many jobs the Worker has completed successfully</td><td></td></tr>
<tr class="bg1"><td><b>status</b></td><td>ENUM('SPECIALIZATION','COMPILATION','READY','PRE_CLEANUP','FETCH_INPUT','RUN','WRITE_OUTPUT','POST_CLEANUP','DEAD')</td><td>'READY'</td><td>current status of the Worker</td><td></td></tr>
<tr class="bg1"><td><b>status</b></td><td>VARCHAR(255)</td><td>'READY'</td><td>current status of the Worker</td><td></td></tr>
<tr class="bg2"><td><b>when_born</b></td><td>TIMESTAMP</td><td>CURRENT_TIMESTAMP</td><td>when the Worker process was started</td><td></td></tr>
<tr class="bg1"><td><b>when_checked_in</b></td><td>TIMESTAMP</td><td>NULL</td><td>when the Worker last checked into the database</td><td></td></tr>
<tr class="bg2"><td><b>when_seen</b></td><td>TIMESTAMP</td><td>NULL</td><td>when the Worker was last seen by the Meadow</td><td></td></tr>
......
No preview for this file type
docs/hive_schema.png: binary image replaced (263 KB → 277 KB)
......@@ -262,20 +262,22 @@ sub get_grouped_dataflow_rules {
foreach my $dfr ( sort { ($b->funnel_dataflow_rule // 0) <=> ($a->funnel_dataflow_rule // 0) } @{$self->dataflow_rules_collection}) {
if(my $funnel_dfr = $dfr->funnel_dataflow_rule) {
unless($set_of_groups{$funnel_dfr}) {
if( $funnel_dfr->to_analysis->isa('Bio::EnsEMBL::Hive::Analysis') ) {
$set_of_groups{$funnel_dfr} = [$funnel_dfr, []];
} else {
throw("A funnel target must be an Analysis");
unless($set_of_groups{$funnel_dfr}) { # both the type check and the initial push will only be done once per funnel
foreach my $df_target (@{ $funnel_dfr->get_my_targets }) {
unless($df_target->to_analysis->isa('Bio::EnsEMBL::Hive::Analysis')) {
throw("Each conditional branch of a semaphored funnel rule must point at an Analysis");
}
}
$set_of_groups{$funnel_dfr} = [$funnel_dfr, []];
}
my $this_group = $set_of_groups{$funnel_dfr};
if( $dfr->to_analysis->isa('Bio::EnsEMBL::Hive::Analysis') ) {
push @{$this_group->[1]}, $dfr;
} else {
throw("A semaphored fan target must be an Analysis");
foreach my $df_target (@{ $dfr->get_my_targets }) {
unless($df_target->to_analysis->isa('Bio::EnsEMBL::Hive::Analysis')) {
throw("Each conditional branch of a semaphored fan rule must point at an Analysis");
}
}
push @{$this_group->[1]}, $dfr;
} else {
$set_of_groups{$dfr} ||= [$dfr, []];
}
......
......@@ -313,18 +313,47 @@ sub dataflow_output_id {
# fan rules come sorted before funnel rules for the same branch_code:
foreach my $df_rule ( @{ $self->analysis->dataflow_rules_by_branch->{$branch_code} || [] } ) {
my $targets_grouped_by_condition = $df_rule->get_my_targets_grouped_by_condition; # the pairs are deliberately ordered to put the DEFAULT branch last
my @conditions = map { $_->[0] } @$targets_grouped_by_condition;
foreach my $output_id (@$output_ids) { # filter the output_ids and place them into the [2] part of $targets_grouped_by_condition
my $condition_match_count = 0;
foreach my $condition_idx (0..@conditions-1) {
my $unsubstituted_condition = $conditions[$condition_idx];
if(defined($unsubstituted_condition)) {
if(my $substituted_condition = $self->param_substitute($unsubstituted_condition, $output_id)) {
$condition_match_count++;
} else {
next; # non-DEFAULT condition branch failed
}
} elsif($condition_match_count) {
next; # DEFAULT condition branch failed, because one of the conditions fired
}
push @{$targets_grouped_by_condition->[$condition_idx][2]}, $output_id;
}
}
foreach my $triple (@$targets_grouped_by_condition) {
my ($unsubstituted_condition, $df_targets, $filtered_output_ids) = @$triple;
foreach my $df_target (@$df_targets) {
# parameter substitution into input_id_template is rule-specific
my $output_ids_for_this_rule;
if(my $template_string = $df_rule->input_id_template()) {
if(my $template_string = $df_target->input_id_template()) {
my $template_hash = destringify($template_string);
$output_ids_for_this_rule = [ map { $self->param_substitute($template_hash, $_) } @$output_ids ];
$output_ids_for_this_rule = [ map { $self->param_substitute($template_hash, $_) } @$filtered_output_ids ];
} else {
$output_ids_for_this_rule = $output_ids;
$output_ids_for_this_rule = $filtered_output_ids;
}
my ($stored_listref) = $df_rule->to_analysis->dataflow( $output_ids_for_this_rule, $self, \@common_job_params, $df_rule );
my ($stored_listref) = $df_target->to_analysis->dataflow( $output_ids_for_this_rule, $self, \@common_job_params, $df_rule );
push @output_job_ids, @$stored_listref;
} # /foreach my $df_target
} # /foreach my $unsubstituted_condition
} # /foreach my $df_rule
return \@output_job_ids;
......
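For reference, here is a minimal standalone sketch (not part of the commit) of the filtering rule implemented in the hunk above: every output_id is offered to each explicit condition independently, and the DEFAULT group (undefined condition, deliberately ordered last) only receives it when no explicit condition fired. The $substitute callback is a stand-in for $self->param_substitute(); the real code only checks the truthiness of the substituted string.

sub filter_output_ids_by_condition {
    my ($targets_grouped_by_condition, $output_ids, $substitute) = @_;

    foreach my $output_id (@$output_ids) {
        my $condition_match_count = 0;

        foreach my $triple (@$targets_grouped_by_condition) {
            my ($condition, $df_targets) = @$triple;

            if( defined($condition) ) {
                next unless $substitute->($condition, $output_id);  # explicit condition did not fire
                $condition_match_count++;
            } else {
                next if $condition_match_count;     # DEFAULT is skipped once anything else fired
            }
            push @{$triple->[2]}, $output_id;       # slot [2] collects the filtered output_ids
        }
    }
    return $targets_grouped_by_condition;
}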
......@@ -54,6 +54,7 @@ use Bio::EnsEMBL::Hive::Analysis;
use Bio::EnsEMBL::Hive::AnalysisStats;
use Bio::EnsEMBL::Hive::AnalysisCtrlRule;
use Bio::EnsEMBL::Hive::DataflowRule;
use Bio::EnsEMBL::Hive::DataflowTarget;
sub new {
......@@ -221,6 +222,7 @@ our %adaptor_type_2_package_name = (
'AnalysisJob' => 'Bio::EnsEMBL::Hive::DBSQL::AnalysisJobAdaptor',
'AnalysisStats' => 'Bio::EnsEMBL::Hive::DBSQL::AnalysisStatsAdaptor',
'DataflowRule' => 'Bio::EnsEMBL::Hive::DBSQL::DataflowRuleAdaptor',
'DataflowTarget' => 'Bio::EnsEMBL::Hive::DBSQL::DataflowTargetAdaptor',
'LogMessage' => 'Bio::EnsEMBL::Hive::DBSQL::LogMessageAdaptor',
'Meta' => 'Bio::EnsEMBL::Hive::DBSQL::MetaAdaptor',
'PipelineWideParameters'=> 'Bio::EnsEMBL::Hive::DBSQL::PipelineWideParametersAdaptor',
......
......@@ -44,6 +44,11 @@ use Bio::EnsEMBL::Hive::DataflowRule;
use base ('Bio::EnsEMBL::Hive::DBSQL::ObjectAdaptor');
sub check_object_present_in_db_by_content {
return 0;
}
sub default_table_name {
return 'dataflow_rule';
}
......
=pod
=head1 NAME
Bio::EnsEMBL::Hive::DBSQL::DataflowTargetAdaptor
=head1 SYNOPSIS
$dataflow_target_adaptor = $db_adaptor->get_DataflowTargetAdaptor;
$dataflow_target_adaptor = $dataflowTargetObj->adaptor;
=head1 DESCRIPTION
Module to encapsulate all db access for persistent class DataflowTarget.
There should be just one per application and database connection.
=head1 LICENSE
Copyright [1999-2015] Wellcome Trust Sanger Institute and the EMBL-European Bioinformatics Institute
Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software distributed under the License
is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and limitations under the License.
=head1 CONTACT
Please subscribe to the Hive mailing list: http://listserver.ebi.ac.uk/mailman/listinfo/ehive-users to discuss Hive-related questions or to be notified of our updates
=cut
package Bio::EnsEMBL::Hive::DBSQL::DataflowTargetAdaptor;
use strict;
use warnings;
use Bio::EnsEMBL::Hive::Utils ('stringify');
use Bio::EnsEMBL::Hive::DataflowTarget;
use base ('Bio::EnsEMBL::Hive::DBSQL::ObjectAdaptor');
sub default_table_name {
return 'dataflow_target';
}
sub default_insertion_method {
return 'INSERT';
}
sub object_class {
return 'Bio::EnsEMBL::Hive::DataflowTarget';
}
1;
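A hypothetical way of reaching the new adaptor, using only entry points that appear in this commit (the SYNOPSIS above and the DBAdaptor registration earlier in the diff); fetch_all() is assumed to be inherited from the shared BaseAdaptor/ObjectAdaptor layer.

my $dataflow_target_adaptor = $hive_dba->get_DataflowTargetAdaptor;
# ...or, equivalently: my $dataflow_target_adaptor = $hive_dba->get_adaptor('DataflowTarget');

foreach my $df_target ( @{ $dataflow_target_adaptor->fetch_all() } ) {
    print $df_target->toString(), "\n";     # toString() is defined in DataflowTarget.pm below
}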
......@@ -12,9 +12,6 @@
dataflow_rule_id int(10) unsigned NOT NULL AUTO_INCREMENT,
from_analysis_id int(10) unsigned NOT NULL,
branch_code int(10) default 1 NOT NULL,
funnel_dataflow_rule_id int(10) unsigned default NULL,
to_analysis_url varchar(255) default '' NOT NULL,
input_id_template TEXT DEFAULT NULL,
PRIMARY KEY (dataflow_rule_id),
UNIQUE (from_analysis_id, to_analysis_url)
......@@ -54,14 +51,13 @@ package Bio::EnsEMBL::Hive::DataflowRule;
use strict;
use warnings;
use Bio::EnsEMBL::Hive::Utils ('stringify', 'throw');
use Bio::EnsEMBL::Hive::TheApiary;
use base ( 'Bio::EnsEMBL::Hive::Cacheable', 'Bio::EnsEMBL::Hive::Storable' );
sub unikey { # override the default from Cacheable parent
return [ 'from_analysis', 'to_analysis_url', 'branch_code', 'funnel_dataflow_rule', 'input_id_template' ];
sub unikey {
return undef; # unfortunately, this object is no longer unique
}
......@@ -91,75 +87,23 @@ sub branch_code {
}
=head2 input_id_template
Function: getter/setter method for the input_id_template of the dataflow rule
=cut
sub input_id_template {
sub get_my_targets {
my $self = shift @_;
if(@_) {
my $input_id_template = shift @_;
$self->{'_input_id_template'} = (ref($input_id_template) ? stringify($input_id_template) : $input_id_template),
}
return $self->{'_input_id_template'};
return $self->hive_pipeline->collection_of( 'DataflowTarget' )->find_all_by('source_dataflow_rule', $self);
}
=head2 to_analysis_url
Arg[1] : (optional) string $url
Usage : $self->to_analysis_url($url);
Function: Get/set method for the 'to' analysis objects URL for this rule
Returns : string
=cut
sub to_analysis_url {
sub get_my_targets_grouped_by_condition {
my $self = shift @_;
if(@_) {
$self->{'_to_analysis_url'} = shift @_;
if( $self->{'_to_analysis'} ) {
$self->{'_to_analysis'} = undef;
}
} elsif( !$self->{'_to_analysis_url'} and my $target_object=$self->{'_to_analysis'} ) {
my $ref_dba = $self->from_analysis && $self->from_analysis->adaptor && $self->from_analysis->adaptor->db;
$self->{'_to_analysis_url'} = $target_object->url( $ref_dba ); # the URL may be shorter if DBA is the same for source and target
}
return $self->{'_to_analysis_url'};
}
=head2 to_analysis
Usage : $self->to_analysis($analysis);
Function: Get/set method for the goal analysis object of this rule.
Returns : Bio::EnsEMBL::Hive::Analysis
Args : Bio::EnsEMBL::Hive::Analysis
=cut
sub to_analysis {
my ($self, $target_object) = @_;
if( defined $target_object ) {
unless ($target_object->can('url')) {
throw( "to_analysis arg must support 'url' method, '$target_object' does not know how to do it");
}
$self->{'_to_analysis'} = $target_object;
}
if( !$self->{'_to_analysis'} and my $to_analysis_url = $self->to_analysis_url ) { # lazy-load through TheApiary
$self->{'_to_analysis'} = Bio::EnsEMBL::Hive::TheApiary->find_by_url( $to_analysis_url, $self->hive_pipeline );
my %my_targets_by_condition = ();
foreach my $df_target (@{ $self->get_my_targets }) {
my $this_pair = $my_targets_by_condition{ $df_target->on_condition || ''} ||= [ $df_target->on_condition, []];
push @{$this_pair->[1]}, $df_target;
}
return $self->{'_to_analysis'};
return [ sort { $b->[0] cmp $a->[0] } values %my_targets_by_condition ];
}
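The structure returned by get_my_targets_grouped_by_condition() above, sketched with invented condition strings: each pair holds the (possibly undefined) condition and the list of DataflowTarget objects that share it, with the DEFAULT group ordered last.

# [ [ on_condition_or_undef, [ DataflowTarget, ... ] ], ... ]
my $grouped = [
    [ '#is_short_read#', [ $target_align_short ] ],
    [ '#is_long_read#',  [ $target_align_long  ] ],
    [ undef,             [ $target_report      ] ],    # DEFAULT/ELSE group comes last
];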
......@@ -185,9 +129,9 @@ sub toString {
),
' --#',
$self->branch_code,
'--> ',
$self->to_analysis_url,
($self->input_id_template ? (' WITH TEMPLATE: '.$self->input_id_template) : ''),
'--> [ ',
join(', ', map { $_->toString($short) } sort { $b->on_condition <=> $a->on_condition } (@{$self->get_my_targets()})),
' ]',
($self->funnel_dataflow_rule ? ' ---|| ('.$self->funnel_dataflow_rule->toString(1).' )' : ''),
);
}
......
=pod
=head1 NAME
Bio::EnsEMBL::Hive::DataflowTarget
=head1 LICENSE
Copyright [1999-2015] Wellcome Trust Sanger Institute and the EMBL-European Bioinformatics Institute
Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software distributed under the License
is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and limitations under the License.
=head1 CONTACT
Please subscribe to the Hive mailing list: http://listserver.ebi.ac.uk/mailman/listinfo/ehive-users to discuss Hive-related questions or to be notified of our updates
=cut
package Bio::EnsEMBL::Hive::DataflowTarget;
use strict;
use warnings;
use Bio::EnsEMBL::Hive::Utils ('stringify', 'throw');
use Bio::EnsEMBL::Hive::TheApiary;
use base ( 'Bio::EnsEMBL::Hive::Cacheable', 'Bio::EnsEMBL::Hive::Storable' );
sub unikey { # override the default from Cacheable parent
return [ 'source_dataflow_rule', 'on_condition', 'input_id_template', 'to_analysis_url' ];
}
=head1 AUTOLOADED
source_dataflow_rule_id / source_dataflow_rule
=cut
=head2 on_condition
Function: getter/setter method for the on_condition of the dataflow target
=cut
sub on_condition {
my $self = shift @_;
if(@_) {
$self->{'_on_condition'} = shift @_;
}
return $self->{'_on_condition'};
}
=head2 input_id_template
Function: getter/setter method for the input_id_template of the dataflow target
=cut
sub input_id_template {
my $self = shift @_;
if(@_) {
my $input_id_template = shift @_;
$self->{'_input_id_template'} = (ref($input_id_template) ? stringify($input_id_template) : $input_id_template),
}
return $self->{'_input_id_template'};
}
=head2 to_analysis_url
Arg[1] : (optional) string $url
Usage : $self->to_analysis_url($url);
Function: Get/set method for the 'to' analysis objects URL for this rule
Returns : string
=cut
sub to_analysis_url {
my $self = shift @_;
if(@_) {
$self->{'_to_analysis_url'} = shift @_;
if( $self->{'_to_analysis'} ) {
$self->{'_to_analysis'} = undef;
}
} elsif( !$self->{'_to_analysis_url'} and my $target_object=$self->{'_to_analysis'} ) {
my $ref_dba = $self->from_analysis && $self->from_analysis->adaptor && $self->from_analysis->adaptor->db;
$self->{'_to_analysis_url'} = $target_object->url( $ref_dba ); # the URL may be shorter if DBA is the same for source and target
}
return $self->{'_to_analysis_url'};
}
=head2 to_analysis
Usage : $self->to_analysis($analysis);
Function: Get/set method for the goal analysis object of this rule.
Returns : Bio::EnsEMBL::Hive::Analysis
Args : Bio::EnsEMBL::Hive::Analysis
=cut
sub to_analysis {
my ($self, $target_object) = @_;
if( defined $target_object ) {
unless ($target_object->can('url')) {
throw( "to_analysis arg must support 'url' method, '$target_object' does not know how to do it");
}
$self->{'_to_analysis'} = $target_object;
}
if( !$self->{'_to_analysis'} and my $to_analysis_url = $self->to_analysis_url ) { # lazy-load through TheApiary
$self->{'_to_analysis'} = Bio::EnsEMBL::Hive::TheApiary->find_by_url( $to_analysis_url, $self->hive_pipeline );
}
return $self->{'_to_analysis'};
}
=head2 toString
Args : (none)
Example : print $df_target->toString()."\n";
Description: returns a stringified representation of the dataflow target
Returntype : string
=cut
sub toString {
my $self = shift @_;
my $short = shift @_;
my $on_condition = $self->on_condition;
return join('',
$short ? () : ( 'DataflowTarget: ' ),
defined($on_condition) ? 'WHEN '.$on_condition : 'DEFAULT ',
'--> ',
$self->to_analysis_url,
($self->input_id_template ? (' WITH TEMPLATE: '.$self->input_id_template) : ''),
);
}
1;
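A rough usage sketch of the toString() method above (target URLs, condition and template are invented; output shown as comments):

print $conditional_target->toString(), "\n";
# DataflowTarget: WHEN #is_long_read#--> align_long

print $default_target->toString(1), "\n";       # the $short form drops the 'DataflowTarget: ' prefix
# DEFAULT --> report WITH TEMPLATE: { "source" => "#file#" }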
......@@ -142,7 +142,7 @@ sub save_collections {
my $hive_dba = $self->hive_dba();
foreach my $AdaptorType ('MetaParameters', 'PipelineWideParameters', 'ResourceClass', 'ResourceDescription', 'Analysis', 'AnalysisStats', 'AnalysisCtrlRule', 'DataflowRule') {
foreach my $AdaptorType ('MetaParameters', 'PipelineWideParameters', 'ResourceClass', 'ResourceDescription', 'Analysis', 'AnalysisStats', 'AnalysisCtrlRule', 'DataflowRule', 'DataflowTarget') {
my $adaptor = $hive_dba->get_adaptor( $AdaptorType );
my $class = 'Bio::EnsEMBL::Hive::'.$AdaptorType;
foreach my $storable_object ( $self->collection_of( $AdaptorType )->list ) {
......@@ -198,12 +198,12 @@ sub add_new_or_update {
unless( $object ) {
$object = $class->can('new') ? $class->new( @_ ) : { @_ };
my $found_display = UNIVERSAL::can($object, 'toString') ? $object->toString : 'naked entry '.stringify($object);
warn "Created a new $found_display\n";
$self->collection_of( $type )->add( $object );
$object->hive_pipeline($self) if UNIVERSAL::can($object, 'hive_pipeline');
my $found_display = UNIVERSAL::can($object, 'toString') ? $object->toString : 'naked entry '.stringify($object);
warn "Created a new $found_display\n";
}
return $object;
......
......@@ -596,10 +596,15 @@ sub add_objects_from_config {
my $df_rule = $pipeline->add_new_or_update( 'DataflowRule',
'from_analysis' => $analysis,
'to_analysis_url' => $heir_url,
'branch_code' => $branch_name_or_code,
'funnel_dataflow_rule' => $funnel_dataflow_rule,
);
my $df_target = $pipeline->add_new_or_update( 'DataflowTarget',
'source_dataflow_rule' => $df_rule,
'condition' => undef,
'input_id_template' => $input_id_template,
'to_analysis_url' => $heir_url,
);