DataflowRule.pm 5.2 KB
Newer Older
1
=pod
2 3

=head1 NAME
Jessica Severin's avatar
Jessica Severin committed
4

5
    Bio::EnsEMBL::Hive::DataflowRule
6 7

=head1 DESCRIPTION
Jessica Severin's avatar
Jessica Severin committed
8

9
    A data container object (methods are intelligent getters/setters) that corresponds to a row stored in 'dataflow_rule' table
10

Leo Gordon's avatar
Leo Gordon committed
11
    A dataflow rule is activated when a Bio::EnsEMBL::Hive::AnalysisJob::dataflow_output_id is called at any moment during a RunnableDB's execution.
12 13 14
    The current RunnableDB's analysis ($from_analysis) and the requested $branch_code (1 by default) define the entry conditions,
    and whatever rules match these conditions will generate new jobs with input_ids specified in the dataflow_output_id() call.
    If input_id_template happens to contain a non-NULL value, it will be used to generate the corresponding input_id instead.
15

16
    Jessica's remark on the structure of to_analysis_url:
17
        Extended from the design of the SimpleRule concept to allow the 'to' analysis to be specified with a network-savvy URL like
18
        mysql://ensadmin:<pass>@ecs2:3361/compara_hive_test/analysis?logic_name='blast_NCBI34'
19

20 21
=head1 LICENSE

22
    Copyright [1999-2015] Wellcome Trust Sanger Institute and the EMBL-European Bioinformatics Institute
nwillhoft's avatar
nwillhoft committed
23
    Copyright [2016-2021] EMBL-European Bioinformatics Institute
24 25 26 27 28 29 30 31 32 33

    Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License.
    You may obtain a copy of the License at

         http://www.apache.org/licenses/LICENSE-2.0

    Unless required by applicable law or agreed to in writing, software distributed under the License
    is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    See the License for the specific language governing permissions and limitations under the License.

34
=head1 CONTACT
Jessica Severin's avatar
Jessica Severin committed
35

36
    Please subscribe to the Hive mailing list:  http://listserver.ebi.ac.uk/mailman/listinfo/ehive-users  to discuss Hive-related questions or to be notified of our updates
37 38 39 40 41 42 43

=cut


package Bio::EnsEMBL::Hive::DataflowRule;

use strict;
44
use warnings;
45

46
use Bio::EnsEMBL::Hive::TheApiary;
Matthieu Muffato's avatar
Matthieu Muffato committed
47
use Bio::EnsEMBL::Hive::DBSQL::DataflowRuleAdaptor;
48

49
use base ( 'Bio::EnsEMBL::Hive::Storable' );
50

51

52
# Returns a reference to a fresh array holding the names of the fields that
# make up this class's "unikey".
# NOTE(review): presumably this combination is what the collection/storage
# layer uses to tell two rules apart -- confirm against the Storable base class.
sub unikey {
    my @key_fields = qw( from_analysis branch_code funnel_dataflow_rule unitargets );

    return \@key_fields;
}


57 58 59 60 61 62 63 64 65
=head1 AUTOLOADED

    from_analysis_id / from_analysis

    funnel_dataflow_rule_id / funnel_dataflow_rule

=cut


66
=head2 branch_code

    Args       : (optional) a branch name or a branch code
    Function   : getter/setter method for the branch_code of the dataflow rule.
                 When an argument is given and it is true, it is passed through
                 DataflowRuleAdaptor::branch_name_2_code() and the result is stored;
                 a false argument (undef, '' or 0) is stored exactly as given.
    Returntype : the value currently stored as the branch code

=cut

sub branch_code {
    my ($self, @args) = @_;

    # Pure getter call: nothing to update.
    return $self->{'_branch_code'} unless @args;

    my $requested = $args[0];

    # Only true values go through the name-to-code translation;
    # false ones short-circuit and are kept verbatim.
    $self->{'_branch_code'} = $requested
        ? Bio::EnsEMBL::Hive::DBSQL::DataflowRuleAdaptor::branch_name_2_code( $requested )
        : $requested;

    return $self->{'_branch_code'};
}

82

83
=head2 get_my_targets

    Args       : (none)
    Example    : my $df_targets = $df_rule->get_my_targets;
    Description: looks up, in the 'DataflowTarget' collection of this rule's hive_pipeline,
                 all entries whose 'source_dataflow_rule' is this rule
    Returntype : whatever the collection's find_all_by() returns
                 (other methods of this class dereference it as an array reference)

=cut

sub get_my_targets {
    my ($self) = @_;

    my $target_collection = $self->hive_pipeline->collection_of( 'DataflowTarget' );

    return $target_collection->find_all_by( 'source_dataflow_rule', $self );
}
88

89

90
=head2 get_my_targets_grouped_by_condition

    Args       : (optional) an array reference of dataflow targets;
                 when not defined, the result of $self->get_my_targets is used instead
    Example    : my $groups = $df_rule->get_my_targets_grouped_by_condition;
    Description: groups the targets by their on_condition (an undefined condition is grouped under '')
    Returntype : array reference of [ $on_condition, [ $target, ... ] ] pairs,
                 ordered by condition in descending string order (undef compared as '')

=cut

sub get_my_targets_grouped_by_condition {
    my ($self, $df_targets) = @_;

    $df_targets = $self->get_my_targets unless defined $df_targets;

    my %group_for = ();
    for my $target (@$df_targets) {
        my $condition_key = $target->on_condition // '';

        # The first target seen for a condition creates the [ condition, [targets] ] pair
        $group_for{$condition_key} ||= [ $target->on_condition, [] ];

        push @{ $group_for{$condition_key}[1] }, $target;
    }

    my @groups_desc = sort { ($b->[0] // '') cmp ($a->[0] // '') } values %group_for;

    return \@groups_desc;
}

105

106 107 108
# Builds the "unitargets" signature string for a list of dataflow targets.
#
#   Arg[1]     : array reference of target objects that respond to
#                on_condition, input_id_template and to_analysis_url
#   Returntype : string; one 'on_condition:input_id_template:to_analysis_url'
#                entry per target (undefined condition/template rendered as ''),
#                joined with ';'. An empty list gives ''.
#
# The entries are ordered by condition, then template, then target URL.
# The URL tie-break matters: without it two targets that share both the
# condition and the template would be emitted in whatever order they were
# supplied, so the same set of targets could yield different signatures.
sub _compute_unitargets {
    my $targets = shift;

    my @ordered_targets = sort {
           ($a->on_condition      // '') cmp ($b->on_condition      // '')
        or ($a->input_id_template // '') cmp ($b->input_id_template // '')
        or ($a->to_analysis_url   // '') cmp ($b->to_analysis_url   // '')
    } @$targets;

    return join( ';', map { ($_->on_condition//'').':'.($_->input_id_template//'').':'.$_->to_analysis_url } @ordered_targets );
}

# Getter/setter for the cached "unitargets" signature of this rule.
#
#   Arg[1]     : (optional) a value to store as the cached signature
#   Returntype : the cached signature; when the cached value is false it is
#                first (re)computed by _compute_unitargets() from get_my_targets()
#
# NOTE: Because the value is cached, the list of targets is assumed *not* to
# change once the object has been loaded. That is true at the moment; should
# it ever stop being true, the code that modifies the targets will have to
# invalidate the cached value.
# A false cached value (such as the empty signature of a rule without
# targets) is not remembered and is recomputed on every call.
sub unitargets {
    my ($self, @new_value) = @_;

    $self->{'_cached_unitargets'} = $new_value[0] if @new_value;

    # get_my_targets is deliberately called in scalar context
    $self->{'_cached_unitargets'} ||= _compute_unitargets( scalar $self->get_my_targets );

    return $self->{'_cached_unitargets'};
}


135 136
=head2 toString

    Args       : $short (optional) - when true, the leading 'DataflowRule[dbID]: from_analysis_logic_name' part is left out
    Example    : print $df_rule->toString()."\n";
    Description: returns a stringified representation of the rule: the branch code, the stringified targets
                 (ordered by on_condition in descending string order and given the same $short flag)
                 and, if the rule has a funnel rule, that rule's short form
    Returntype : string

=cut

sub toString {
    my ($self, $short) = @_;

    my @pieces = ();

    # The identifying prefix only appears in the long form
    unless ($short) {
        push @pieces, 'DataflowRule[', ($self->dbID // ''), ']: ', $self->from_analysis->logic_name;
    }

    push @pieces, ' --#', $self->branch_code, '--> [ ';

    my @targets_desc = sort { ($b->on_condition // '') cmp ($a->on_condition // '') } @{ $self->get_my_targets() };
    push @pieces, join(', ', map { $_->toString($short) } @targets_desc), ' ]';

    # The funnel rule is always rendered in its short form
    if ($self->funnel_dataflow_rule) {
        push @pieces, ' ---|| ('.$self->funnel_dataflow_rule->toString(1).' )';
    }

    return join('', @pieces);
}

1;