Graph.pm 13.8 KB
Newer Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
package Bio::EnsEMBL::Hive::Utils::Graph;

=head1 NAME

Bio::EnsEMBL::Hive::Utils::Graph

=head1 SYNOPSIS

  my $dba = get_hive_dba();
  my $g = Bio::EnsEMBL::Hive::Utils::Graph->new($dba);   # optionally: ->new($dba, $config_file_name)
  my $graphviz = $g->build();
  $graphviz->as_png('location.png');

=head1 DESCRIPTION

This is a module for converting a hive database's flow of analyses, control 
rules and dataflows into the GraphViz model language. This information can
then be converted to an image or to the dot language for further manipulation
in GraphViz.

=head1 METHODS/SUBROUTINES

See inline

=cut

use strict;
use warnings;

# check_ref() is used to tell analyses from tables; assert_ref() type-checks the DBAdaptor.
use Bio::EnsEMBL::Utils::Scalar qw(check_ref assert_ref);

use Bio::EnsEMBL::Hive::Utils::GraphViz;
use Bio::EnsEMBL::Hive::Utils::Config;

# NOTE(review): config(), context() and config_get() are called on $self but not
# defined in this file; presumably they are inherited from Configurable -- confirm.
use base ('Bio::EnsEMBL::Hive::Configurable');

37
38
39

=head2 new()

  Arg [1] : Bio::EnsEMBL::Hive::DBSQL::DBAdaptor $dba;
              The adaptor to get information from
  Arg [2] : (optional) string $config_file_name;
              A JSON file name to initialize the Config object with.
              If one is not given then we don't pass anything into Config's constructor,
              which results in loading configuration from Config's standard locations.
  Returntype : Graph object
  Exceptions : If the parameters are not as required
  Status     : Beta

=cut

sub new {
  my ($class, $dba, $config_file_name) = @_;

  # Works both as a class method and as an instance method ($obj->new(...)).
  my $self = bless({}, ref($class) || $class);

  # The dba() accessor type-checks the adaptor whenever a defined value is given.
  $self->dba($dba);

  # Pass an empty list (not undef) when no file name was given, so that Config's
  # constructor receives no arguments at all.
  my $config = Bio::EnsEMBL::Hive::Utils::Config->new( $config_file_name ? $config_file_name : () );
  $self->config($config);
  $self->context( [ 'Graph' ] );

  return $self;
}


=head2 graph()

  Arg [1] : None
  Description : Returns the GraphViz instance used by this module, creating it on first access
  Returntype : Bio::EnsEMBL::Hive::Utils::GraphViz
  Exceptions : None
  Status     : Beta

=cut

sub graph {
  my ($self) = @_;

  # Create the GraphViz object lazily on first access and cache it on the instance.
  unless( exists $self->{graph} ) {
    # NOTE(review): the 'ratio' value is deliberately malformed; it appears to smuggle
    # an extra 'pad' attribute into the generated output -- confirm before changing it.
    $self->{graph} = Bio::EnsEMBL::Hive::Utils::GraphViz->new( name => 'AnalysisWorkflow', ratio => 'compress"; pad = "1.0'  ); # injection hack!
  }

  return $self->{graph};
}


=head2 dba()

  Arg [1] : The DBAdaptor instance
  Returntype : DBAdaptor
  Exceptions : If the given object is not a hive DBAdaptor
  Status     : Beta

=cut

sub dba {
  my $self = shift;
  my ($new_dba) = @_;

  # Getter mode: nothing (or undef) was supplied, so report the stored adaptor.
  return $self->{dba} unless defined $new_dba;

  # Setter mode: only a hive DBAdaptor is accepted (assert_ref enforces the type).
  assert_ref($new_dba, 'Bio::EnsEMBL::Hive::DBSQL::DBAdaptor');
  $self->{dba} = $new_dba;

  return $self->{dba};
}


103
104
105
106
107
108
# Plain function (not a method): maps an analysis id to the name of its graph node.
sub _analysis_node_name {
    my ($id) = @_;

    return "analysis_$id";
}

109

110
111
112
113
114
115
# Plain function (not a method): maps a dataflow rule id to the name of the
# midpoint node drawn on that rule's arrow.
sub _midpoint_name {
    my ($dfr_id) = @_;

    return "dfr_${dfr_id}_mp";
}

116
117
118
119
120
121
122
123
124
125
126

=head2 build()

  Returntype : The GraphViz object built & populated
  Exceptions : Raised if there are issues with accessing the database
  Description : Builds the graph object and returns it.
  Status     : Beta

=cut

sub build {
    my ($self) = @_;

    my $dba                   = $self->dba();
    my $all_analyses          = $dba->get_AnalysisAdaptor()->fetch_all();
    my $all_ctrl_rules        = $dba->get_AnalysisCtrlRuleAdaptor()->fetch_all();
    my $all_dataflow_rules    = $dba->get_DataflowRuleAdaptor()->fetch_all();

    my %inflow_count = ();    # used to detect sources (nodes with zero inflow)
    my %outflow_rules = ();   # maps from analysis_node_name to a list of all dataflow rules that flow out of it
    my %dfr_flows_into= ();   # maps from dfr_id to target analysis_node_name

    foreach my $rule ( @$all_dataflow_rules ) {
        my $to_analysis = $rule->to_analysis();

            # only targets that have a dbID (i.e. not tables) count as inflow:
        if(my $to_id = $to_analysis->can('dbID') && $to_analysis->dbID()) {
            my $to_node_name    = _analysis_node_name( $to_id );
            $inflow_count{$to_node_name}++;
            $dfr_flows_into{$rule->dbID()} = $to_node_name;
        }
        push @{$outflow_rules{ _analysis_node_name($rule->from_analysis_id()) }}, $rule;
    }

    my %subgraph_allocation = ();   # maps from node name to the name of the box it is drawn in (undef/absent = no box)

        # NB: this is a very approximate algorithm with rough edges!
        # It will not find all start nodes in cyclic components!
    foreach my $analysis_id ( map { $_->dbID } @$all_analyses ) {
        my $analysis_node_name =  _analysis_node_name( $analysis_id );
        unless($inflow_count{$analysis_node_name}) {
            $self->_allocate_to_subgraph(\%outflow_rules, \%dfr_flows_into, $analysis_node_name, \%subgraph_allocation ); # run the recursion in each component that has a non-cyclic start
        }
    }

    $self->_add_hive_details();
    foreach my $analysis (@$all_analyses) {     # not named $a, which would shadow sort's package variable
        $self->_add_analysis_node($analysis);
    }
    $self->_control_rules( $all_ctrl_rules );
    $self->_dataflow_rules( $all_dataflow_rules );

    if($self->config_get('DisplayStretched') ) {

            # The invisible edges will be linked to the destination analysis instead of the midpoint
        my %rule_by_id = map { ( $_->dbID() => $_ ) } @$all_dataflow_rules;

        my %midpoint_to_analysis = ();
        foreach my $funnel_rule_id ( grep { $_ } map { $_->funnel_dataflow_rule_id() } @$all_dataflow_rules ) {
            my $funnel_rule   = $rule_by_id{$funnel_rule_id} or next;   # funnel rule was not among the fetched rules
            my $funnel_target = $funnel_rule->to_analysis();
            next unless $funnel_target->can('dbID');                    # same test as used above to tell analyses from other targets

            $midpoint_to_analysis{ _midpoint_name( $funnel_rule_id ) } = _analysis_node_name( $funnel_target->dbID() );
        }

            # sorted, so that the edges are emitted in the same order on every run
        foreach my $from ( sort keys %subgraph_allocation ) {
            my $to = $subgraph_allocation{$from} or next;               # node is not in any box
            my $target = $midpoint_to_analysis{$to};
            next unless defined $target;                                # never hand an undefined node name to add_edge

            $self->graph->add_edge( $from => $target,
                color     => 'black',
                style     => 'invis',   # toggle visibility by changing 'invis' to 'dashed'
            );
        }
    }

    if($self->config_get('DisplaySemaphoreBoxes') ) {
        $self->graph->subgraphs( \%subgraph_allocation );

        $self->graph->colour_scheme( $self->config_get('Box', 'ColourScheme') );
        $self->graph->colour_offset( $self->config_get('Box', 'ColourOffset') );
    }

    return $self->graph();
}


# Recursively walks the dataflow rules that leave $parent_analysis_node_name and records,
# in %$subgraph_allocation, which box each reached node (and each fan/funnel midpoint)
# belongs to. A box is identified by the midpoint name of the funnel rule that closes it.
#
#   $outflow_rules             : hashref, node name => arrayref of dataflow rules leaving that node
#   $dfr_flows_into            : hashref, only passed down the recursion (not read here)
#   $parent_analysis_node_name : name of the node whose outgoing rules are followed
#   $subgraph_allocation       : hashref, updated in place
sub _allocate_to_subgraph {
    my ($self, $outflow_rules, $dfr_flows_into, $parent_analysis_node_name, $subgraph_allocation ) = @_;

    my $parent_allocation = $subgraph_allocation->{ $parent_analysis_node_name };  # for some analyses it will be undef

        # '|| []' avoids autovivifying an entry in the caller's hash for nodes without outflow
    foreach my $rule ( @{ $outflow_rules->{$parent_analysis_node_name} || [] } ) {
        my $to_analysis = $rule->to_analysis();

        my $this_analysis_node_name;
        if( $to_analysis->can('dbID') ) {
            $this_analysis_node_name = _analysis_node_name( $to_analysis->dbID() );
        } elsif( $self->config_get('DuplicateTables') ) {
                # per-source copy of the table node, named the same way as in _dataflow_rules()
            $this_analysis_node_name = $to_analysis->table_name() . '_' . $rule->from_analysis_id();
        } else {
            next;   # targets without a dbID are only allocated when they are duplicated per source
        }

        my $funnel_dataflow_rule_id = $rule->funnel_dataflow_rule_id();

        my $proposed_allocation = $funnel_dataflow_rule_id  # depends on whether we start a new semaphore
            ? _midpoint_name( $funnel_dataflow_rule_id )    # if we do, report to the new funnel (based on common funnel rule's midpoint)
            : $parent_allocation;                           # if we don't, inherit the parent's funnel

        if($funnel_dataflow_rule_id) {
            my $fan_midpoint_name = _midpoint_name( $rule->dbID() );
            $subgraph_allocation->{ $fan_midpoint_name } = $proposed_allocation;

            my $funnel_midpoint_name = _midpoint_name( $funnel_dataflow_rule_id );
            $subgraph_allocation->{ $funnel_midpoint_name } = $parent_allocation;   # draw the funnel's midpoint outside of the box
        }

            # We allocate on first-come basis at the moment: a node that has already been
            # allocated by another branch keeps that allocation and is not traversed again
            # (which also stops the recursion on cycles).
        next if exists $subgraph_allocation->{ $this_analysis_node_name };

        $subgraph_allocation->{ $this_analysis_node_name } = $proposed_allocation;

        $self->_allocate_to_subgraph( $outflow_rules, $dfr_flows_into, $this_analysis_node_name, $subgraph_allocation );
    }

    return;
}

240

241
242
# Adds a plain-text 'Details' node labelled "dbname@host" to the graph,
# but only when the 'DisplayDetails' configuration option is set.
sub _add_hive_details {
  my ($self) = @_;

  my $node_fontname  = $self->config_get('Node', 'Details', 'Font');

  if( $self->config_get('DisplayDetails') ) {
    my $dbc   = $self->dba()->dbc();
    my $label = sprintf('%s@%s', $dbc->dbname(), $dbc->host() || '-');   # '-' stands in for an empty host

    $self->graph()->add_node( 'Details',
      label     => $label,
      fontname  => $node_fontname,
      shape     => 'plaintext',
    );
  }
}

257

258
259
# Adds one filled node for the given analysis. The label shows the logic name, the dbID
# and the job-count breakout; font and fill colour are looked up in the configuration
# under the analysis' current status.
sub _add_analysis_node {
  my ($self, $analysis) = @_;   # not named $a, which would shadow sort's package variable

  my $stats = $analysis->stats();

  # job_count_breakout() is called in list context; only its first element is used
  my ($breakout_label) = $stats->job_count_breakout();

  # NB: '\n' is single-quoted on purpose: a literal backslash-n is put into the label
  my $analysis_label    = $analysis->logic_name().' ('.$analysis->dbID().')\n'.$breakout_label;
  my $shape             = $analysis->can_be_empty() ? 'doubleoctagon' : 'ellipse' ;
  my $status_colour     = $self->config_get('Node', $stats->status, 'Colour');
  my $node_fontname     = $self->config_get('Node', $stats->status, 'Font');

  $self->graph->add_node( _analysis_node_name( $analysis->dbID() ),
    label       => $analysis_label,
    shape       => $shape,
    style       => 'filled',
    fontname    => $node_fontname,
    fillcolor   => $status_colour,
  );
}


# Draws one 'tee'-headed edge per control rule, from the condition analysis
# to the controlled analysis, in the configured control-edge colour.
sub _control_rules {
  my ($self, $all_ctrl_rules) = @_;

  my $control_colour = $self->config_get('Edge', 'Control', 'Colour');
  my $graph = $self->graph();

  #The control rules are always from and to an analysis so no need to search for odd cases here
  foreach my $rule ( @$all_ctrl_rules ) {
    my $from = _analysis_node_name( $rule->condition_analysis()->dbID() );
    my $to   = _analysis_node_name( $rule->ctrled_analysis()->dbID() );

    $graph->add_edge( $from => $to,
      color => $control_colour,
      arrowhead => 'tee',
    );
  }

  return;
}

296

297
# Draws all dataflow rules. A rule that takes part in a semaphore (either as a fan rule
# that names a funnel, or as the funnel itself) is drawn as a two-part arrow through a
# tiny midpoint node, and fan midpoints are linked to their funnel's midpoint by a dashed
# edge. All other rules are drawn as a single labelled arrow. Table targets get their
# node created here via _add_table_node().
sub _dataflow_rules {
    my ($self, $all_dataflow_rules) = @_;

    my $graph = $self->graph();

    my $dataflow_colour  = $self->config_get('Edge', 'Data', 'Colour');
    my $semablock_colour = $self->config_get('Edge', 'Semablock', 'Colour');
    my $df_edge_fontname = $self->config_get('Edge', 'Data', 'Font');

        # First pass: both ends of every semaphore need a midpoint node.
    my %needs_a_midpoint = ();
    foreach my $rule ( @$all_dataflow_rules ) {
        if(my $funnel_dataflow_rule_id = $rule->funnel_dataflow_rule_id()) {
            $needs_a_midpoint{$rule->dbID()}++;
            $needs_a_midpoint{$funnel_dataflow_rule_id}++;
        }
    }

        # Second pass: draw the rules.
    foreach my $rule ( @$all_dataflow_rules ) {

        my ($rule_id, $from_analysis_id, $branch_code, $funnel_dataflow_rule_id, $to) =
            ($rule->dbID(), $rule->from_analysis_id(), $rule->branch_code(), $rule->funnel_dataflow_rule_id(), $rule->to_analysis());

        my $from_node = _analysis_node_name($from_analysis_id);
        my $to_node;

            # Different treatment for analyses and tables:
        if(check_ref($to, 'Bio::EnsEMBL::Hive::Analysis')) {
            $to_node = _analysis_node_name( $to->dbID() );
        } elsif(check_ref($to, 'Bio::EnsEMBL::Hive::NakedTable')) {
            $to_node = $to->table_name();
            $to_node .= '_'.$from_analysis_id if $self->config_get('DuplicateTables');   # one copy of the table node per source analysis

            $self->_add_table_node($to_node);
        } else {
            warn('Do not know how to handle the type '.ref($to));
            next;
        }

        if($needs_a_midpoint{$rule_id}) {
            my $midpoint_name = _midpoint_name($rule_id);

            $graph->add_node( $midpoint_name,   # midpoint itself
                color       => $dataflow_colour,
                label       => '',
                shape       => 'point',
                fixedsize   => 1,
                width       => 0.01,
                height      => 0.01,
            );
            $graph->add_edge( $from_node => $midpoint_name, # first half of the two-part arrow
                color       => $dataflow_colour,
                arrowhead   => 'none',
                label       => '#'.$branch_code,
                fontname    => $df_edge_fontname,
            );
            $graph->add_edge( $midpoint_name => $to_node,   # second half of the two-part arrow
                color     => $dataflow_colour,
            );
            if($funnel_dataflow_rule_id) {
                $graph->add_edge( $midpoint_name => _midpoint_name($funnel_dataflow_rule_id),   # semaphore inter-rule link
                    color     => $semablock_colour,
                    style     => 'dashed',
                    arrowhead => 'tee',
                    dir       => 'both',
                    arrowtail => 'crow',
                );
            }
        } else {
                # one-part arrow:
            $graph->add_edge( $from_node => $to_node,
                color       => $dataflow_colour,
                label       => '#'.$branch_code,
                fontname    => $df_edge_fontname,
            );
        } # /if($needs_a_midpoint{$rule_id})
    } # /foreach my $rule (@$all_dataflow_rules)

    return;
}

379

380
381
# Adds a 'tab'-shaped node for a table.
#
#   $table : the node name. When 'DuplicateTables' is configured the caller passes
#            "<table_name>_<suffix>", and the text after the last underscore is stripped
#            to obtain the label; otherwise the node name is used as the label as-is.
sub _add_table_node {
  my ($self, $table) = @_;

  my $node_fontname    = $self->config_get('Node', 'Table', 'Font');

  my $table_name = $table;
  # $1 is only read after a successful match; on a failed match it would still hold the
  # capture of some earlier, unrelated match, so the full node name is kept as the label.
  if( $self->config_get('DuplicateTables') && $table =~ /^(.*)_([^_]*)$/ ) {
    $table_name = $1;
  }

  $self->graph()->add_node( $table,
    label => $table_name.'\n',      # single-quoted on purpose: a literal backslash-n goes into the label
    shape => 'tab',
    fontname => $node_fontname,
    color => $self->config_get('Node', 'Table', 'Colour'),
  );
}

Leo Gordon's avatar
Leo Gordon committed
399
1;