Graph.pm 18.4 KB
Newer Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
package Bio::EnsEMBL::Hive::Utils::Graph;

=head1 NAME

Bio::EnsEMBL::Hive::Utils::Graph

=head1 SYNOPSIS

  my $dba = get_hive_dba();
  my $g = Bio::EnsEMBL::Hive::Utils::Graph->new($dba);
  my $graphviz = $g->build();
  $graphviz->as_png('location.png');

=head1 DESCRIPTION

This is a module for converting a hive database's flow of analyses, control 
rules and dataflows into the GraphViz model language. This information can
then be converted to an image or to the dot language for further manipulation
in GraphViz.

=head1 METHODS/SUBROUTINES

See inline

=cut

use strict;
use warnings;

use Bio::EnsEMBL::Utils::Scalar qw(check_ref assert_ref);

32
use Bio::EnsEMBL::Hive::Utils::GraphViz;
33
use Bio::EnsEMBL::Hive::Utils::Config;
34

35
36
use base ('Bio::EnsEMBL::Hive::Configurable');

37
38
39

=head2 new()

  Arg [1] : Bio::EnsEMBL::Hive::DBSQL::DBAdaptor $dba;
              The adaptor to get information from
  Arg [2] : (optional) string $config_file_name;
                  A JSON file name to initialize the Config object with.
                  If one is not given then we don't pass anything into Config's constructor,
                  which results in loading configuration from Config's standard locations.
  Returntype : Graph object
  Exceptions : If the parameters are not as required
  Status     : Beta
  
=cut

sub new {
  my ($class, $dba, $config_file_name) = @_;

  # Works both as a class method and when invoked on an existing instance.
  my $self = bless {}, ( ref($class) || $class );

  # Stored through the accessor, which type-checks any defined value.
  $self->dba($dba);

  # Hand a file name to Config only when one was given; otherwise its
  # constructor is called with an empty argument list.
  my @config_args = $config_file_name ? ($config_file_name) : ();
  $self->config( Bio::EnsEMBL::Hive::Utils::Config->new(@config_args) );

  # NOTE(review): presumably scopes the config_get() lookups made by this
  # module -- confirm against Bio::EnsEMBL::Hive::Configurable.
  $self->context( ['Graph'] );

  return $self;
}


=head2 graph()

  Arg [1] : None (the GraphViz instance is created on first call and cached)
  Returntype : GraphViz
  Exceptions : None
  Status     : Beta

=cut

sub graph {
    my ($self) = @_;

    # An instance has already been created (or the slot was set): reuse it.
    return $self->{graph} if exists $self->{graph};

    my $padding = $self->config_get('Pad') || 0;

    # The 'ratio' value embeds a closing quote followed by a pad = "..." fragment.
    # The original author flags this as an injection hack; presumably it smuggles
    # a 'pad' attribute into the generated dot output -- confirm against the
    # GraphViz wrapper before touching it.
    $self->{graph} = Bio::EnsEMBL::Hive::Utils::GraphViz->new(
        name  => 'AnalysisWorkflow',
        ratio => qq{compress"; pad = "$padding},    # injection hack!
    );

    return $self->{graph};
}


=head2 dba()

  Arg [1] : The DBAdaptor instance
  Returntype : DBAdaptor
  Exceptions : If the given object is not a hive DBAdaptor
  Status     : Beta

=cut

sub dba {
  my ($self, $dba) = @_;

  # Getter mode: nothing (defined) was passed, so there is nothing to validate or store.
  return $self->{dba} unless defined $dba;

  # Setter mode: assert_ref() is expected to throw unless $dba is a hive DBAdaptor.
  assert_ref($dba, 'Bio::EnsEMBL::Hive::DBSQL::DBAdaptor');
  $self->{dba} = $dba;

  return $self->{dba};
}


105
106
107
108
109
110
# Plain function (not a method): builds the graph node name for an analysis id.
sub _analysis_node_name {
    my ($analysis_id) = @_;

    return "analysis_$analysis_id";
}

111
112
113
114
115
116
# Plain function (not a method): builds the graph node name prefix for a table.
sub _table_node_name {
    my ($table_name) = @_;

    return "table_$table_name";
}

117

118
119
120
121
122
123
# Plain function (not a method): builds the name of the midpoint node of a dataflow rule.
sub _midpoint_name {
    my ($rule_id) = @_;

    return "dfr_${rule_id}_mp";
}

124
125
126
127
128
129
130
131
132
133
134

=head2 build()

  Returntype : The GraphViz object built & populated
  Exceptions : Raised if there are issues with accessing the database
  Description : Builds the graph object and returns it.
  Status     : Beta

=cut

sub build {
    my ($self) = @_;

    my $all_analyses          = $self->dba()->get_AnalysisAdaptor()->fetch_all();
    my $all_ctrl_rules        = $self->dba()->get_AnalysisCtrlRuleAdaptor()->fetch_all();
    my $all_dataflow_rules    = $self->dba()->get_DataflowRuleAdaptor()->fetch_all();

    my %inflow_count = ();          # used to detect sources (nodes with zero inflow)
    my %outflow_rules = ();         # maps from analysis_node_name to a list of all dataflow rules that flow out of it
    my %dfr_flows_into_node = ();   # maps from dfr_id to target analysis_node_name

    foreach my $rule ( @$all_dataflow_rules ) {
        my $target_object = $rule->to_analysis;

            # only targets that have a dbID (i.e. analyses, not tables) count as inflow:
        if(my $to_id = $target_object->can('dbID') && $target_object->dbID()) {
            my $to_node_name = _analysis_node_name( $to_id );
            $inflow_count{$to_node_name}++;
            $dfr_flows_into_node{$rule->dbID()} = $to_node_name;
        }
        push @{$outflow_rules{ _analysis_node_name($rule->from_analysis_id()) }}, $rule;
    }

    my %subgraph_allocation = ();   # maps node names to the node name of the box they are drawn in (undef = outside any box)

        # NB: this is a very approximate algorithm with rough edges!
        # It will not find all start nodes in cyclic components!
    foreach my $source_analysis_node_name ( map { _analysis_node_name( $_->dbID ) } @$all_analyses ) {
        unless($inflow_count{$source_analysis_node_name}) {    # if there is no dataflow into this analysis
            $self->_allocate_to_subgraph(\%outflow_rules, \%dfr_flows_into_node, $source_analysis_node_name, \%subgraph_allocation ); # run the recursion in each component that has a non-cyclic start
        }
    }

    $self->_add_hive_details();
    foreach my $analysis (@$all_analyses) {     # not '$a', which would shadow sort's package variable
        $self->_add_analysis_node($analysis);
    }
    $self->_control_rules( $all_ctrl_rules );
    $self->_dataflow_rules( $all_dataflow_rules, \%subgraph_allocation );

    if($self->config_get('DisplayStretched') ) {

            # The invisible edges are linked to the destination (funnel) analysis.
            # _allocate_to_subgraph() stores analysis node names as allocations, so an allocation
            # is normally used as the edge target directly; the midpoint-to-analysis map is only
            # a fallback for allocations that are expressed as funnel midpoint names.
        my %rule_by_id = map { $_->dbID => $_ } @$all_dataflow_rules;
        my %midpoint_to_analysis = ();
        foreach my $funnel_dfr_id ( grep {$_} map { $_->funnel_dataflow_rule_id } @$all_dataflow_rules ) {
            my $funnel_rule = $rule_by_id{$funnel_dfr_id} or next;     # funnel id does not match any fetched rule
            my $funnel_target = $funnel_rule->to_analysis;
            next unless $funnel_target->can('dbID');                   # funnel does not flow into an analysis
            $midpoint_to_analysis{ _midpoint_name( $funnel_dfr_id ) } = _analysis_node_name( $funnel_target->dbID );
        }

            # sorted keys keep the order of the generated edges reproducible between runs
        foreach my $from ( sort keys %subgraph_allocation ) {
            my $allocation = $subgraph_allocation{$from} or next;      # unallocated nodes get no stretching edge
            my $to = exists $midpoint_to_analysis{$allocation}
                        ? $midpoint_to_analysis{$allocation}
                        : $allocation;

            $self->graph->add_edge( $from => $to,
                color     => 'black',
                style     => 'invis',   # toggle visibility by changing 'invis' to 'dashed'
            );
        }
    }

    if($self->config_get('DisplaySemaphoreBoxes') ) {
        $self->graph->subgraphs( \%subgraph_allocation );
        $self->graph->colour_scheme( $self->config_get('Box', 'ColourScheme') );
        $self->graph->colour_offset( $self->config_get('Box', 'ColourOffset') );
    }

    return $self->graph();
}


# Recursively walks the dataflow rules that leave $source_analysis_node_name and records, in
# %$subgraph_allocation, which box each reachable node (analysis, table or rule midpoint) belongs to.
#
#   $outflow_rules              : hashref, source node name => arrayref of dataflow rules leaving it
#   $dfr_flows_into_node        : hashref, dataflow rule id => node name of the analysis it flows into
#   $source_analysis_node_name  : node name to start from
#   $subgraph_allocation        : hashref filled in place, node name => allocation (node name of the funnel analysis, or undef)
#
# Allocation is first-come: once a target node has an entry, later branches neither change it nor recurse into it.
sub _allocate_to_subgraph {
    my ($self, $outflow_rules, $dfr_flows_into_node, $source_analysis_node_name, $subgraph_allocation ) = @_;

    my $source_analysis_allocation = $subgraph_allocation->{ $source_analysis_node_name };  # for some analyses it will be undef

        # '|| []' avoids autovivifying an empty entry for nodes that have no outgoing rules
    foreach my $rule ( @{ $outflow_rules->{$source_analysis_node_name} || [] } ) {
        my $target_object = $rule->to_analysis();
        my $target_node_name;

        if ($target_object->can('dbID')) {                      # target is an analysis
            $target_node_name = _analysis_node_name( $target_object->dbID() );
        } else {                                                # target is a table
            $target_node_name = _table_node_name($target_object->table_name()) . '_' .
                ($self->config_get('DuplicateTables') ?  $rule->from_analysis_id() : ($source_analysis_allocation||''));
        }

        my $proposed_allocation;    # will depend on whether we start a new semaphore
        my $fan_midpoint_name;      # only defined for rules that start a new semaphore
        my $funnel_dataflow_rule_id = $rule->funnel_dataflow_rule_id();

        if( $funnel_dataflow_rule_id ) {
                # if we do start a new semaphore, report to the new funnel (based on common funnel's analysis name)
            $proposed_allocation = $dfr_flows_into_node->{$funnel_dataflow_rule_id};

            $fan_midpoint_name = _midpoint_name( $rule->dbID() );
            $subgraph_allocation->{ $fan_midpoint_name } = $proposed_allocation;

                # draw the funnel's midpoint outside of the box
            $subgraph_allocation->{ _midpoint_name( $funnel_dataflow_rule_id ) } = $source_analysis_allocation;
        } else {
                # if we don't start a new semaphore, inherit the allocation of the source
            $proposed_allocation = $source_analysis_allocation;
        }

            # we allocate on first-come basis at the moment:
        if( exists $subgraph_allocation->{ $target_node_name } ) {  # already allocated?

                # A differing $proposed_allocation is ignored here: the earlier allocation wins.
            if( $funnel_dataflow_rule_id ) {  # correction for multiple entries into the same box (probably needs re-thinking)
                $subgraph_allocation->{ $fan_midpoint_name } = $subgraph_allocation->{ $target_node_name };
            }

        } else {
            $subgraph_allocation->{ $target_node_name } = $proposed_allocation;

            $self->_allocate_to_subgraph( $outflow_rules, $dfr_flows_into_node, $target_node_name, $subgraph_allocation );
        }
    }
}

255

256
257
# Adds a plain-text 'Details' node labelled "<dbname>@<host>" when the
# 'DisplayDetails' configuration option is true; otherwise adds nothing.
sub _add_hive_details {
  my ($self) = @_;

  my $details_font = $self->config_get('Node', 'Details', 'Font');

  if( $self->config_get('DisplayDetails') ) {
    my $connection = $self->dba()->dbc();
    my $db_name    = $connection->dbname;
    my $host_name  = $connection->host || '-';    # '-' stands in for a missing host

    $self->graph()->add_node( 'Details',
      label     => sprintf('%s@%s', $db_name, $host_name),
      fontname  => $details_font,
      shape     => 'plaintext',
    );
  }
}

272

273
# Adds one 'record'-shaped node for the given analysis, labelled with an HTML-like table that holds:
#   - the analysis logic_name and dbID,
#   - optionally (config 'DisplayStats' = 'barchart' or 'text') the job count breakout,
#   - optionally (config 'DisplayJobs' = N) up to N jobs of the analysis, ordered by dbID.
# Fill colour and font are taken from the configuration entry for the analysis' current status.
sub _add_analysis_node {
    my ($self, $analysis) = @_;

    my $analysis_stats = $analysis->stats();

    my ($breakout_label, $total_job_count, $count_hash)   = $analysis_stats->job_count_breakout();
    my $analysis_status                                   = $analysis_stats->status;
    my $analysis_status_colour                            = $self->config_get('Node', 'AnalysisStatus', $analysis_status, 'Colour');
    my $style                                             = $analysis->can_be_empty() ? 'dashed, filled' : 'filled' ;
    my $node_fontname                                     = $self->config_get('Node', 'AnalysisStatus', $analysis_status, 'Font');
    my $display_stats                                     = $self->config_get('DisplayStats') || '';   # '' keeps the 'eq' tests below warning-free when the option is unset

    my $colspan = 0;
    my $bar_chart = '';

    if( $display_stats eq 'barchart' ) {
        foreach my $count_method (qw(SEMAPHORED READY INPROGRESS DONE FAILED)) {
            if(my $count=$count_hash->{lc($count_method).'_job_count'}) {
                    # guard against a zero/undefined total so that inconsistent stats cannot cause a division by zero
                my $percentage = $total_job_count ? int(100*$count/$total_job_count) : 0;
                $bar_chart .= '<td bgcolor="'.$self->config_get('Node', 'JobStatus', $count_method, 'Colour').'" width="'.$percentage.'%">'.$count.lc(substr($count_method,0,1)).'</td>';
                ++$colspan;
            }
        }
        if($colspan != 1) {     # a single bar already shows the total, so the '=total' cell is only added otherwise
            $bar_chart .= '<td>='.$total_job_count.'</td>';
            ++$colspan;
        }
    }

    $colspan ||= 1;
    my $analysis_label  = '<<table border="0" cellborder="0" cellspacing="0" cellpadding="1"><tr><td colspan="'.$colspan.'">'.$analysis->logic_name().' ('.$analysis->dbID().')</td></tr>';
    if( $display_stats ) {
        $analysis_label    .= qq{<tr><td colspan="$colspan"> </td></tr>};
        if( $display_stats eq 'barchart') {
            $analysis_label    .= qq{<tr>$bar_chart</tr>};
        } elsif( $display_stats eq 'text') {
            $analysis_label    .= qq{<tr><td colspan="$colspan">$breakout_label</td></tr>};
        }
    }

    if( my $job_limit = $self->config_get('DisplayJobs') ) {
        my $adaptor = $self->dba->get_AnalysisJobAdaptor();

            # one job more than the limit is requested, to find out whether there are more jobs than will be shown
        my @jobs = sort {$a->dbID <=> $b->dbID} @{ $adaptor->fetch_some_by_analysis_id_limit( $analysis->dbID, $job_limit+1 )};

        my $hit_limit;
        if(scalar(@jobs)>$job_limit) {
            pop @jobs;
            $hit_limit = 1;
        }

        $analysis_label    .= '<tr><td colspan="'.$colspan.'"> </td></tr>';
        foreach my $job (@jobs) {
            my $input_id = $job->input_id;
            my $status   = $job->status;
            my $job_id   = $job->dbID;

            $input_id = '' unless defined $input_id;
            $input_id=~s/&/&amp;/g;     # has to be escaped first, before the substitutions below introduce their own '&'
            $input_id=~s/\>/&gt;/g;
            $input_id=~s/\</&lt;/g;
            $input_id=~s/\{|\}//g;      # curly braces are dropped from the displayed input_id

            $analysis_label    .= qq{<tr><td colspan="$colspan" bgcolor="}.$self->config_get('Node', 'JobStatus', $status, 'Colour').qq{">$job_id [$status]: $input_id</td></tr>};
        }

        if($hit_limit) {
            $analysis_label    .= qq{<tr><td colspan="$colspan">[ and }.($total_job_count-$job_limit).qq{ more ]</td></tr>};
        }
    }
    $analysis_label    .= '</table>>';

    $self->graph->add_node( _analysis_node_name( $analysis->dbID() ),
        label       => $analysis_label,
        shape       => 'record',
        fontname    => $node_fontname,
        style       => $style,
        fillcolor   => $analysis_status_colour,
    );
}


# Draws one 'tee'-headed edge per control rule, from the condition analysis
# to the controlled analysis, in the configured control-edge colour.
sub _control_rules {
  my ($self, $all_ctrl_rules) = @_;

  my $edge_colour = $self->config_get('Edge', 'Control', 'Colour');
  my $graphviz    = $self->graph();

  # Control rules always connect two analyses, so there are no table targets to special-case here.
  foreach my $ctrl_rule ( @$all_ctrl_rules ) {
    my $condition_node  = _analysis_node_name( $ctrl_rule->condition_analysis()->dbID() );
    my $controlled_node = _analysis_node_name( $ctrl_rule->ctrled_analysis()->dbID() );

    $graphviz->add_edge( $condition_node => $controlled_node,
      color     => $edge_colour,
      arrowhead => 'tee',
    );
  }
}

365

366
# Draws all dataflow rules as edges.
#
#   $all_dataflow_rules  : arrayref of dataflow rule objects
#   $subgraph_allocation : hashref, node name => allocation; only used to name table nodes when
#                          the 'DuplicateTables' option is off
#
# A rule that takes part in a semaphore (it either names a funnel rule, or is named as the funnel by
# another rule) is drawn as a two-part arrow through a point-shaped midpoint node; fan rules are
# additionally linked to their funnel's midpoint by a dashed edge. All other rules are one-part arrows.
# Table targets get their node created here via _add_table_node().
sub _dataflow_rules {
    my ($self, $all_dataflow_rules, $subgraph_allocation) = @_;

    my $graph = $self->graph();
    my $dataflow_colour  = $self->config_get('Edge', 'Data', 'Colour');
    my $semablock_colour = $self->config_get('Edge', 'Semablock', 'Colour');
    my $df_edge_fontname = $self->config_get('Edge', 'Data', 'Font');

        # first pass: find out which rules need a midpoint (both the fan rules and their funnel rules)
    my %needs_a_midpoint = ();
    foreach my $rule ( @$all_dataflow_rules ) {
        if(my $funnel_dataflow_rule_id = $rule->funnel_dataflow_rule_id()) {
            $needs_a_midpoint{$rule->dbID()}++;
            $needs_a_midpoint{$funnel_dataflow_rule_id}++;
        }
    }

        # second pass: draw the rules
    foreach my $rule ( @$all_dataflow_rules ) {

        my ($rule_id, $from_analysis_id, $branch_code, $funnel_dataflow_rule_id, $to) =
            ($rule->dbID(), $rule->from_analysis_id(), $rule->branch_code(), $rule->funnel_dataflow_rule_id(), $rule->to_analysis());

        my $from_node = _analysis_node_name($from_analysis_id);
        my $to_node;

            # Different treatment for analyses and tables:
        if(check_ref($to, 'Bio::EnsEMBL::Hive::Analysis')) {

            $to_node = _analysis_node_name( $to->dbID() );

        } elsif(check_ref($to, 'Bio::EnsEMBL::Hive::NakedTable')) {

                # the suffix has to be built the same way as in _allocate_to_subgraph() for the node names to agree
            $to_node = _table_node_name($to->table_name) . '_' .
                ( $self->config_get('DuplicateTables') ? $from_analysis_id : ($subgraph_allocation->{$from_node}||''));

            $self->_add_table_node($to_node, $to->table_name);

        } else {
            warn('Do not know how to handle the type '.ref($to));
            next;
        }

        if($needs_a_midpoint{$rule_id}) {
            my $midpoint_name = _midpoint_name($rule_id);

            $graph->add_node( $midpoint_name,   # midpoint itself
                color       => $dataflow_colour,
                label       => '',
                shape       => 'point',
                fixedsize   => 1,
                width       => 0.01,
                height      => 0.01,
            );
            $graph->add_edge( $from_node => $midpoint_name, # first half of the two-part arrow
                color       => $dataflow_colour,
                arrowhead   => 'none',
                label       => '#'.$branch_code,
                fontname    => $df_edge_fontname,
            );
            $graph->add_edge( $midpoint_name => $to_node,   # second half of the two-part arrow
                color     => $dataflow_colour,
            );
            if($funnel_dataflow_rule_id) {
                $graph->add_edge( $midpoint_name => _midpoint_name($funnel_dataflow_rule_id),   # semaphore inter-rule link
                    color     => $semablock_colour,
                    style     => 'dashed',
                    arrowhead => 'tee',
                    dir       => 'both',
                    arrowtail => 'crow',
                );
            }
        } else {
                # one-part arrow:
            $graph->add_edge( $from_node => $to_node,
                color       => $dataflow_colour,
                label       => '#'.$branch_code,
                fontname    => $df_edge_fontname,
            );
        } # /if($needs_a_midpoint{$rule_id})
    } # /foreach my $rule (@$all_dataflow_rules)
}

450

451
# Adds one 'record'-shaped node for a table, labelled with an HTML-like table.
#
#   $table_node : name of the graph node to add
#   $table_name : name of the database table; shown as the node's title
#
# When the 'DisplayData' option is set to N, the label also shows the (alphabetically sorted) column
# names and up to N rows fetched from the table, followed by a '[ more data ]' marker if the table
# holds more rows than were shown.
sub _add_table_node {
    my ($self, $table_node, $table_name) = @_;

    my $node_fontname = $self->config_get('Node', 'Table', 'Font');
    my $data_limit    = $self->config_get('DisplayData');
    my (@column_names, $table_data, $hit_limit);

    if( $data_limit ) {
        my $adaptor = $self->dba->get_NakedTableAdaptor();
        $adaptor->table_name( $table_name );

        @column_names = sort keys %{$adaptor->column_set};

            # one row more than the limit is requested, to find out whether there is more data than will be shown
        $table_data = $adaptor->fetch_all( 'LIMIT '.($data_limit+1) );

        if(scalar(@$table_data)>$data_limit) {
            pop @$table_data;
            $hit_limit = 1;
        }
    }

    my $colspan = scalar(@column_names) || 1;   # a colspan of 0 would not be valid in the label

        # Cell values are arbitrary database content: NULLs are shown as empty cells and the
        # characters that are special in HTML-like labels are escaped ('&' first).
    my $escape_cell = sub {
        my ($text) = @_;
        $text = '' unless defined $text;
        $text =~ s/&/&amp;/g;
        $text =~ s/</&lt;/g;
        $text =~ s/>/&gt;/g;
        return $text;
    };

    my $table_label = '<<table border="0" cellborder="0" cellspacing="0" cellpadding="1"><tr><td colspan="'.$colspan.'">'.$table_name.'</td></tr>';

    if( $data_limit ) {
        $table_label .= '<tr><td colspan="'.$colspan.'"> </td></tr>';
        $table_label .= '<tr>'.join('', map { qq{<td bgcolor="lightblue" border="1">$_</td>} } @column_names).'</tr>';
        foreach my $row (@$table_data) {
            $table_label .= '<tr>'.join('', map { '<td>'.$escape_cell->($_).'</td>' } @{$row}{@column_names}).'</tr>';
        }
        if($hit_limit) {
            $table_label  .= qq{<tr><td colspan="$colspan">[ more data ]</td></tr>};
        }
    }
    $table_label .= '</table>>';

    $self->graph()->add_node( $table_node,
        label => $table_label,
        shape => 'record',
        fontname => $node_fontname,
        color => $self->config_get('Node', 'Table', 'Colour'),
    );
}

Leo Gordon's avatar
Leo Gordon committed
493
1;