# Graph.pm
package Bio::EnsEMBL::Hive::Utils::Graph;

=head1 NAME

Bio::EnsEMBL::Hive::Utils::Graph

=head1 SYNOPSIS

  my $dba = get_hive_dba();
  my $g = Bio::EnsEMBL::Hive::Utils::Graph->new($dba);
  my $graphviz = $g->build();
  $graphviz->as_png('location.png');

=head1 DESCRIPTION

This is a module for converting a hive database's flow of analyses, control 
rules and dataflows into the GraphViz model language. This information can
then be converted to an image or to the dot language for further manipulation
in GraphViz.

=head1 METHODS/SUBROUTINES

See inline

=head1 AUTHOR

$Author: mm14 $

=head1 VERSION

$Revision: 1.21 $

=cut

use strict;
use warnings;

# check_ref / assert_ref are used for type checks on adaptors, configs and dataflow targets
use Bio::EnsEMBL::Utils::Scalar qw(check_ref assert_ref);

use Bio::EnsEMBL::Hive::Utils::GraphViz;
use Bio::EnsEMBL::Hive::Utils::Config;

=head2 new()

  Arg [1] : Bio::EnsEMBL::Hive::DBSQL::DBAdaptor $dba;
              The adaptor to get information from
  Arg [2] : (optional) string $config_file_name;
                  A JSON file name to initialize the Config object with.
                  If one is not given then we don't pass anything into Config's constructor,
                  which results in loading configuration from Config's standard locations.
  Returntype : Graph object
  Exceptions : If the parameters are not as required
  Status     : Beta

=cut

sub new {
  # Positional arguments: the hive DBAdaptor and an optional config file name.
  my ($class, $dba, $config_file_name) = @_;

  # Allow both Class->new(...) and $object->new(...)
  my $self = bless({}, ref($class) || $class);

  # The dba() setter validates the adaptor type via assert_ref
  $self->dba($dba);

  # Only pass a file name through when one was given; with an empty argument
  # list the Config constructor is called without parameters.
  my $config = Bio::EnsEMBL::Hive::Utils::Config->new( $config_file_name ? $config_file_name : () );
  $self->config($config);

  return $self;
}


=head2 graph()

  Arg [1] : none; the GraphViz instance is created on first call and then cached
  Returntype : GraphViz
  Exceptions : None
  Status     : Beta

=cut

sub graph {
  my ($self) = @_;

  # Lazily create the underlying GraphViz wrapper the first time it is asked for;
  # every later call returns the same cached instance.
  if(! exists $self->{graph}) {
    $self->{graph} = Bio::EnsEMBL::Hive::Utils::GraphViz->new( name => 'AnalysisWorkflow', ratio => 'compress' );
  }
  return $self->{graph};
}


=head2 dba()

  Arg [1] : The DBAdaptor instance
  Returntype : DBAdaptor
  Exceptions : If the given object is not a hive DBAdaptor
  Status     : Beta

=cut

sub dba {
  my $self    = shift;
  my $new_dba = shift;

  # Pure getter call: nothing to validate or store
  return $self->{dba} unless defined $new_dba;

  # Setter call: refuse anything that is not a hive DBAdaptor, then remember it
  assert_ref($new_dba, 'Bio::EnsEMBL::Hive::DBSQL::DBAdaptor');
  $self->{dba} = $new_dba;

  return $self->{dba};
}


=head2 config()

  Arg [1] : The graph configuration object
  Returntype : Bio::EnsEMBL::Hive::Utils::Config.
  Exceptions : If the object given is not of the required type
  Status     : Beta

=cut

sub config {
  my ($self, $config) = @_;

  # Acts as a setter only when a defined value is supplied
  if(defined $config) {
    # Throws unless the value is a Bio::EnsEMBL::Hive::Utils::Config
    assert_ref($config, 'Bio::EnsEMBL::Hive::Utils::Config');
    $self->{config} = $config;
  }
  return $self->{config};
}

sub _analysis_node_name {
    # Plain function (not a method): maps an analysis dbID to its graph node name
    my ($analysis_id) = @_;

    return "analysis_${analysis_id}";
}

sub _midpoint_name {
    # Plain function (not a method): maps a dataflow rule dbID to the name of its midpoint node
    my ($rule_id) = @_;

    return "dfr_${rule_id}_mp";
}

=head2 build()

  Returntype : The GraphViz object built & populated
  Exceptions : Raised if there are issues with accessing the database
  Description : Builds the graph object and returns it.
  Status     : Beta

=cut

sub build {
    my ($self) = @_;

        # Fetch the complete pipeline description: analyses, control rules and dataflow rules
    my $all_analyses          = $self->dba()->get_AnalysisAdaptor()->fetch_all();
    my $all_ctrl_rules        = $self->dba()->get_AnalysisCtrlRuleAdaptor()->fetch_all();
    my $all_dataflow_rules    = $self->dba()->get_DataflowRuleAdaptor()->fetch_all();

    my %inflow_count = ();    # used to detect sources (nodes with zero inflow)
    my %outflow_rules = ();   # maps from analysis_node_name to a list of all dataflow rules that flow out of it
    my %dfr_flows_into= ();   # maps from dfr_id to target analysis_node_name

    foreach my $rule ( @$all_dataflow_rules ) {
            # only targets that have a dbID (i.e. analyses rather than tables) count as inflow
        if(my $to_id = $rule->to_analysis->can('dbID') && $rule->to_analysis->dbID()) {
            my $to_node_name    = _analysis_node_name( $to_id );
            $inflow_count{$to_node_name}++;
            $dfr_flows_into{$rule->dbID()} = $to_node_name;
        }
        push @{$outflow_rules{ _analysis_node_name($rule->from_analysis_id()) }}, $rule;
    }

    my %subgraph_allocation = ();   # maps node name to the midpoint name of the semaphore box it belongs to (or undef)

        # NB: this is a very approximate algorithm with rough edges!
        # It will not find all start nodes in cyclic components!
    foreach my $analysis_id ( map { $_->dbID } @$all_analyses ) {
        my $analysis_node_name =  _analysis_node_name( $analysis_id );
        unless($inflow_count{$analysis_node_name}) {
            $self->_allocate_to_subgraph(\%outflow_rules, \%dfr_flows_into, $analysis_node_name, \%subgraph_allocation ); # run the recursion in each component that has a non-cyclic start
        }
    }

        # Populate the graph: details node, analysis nodes, then both kinds of edges
    $self->_add_hive_details();
    foreach my $analysis (@$all_analyses) {     # not called $a, which would mask sort's package variable
        $self->_add_analysis_node($analysis);
    }
    $self->_control_rules( $all_ctrl_rules );
    $self->_dataflow_rules( $all_dataflow_rules );

    if($self->config->get('Graph', 'DisplayStretched') ) {

        # The invisible edges will be linked to the destination analysis instead of the midpoint
        my %id_to_rule = map { ( $_->dbID => $_ ) } @$all_dataflow_rules;

        my %midpoint_to_analysis = ();
        foreach my $fdr_id ( grep {$_} map { $_->funnel_dataflow_rule_id } @$all_dataflow_rules ) {
            my $funnel_rule = $id_to_rule{$fdr_id}
                or next;                                    # funnel id that matches no fetched rule
            my $funnel_target = $funnel_rule->to_analysis;
            next unless $funnel_target->can('dbID');        # same guard as used for inflow counting above
            $midpoint_to_analysis{ _midpoint_name( $fdr_id ) } = _analysis_node_name( $funnel_target->dbID );
        }

        while( my($from, $to) = each %subgraph_allocation) {
            next unless $to;                                # node is not inside any semaphore box
            my $stretch_target = $midpoint_to_analysis{$to};
            next unless defined $stretch_target;            # never hand an undefined node name to add_edge

            $self->graph->add_edge( $from => $stretch_target,
                color     => 'black',
                style     => 'invis',   # toggle visibility by changing 'invis' to 'dashed'
            );
        }
    }

    if($self->config->get('Graph', 'DisplaySemaphoreBoxes') ) {
        $self->graph->subgraphs( \%subgraph_allocation );
        $self->graph->colour_scheme( $self->config->get('Graph', 'Box', 'ColourScheme') );
        $self->graph->colour_offset( $self->config->get('Graph', 'Box', 'ColourOffset') );
    }

    return $self->graph();
}


sub _allocate_to_subgraph {
        # Recursively walks the dataflow rules leaving $parent_analysis_node_name and records,
        # in the %$subgraph_allocation hash, which semaphore box (identified by the funnel rule's
        # midpoint name) each reached node belongs to. $dfr_flows_into is only passed along.
    my ($self, $outflow_rules, $dfr_flows_into, $parent_analysis_node_name, $subgraph_allocation ) = @_;

    my $parent_allocation = $subgraph_allocation->{ $parent_analysis_node_name };  # for some analyses it will be undef
    my $config = $self->config();

    foreach my $rule ( @{ $outflow_rules->{$parent_analysis_node_name} } ) {
        my $to_analysis                 = $rule->to_analysis();

            # targets without a dbID (tables) are only followed when they are drawn once per source analysis
        next unless $to_analysis->can('dbID') or $config->get('Graph', 'DuplicateTables');

        my $this_analysis_node_name;
        if ($to_analysis->can('dbID')) {
            $this_analysis_node_name = _analysis_node_name( $rule->to_analysis->dbID() );
        } else {
            $this_analysis_node_name = $to_analysis->table_name();
            $this_analysis_node_name .= '_'.$rule->from_analysis_id() if $config->get('Graph', 'DuplicateTables');
        }

        my $funnel_dataflow_rule_id     = $rule->funnel_dataflow_rule_id();

        my $proposed_allocation = $funnel_dataflow_rule_id  # depends on whether we start a new semaphore
            ? _midpoint_name( $funnel_dataflow_rule_id )        # if we do, report to the new funnel (based on common funnel rule's midpoint)
            : $parent_allocation;                               # if we don't, inherit the parent's funnel

        if($funnel_dataflow_rule_id) {
            my $fan_midpoint_name = _midpoint_name( $rule->dbID() );
            $subgraph_allocation->{ $fan_midpoint_name } = $proposed_allocation;

            my $funnel_midpoint_name = _midpoint_name( $funnel_dataflow_rule_id );
            $subgraph_allocation->{ $funnel_midpoint_name } = $parent_allocation;   # draw the funnel's midpoint outside of the box
        }

            # We allocate on first-come basis at the moment: a node that already has an entry
            # (even an undefined one) keeps it and is not descended into again, which also
            # stops the recursion from looping on nodes it has already visited.
        next if exists $subgraph_allocation->{ $this_analysis_node_name };

        $subgraph_allocation->{ $this_analysis_node_name } = $proposed_allocation;

        $self->_allocate_to_subgraph( $outflow_rules, $dfr_flows_into, $this_analysis_node_name, $subgraph_allocation );
    }

    return;
}

sub _add_hive_details {
  # Adds a plain-text 'Details' node labelled "dbname@host" when the
  # Graph/DisplayDetails configuration option is switched on.
  my ($self) = @_;

  my $node_fontname  = $self->config->get('Graph', 'Node', 'Details', 'Font');

  if($self->config->get('Graph', 'DisplayDetails') ) {
    my $dbc = $self->dba()->dbc();
    my $label = sprintf('%s@%s', $dbc->dbname, $dbc->host || '-');  # '-' stands in for a missing host
    $self->graph()->add_node( 'Details',
      label     => $label,
      fontname  => $node_fontname,
      shape     => 'plaintext',
    );
  }

  return;
}

sub _add_analysis_node {
  # Draws one analysis as a filled node whose label shows its logic name, dbID and job counters.
  my ($self, $analysis) = @_;   # deliberately not named $a, which would mask sort's package variable
  my $graph = $self->graph();

  #Check we can invoke it & then check if it was able to be empty
  my $can_be_empty = $analysis->stats()->can('can_be_empty') && $analysis->stats()->can_be_empty();
  my $shape = ($can_be_empty) ? 'doubleoctagon' : 'ellipse' ;

  # Colour and font are looked up in the configuration by the analysis' current status
  my $status_colour = $self->config->get('Graph', 'Node', $analysis->stats->status, 'Colour');
  my $node_fontname  = $self->config->get('Graph', 'Node', $analysis->stats->status, 'Font');

  $graph->add_node( _analysis_node_name( $analysis->dbID() ),
    # the single-quoted '\n' is passed on literally as a backslash followed by 'n'
    label       => $analysis->logic_name().' ('.$analysis->dbID().')\n'.$analysis->stats()->done_job_count().'+'.$analysis->stats()->remaining_job_count().'='.$analysis->stats()->total_job_count(),
    shape       => $shape,
    style       => 'filled',
    fontname    => $node_fontname,
    fillcolor   => $status_colour,
  );

  return;
}


sub _control_rules {
  # Draws one tee-headed edge per control rule, from the condition analysis to the controlled analysis.
  my ($self, $all_ctrl_rules) = @_;

  my $control_colour = $self->config->get('Graph', 'Edge', 'Control', 'Colour');
  my $graph = $self->graph();

  #The control rules are always from and to an analysis so no need to search for odd cases here
  foreach my $rule ( @$all_ctrl_rules ) {
    my ($from, $to) = ( _analysis_node_name( $rule->condition_analysis()->dbID() ), _analysis_node_name( $rule->ctrled_analysis()->dbID() ) );
    $graph->add_edge( $from => $to,
      color => $control_colour,
      arrowhead => 'tee',
    );
  }

  return;
}

sub _dataflow_rules {
        # Draws every dataflow rule as an edge from its source analysis to its target
        # (an analysis or a table). Rules that take part in a semaphore are drawn as a
        # two-part arrow through a tiny midpoint node, so that the fan rule's midpoint
        # can be linked to the funnel rule's midpoint.
    my ($self, $all_dataflow_rules) = @_;

    my $graph = $self->graph();

    my $dataflow_colour  = $self->config->get('Graph', 'Edge', 'Data', 'Colour');
    my $semablock_colour = $self->config->get('Graph', 'Edge', 'Semablock', 'Colour');
    my $df_edge_fontname    = $self->config->get('Graph', 'Edge', 'Data', 'Font');

        # First pass: both the fan rule and the funnel rule it points at need a midpoint
    my %needs_a_midpoint = ();
    foreach my $rule ( @$all_dataflow_rules ) {
        if(my $funnel_dataflow_rule_id = $rule->funnel_dataflow_rule_id()) {
            $needs_a_midpoint{$rule->dbID()}++;
            $needs_a_midpoint{$funnel_dataflow_rule_id}++;
        }
    }

        # Second pass: draw the nodes and edges
    foreach my $rule ( @$all_dataflow_rules ) {

        my ($rule_id, $from_analysis_id, $branch_code, $funnel_dataflow_rule_id, $to) =
            ($rule->dbID(), $rule->from_analysis_id(), $rule->branch_code(), $rule->funnel_dataflow_rule_id(), $rule->to_analysis());
        my $from_node = _analysis_node_name($from_analysis_id);
        my $to_node;

            # Different treatment for analyses and tables:
        if(check_ref($to, 'Bio::EnsEMBL::Analysis')) {
            $to_node = _analysis_node_name( $to->dbID() );
        } elsif(check_ref($to, 'Bio::EnsEMBL::Hive::NakedTable')) {
            $to_node = $to->table_name();
                # with DuplicateTables each source analysis gets its own copy of the table node
            $to_node .= '_'.$from_analysis_id if $self->config->get('Graph', 'DuplicateTables');

            $self->_add_table_node($to_node);
        } else {
            warn('Do not know how to handle the type '.ref($to));
            next;
        }

        if($needs_a_midpoint{$rule_id}) {
            my $midpoint_name = _midpoint_name($rule_id);

            $graph->add_node( $midpoint_name,   # midpoint itself
                color       => $dataflow_colour,
                label       => '',
                shape       => 'point',
                fixedsize   => 1,
                width       => 0.01,
                height      => 0.01,
            );
            $graph->add_edge( $from_node => $midpoint_name, # first half of the two-part arrow
                color       => $dataflow_colour,
                arrowhead   => 'none',
                label       => '#'.$branch_code,
                fontname    => $df_edge_fontname,
            );
            $graph->add_edge( $midpoint_name => $to_node,   # second half of the two-part arrow
                color     => $dataflow_colour,
            );
            if($funnel_dataflow_rule_id) {
                $graph->add_edge( $midpoint_name => _midpoint_name($funnel_dataflow_rule_id),   # semaphore inter-rule link
                    color     => $semablock_colour,
                    style     => 'dashed',
                    arrowhead => 'tee',
                    dir       => 'both',
                    arrowtail => 'crow',
                );
            }
        } else {
                # one-part arrow:
            $graph->add_edge( $from_node => $to_node,
                color       => $dataflow_colour,
                label       => '#'.$branch_code,
                fontname    => $df_edge_fontname,
            );
        } # /if($needs_a_midpoint{$rule_id})
    } # /foreach my $rule (@$all_dataflow_rules)

    return;
}

sub _add_table_node {
  # Adds a tab-shaped node for a table. $table is the node name; with the
  # Graph/DuplicateTables option on, the trailing "_<suffix>" part of the node
  # name is stripped to obtain the label.
  my ($self, $table) = @_;

  my $node_fontname    = $self->config->get('Graph', 'Node', 'Table', 'Font');

  my $table_name = $table;
  if ($self->config->get('Graph', 'DuplicateTables')) {
    # Only trust $1 when the match succeeded: after a failed match it still holds
    # the capture of some earlier, unrelated match. Without a suffix to strip,
    # the label falls back to the full node name.
    if ($table =~ /^(.*)_([^_]*)$/) {
      $table_name = $1;
    }
  }

  $self->graph()->add_node( $table,
    label => $table_name.'\n',    # single-quoted: a literal backslash followed by 'n'
    shape => 'tab',
    fontname => $node_fontname,
    color => $self->config->get('Graph', 'Node', 'Table', 'Colour'),
  );

  return;
}

1;