Graph.pm 14 KB
Newer Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
package Bio::EnsEMBL::Hive::Utils::Graph;

=head1 NAME

Bio::EnsEMBL::Hive::Utils::Graph

=head1 SYNOPSIS

  my $dba = get_hive_dba();
  my $g = Bio::EnsEMBL::Hive::Utils::Graph->new($dba);
  my $graphviz = $g->build();
  $graphviz->as_png('location.png');

=head1 DESCRIPTION

This is a module for converting a hive database's flow of analyses, control 
rules and dataflows into the GraphViz model language. This information can
then be converted to an image or to the dot language for further manipulation
in GraphViz.

=head1 METHODS/SUBROUTINES

See inline

=head1 AUTHOR

$Author: lg4 $

=head1 VERSION

$Revision: 1.20 $

=cut

use strict;
use warnings;

use Bio::EnsEMBL::Utils::Scalar qw(check_ref assert_ref);

40
use Bio::EnsEMBL::Hive::Utils::GraphViz;
41
use Bio::EnsEMBL::Hive::Utils::Config;
42

43
44
45

=head2 new()

  Arg [1] : Bio::EnsEMBL::Hive::DBSQL::DBAdaptor $dba;
              The adaptor to get information from
  Arg [2] : (optional) string $config_file_name;
                  A JSON file name to initialize the Config object with.
                  If one is not given then we don't pass anything into Config's constructor,
                  which results in loading configuration from Config's standard locations.
  Returntype : Graph object
  Exceptions : If the parameters are not as required
  Status     : Beta

=cut

# Constructor.
#   $dba              - hive DBAdaptor to read the workflow from; stored via dba()
#   $config_file_name - optional file name handed to the Config constructor
# Returns the new Graph object.
sub new {
  my ($class, $dba, $config_file_name) = @_;

  my $self = bless({}, ref($class) || $class);

  $self->dba($dba);

  # Only forward the file name when one was supplied, so that Config->new()
  # receives an empty argument list otherwise.
  my @config_args = $config_file_name ? ($config_file_name) : ();
  $self->config( Bio::EnsEMBL::Hive::Utils::Config->new(@config_args) );

  return $self;
}


=head2 graph()

  Arg [1] : None
  Description : Returns the GraphViz instance used by this module, creating it on first access
  Returntype : GraphViz
  Exceptions : None
  Status     : Beta

=cut

# Accessor for the GraphViz object that nodes and edges are added to.
# Takes no arguments. The object is created the first time it is asked for
# and cached in $self->{graph}, so later calls return the same instance.
sub graph {
  my ($self) = @_;

  unless (exists $self->{graph}) {
    $self->{graph} = Bio::EnsEMBL::Hive::Utils::GraphViz->new(
      name  => 'AnalysisWorkflow',
      ratio => 'compress',
    );
  }

  return $self->{graph};
}


=head2 dba()

  Arg [1] : The DBAdaptor instance
  Returntype : DBAdaptor
  Exceptions : If the given object is not a hive DBAdaptor
  Status     : Beta

=cut

# Combined getter/setter for the hive DBAdaptor.
# With a defined argument the value is type-checked by assert_ref() and stored;
# in every case the currently stored adaptor is returned.
sub dba {
  my $self        = shift;
  my $new_adaptor = shift;

  # Pure read access: nothing to validate or store.
  return $self->{dba} unless defined $new_adaptor;

  assert_ref($new_adaptor, 'Bio::EnsEMBL::Hive::DBSQL::DBAdaptor');
  $self->{dba} = $new_adaptor;

  return $self->{dba};
}


=head2 config()

  Arg [1] : The graph configuration object
  Returntype : Bio::EnsEMBL::Hive::Utils::Config.
  Exceptions : If the object given is not of the required type
  Status     : Beta

=cut

# Combined getter/setter for the graph configuration object.
# A defined argument is type-checked by assert_ref() and stored in
# $self->{config}; the stored object is returned in all cases.
sub config {
  my ($self, $config) = @_;

  if(defined $config) {
    assert_ref($config, 'Bio::EnsEMBL::Hive::Utils::Config');
    $self->{config} = $config;
  }

  return $self->{config};
}

126
127
128
129
130
131
132
133
134
135
136
137
138

# Plain function (not a method): turns an analysis id into the name of the
# graph node that represents that analysis.
sub _analysis_node_name {
    my ($analysis_id) = @_;

    return "analysis_${analysis_id}";
}

# Plain function (not a method): turns a dataflow rule id into the name of
# the midpoint node drawn on that rule's edge.
sub _midpoint_name {
    my ($rule_id) = @_;

    return sprintf('dfr_%s_mp', $rule_id);
}

139
140
141
142
143
144
145
146
147
148
149

=head2 build()

  Returntype : The GraphViz object built & populated
  Exceptions : Raised if there are issues with accessing the database
  Description : Builds the graph object and returns it.
  Status     : Beta

=cut

# Reads all analyses, control rules and dataflow rules through the DBAdaptor,
# adds them to the GraphViz object and returns that object.
#
# Steps:
#   1. index the dataflow rules (inflow per node, outflow rules per node);
#   2. allocate nodes to subgraphs, starting from every node without inflow;
#   3. add the details node, analysis nodes, control edges and dataflow edges;
#   4. depending on configuration, add invisible "stretching" edges and hand
#      the subgraph allocation over to the GraphViz object.
sub build {
    my ($self) = @_;

    my $dba                = $self->dba();
    my $all_analyses       = $dba->get_AnalysisAdaptor()->fetch_all();
    my $all_ctrl_rules     = $dba->get_AnalysisCtrlRuleAdaptor()->fetch_all();
    my $all_dataflow_rules = $dba->get_DataflowRuleAdaptor()->fetch_all();

    my %inflow_count   = ();  # used to detect sources (nodes with zero inflow)
    my %outflow_rules  = ();  # maps from analysis_node_name to a list of all dataflow rules that flow out of it
    my %dfr_flows_into = ();  # maps from dfr_id to target analysis_node_name

    foreach my $rule ( @$all_dataflow_rules ) {
        my $to_analysis = $rule->to_analysis();

            # targets that cannot do dbID() are not counted as inflow:
        if(my $to_id = $to_analysis->can('dbID') && $to_analysis->dbID()) {
            my $to_node_name = _analysis_node_name( $to_id );
            $inflow_count{$to_node_name}++;
            $dfr_flows_into{$rule->dbID()} = $to_node_name;
        }
        push @{$outflow_rules{ _analysis_node_name($rule->from_analysis_id()) }}, $rule;
    }

    my %subgraph_allocation = ();

        # NB: this is a very approximate algorithm with rough edges!
        # It will not find all start nodes in cyclic components!
    foreach my $analysis ( @$all_analyses ) {
        my $analysis_node_name = _analysis_node_name( $analysis->dbID() );
        next if $inflow_count{$analysis_node_name};

            # run the recursion in each component that has a non-cyclic start
        $self->_allocate_to_subgraph(\%outflow_rules, \%dfr_flows_into, $analysis_node_name, \%subgraph_allocation );
    }

    $self->_add_hive_details();
    foreach my $analysis (@$all_analyses) {
        $self->_add_analysis_node($analysis);
    }
    $self->_control_rules( $all_ctrl_rules );
    $self->_dataflow_rules( $all_dataflow_rules );

    if($self->config->get('Graph', 'DisplayStretched') ) {
            # link every allocated node to the node it was allocated to
        foreach my $from (keys %subgraph_allocation) {
            my $to = $subgraph_allocation{$from};
            next unless $to;

            $self->graph->add_edge( $from => $to,
                color     => 'black',
                style     => 'invis',   # toggle visibility by changing 'invis' to 'dashed'
            );
        }
    }

    if($self->config->get('Graph', 'DisplaySemaphoreBoxes') ) {
        $self->graph->subgraphs( \%subgraph_allocation );
        $self->graph->colour_scheme( $self->config->get('Graph', 'Box', 'ColourScheme') );
        $self->graph->colour_offset( $self->config->get('Graph', 'Box', 'ColourOffset') );
    }

    return $self->graph();
}


# Recursively walks the dataflow rules that leave $parent_analysis_node_name and
# records, in %$subgraph_allocation, which node each reached node (and each
# semaphore midpoint) is allocated to.
#
#   $outflow_rules             - hashref: node name => arrayref of dataflow rules leaving it
#   $dfr_flows_into            - hashref: dataflow rule id => target node name; only passed
#                                along to the recursive call, not read here
#   $parent_analysis_node_name - node whose outgoing rules are examined
#   $subgraph_allocation       - hashref filled in place: node name => allocation (may be undef)
#
# Nodes are allocated on a first-come basis: a node that already has an entry
# is neither re-allocated nor descended into again, which also ends the
# recursion on cycles.
sub _allocate_to_subgraph {
    my ($self, $outflow_rules, $dfr_flows_into, $parent_analysis_node_name, $subgraph_allocation ) = @_;

    my $parent_allocation = $subgraph_allocation->{ $parent_analysis_node_name };  # for some analyses it will be undef
    my $config            = $self->config();

    foreach my $rule ( @{ $outflow_rules->{$parent_analysis_node_name} } ) {
        my $to_analysis = $rule->to_analysis();
        my $has_dbid    = $to_analysis->can('dbID');

            # targets without dbID() are only followed when tables are duplicated per source analysis
        next unless $has_dbid or $config->get('Graph', 'DuplicateTables');

        my $this_analysis_node_name;
        if ($has_dbid) {
            $this_analysis_node_name = _analysis_node_name( $to_analysis->dbID() );
        } else {
            $this_analysis_node_name = $to_analysis->table_name();
            $this_analysis_node_name .= '_'.$rule->from_analysis_id() if $config->get('Graph', 'DuplicateTables');
        }

        my $funnel_dataflow_rule_id = $rule->funnel_dataflow_rule_id();

        my $proposed_allocation = $funnel_dataflow_rule_id  # depends on whether we start a new semaphore
            ? _midpoint_name( $funnel_dataflow_rule_id )    # if we do, report to the new funnel (based on common funnel rule's midpoint)
            : $parent_allocation;                           # if we don't, inherit the parent's funnel

        if($funnel_dataflow_rule_id) {
            my $fan_midpoint_name = _midpoint_name( $rule->dbID() );
            $subgraph_allocation->{ $fan_midpoint_name } = $proposed_allocation;

            my $funnel_midpoint_name = _midpoint_name( $funnel_dataflow_rule_id );
            $subgraph_allocation->{ $funnel_midpoint_name } = $parent_allocation;   # draw the funnel's midpoint outside of the box
        }

            # we allocate on first-come basis at the moment: an existing entry wins,
            # even if this branch would have proposed a different allocation
        next if exists $subgraph_allocation->{ $this_analysis_node_name };

        $subgraph_allocation->{ $this_analysis_node_name } = $proposed_allocation;

        $self->_allocate_to_subgraph( $outflow_rules, $dfr_flows_into, $this_analysis_node_name, $subgraph_allocation );
    }
}

258

259
260
# Adds a plain-text 'Details' node labelled "<dbname>@<host>" to the graph,
# but only when the 'Graph'/'DisplayDetails' configuration value is true.
# A false host value is shown as '-'.
sub _add_hive_details {
  my ($self) = @_;

  my $config = $self->config();
  return unless $config->get('Graph', 'DisplayDetails');

  # The font is only looked up once we know the node will be drawn.
  my $node_fontname = $config->get('Graph', 'Node', 'Details', 'Font');

  my $dbc   = $self->dba()->dbc();
  my $label = sprintf('%s@%s', $dbc->dbname, $dbc->host || '-');

  $self->graph()->add_node( 'Details',
    label     => $label,
    fontname  => $node_fontname,
    shape     => 'plaintext',
  );

  return;
}

275

276
277
278
279
280
281
282
# Adds one filled node for the given analysis object to the graph.
# The label shows "<logic_name> (<dbID>)" and, on a second line,
# "<done>+<remaining>=<total>" job counts taken from the analysis' stats object.
# Shape depends on can_be_empty(); colour and font are looked up in the
# configuration by the stats' status.
sub _add_analysis_node {
  my ($self, $analysis) = @_;

  my $graph  = $self->graph();
  my $config = $self->config();
  my $stats  = $analysis->stats();

  # Check we can invoke it & then check if it was able to be empty
  my $can_be_empty = $stats->can('can_be_empty') && $stats->can_be_empty();
  my $shape        = $can_be_empty ? 'doubleoctagon' : 'ellipse';

  # Fetched once into a scalar so that it always occupies exactly one slot
  # in the configuration path below.
  my $status        = $stats->status();
  my $status_colour = $config->get('Graph', 'Node', $status, 'Colour');
  my $node_fontname = $config->get('Graph', 'Node', $status, 'Font');

  my $analysis_id = $analysis->dbID();

  # The single-quoted '\n' is deliberate: the literal backslash-n is passed on to GraphViz.
  my $label = $analysis->logic_name().' ('.$analysis_id.')\n'
            . $stats->done_job_count().'+'.$stats->remaining_job_count().'='.$stats->total_job_count();

  $graph->add_node( _analysis_node_name( $analysis_id ),
    label       => $label,
    shape       => $shape,
    style       => 'filled',
    fontname    => $node_fontname,
    fillcolor   => $status_colour,
  );
}


# Draws one edge per control rule, from the condition analysis to the
# controlled analysis, using the configured control-edge colour and a 'tee'
# arrowhead.
#
#   $all_ctrl_rules - arrayref of control rule objects
sub _control_rules {
  my ($self, $all_ctrl_rules) = @_;

  my $control_colour = $self->config->get('Graph', 'Edge', 'Control', 'Colour');
  my $graph          = $self->graph();

  #The control rules are always from and to an analysis so no need to search for odd cases here
  foreach my $rule ( @$all_ctrl_rules ) {
    my $from = _analysis_node_name( $rule->condition_analysis()->dbID() );
    my $to   = _analysis_node_name( $rule->ctrled_analysis()->dbID() );

    $graph->add_edge( $from => $to,
      color     => $control_colour,
      arrowhead => 'tee',
    );
  }
}

313

314
# Draws the edges (and, where needed, midpoint nodes) for all dataflow rules.
#
#   $all_dataflow_rules - arrayref of dataflow rule objects
#
# A rule needs a midpoint when it either has a funnel rule itself or is named
# as the funnel of another rule. Such rules are drawn as a two-part arrow
# through a tiny point node; rules with a funnel additionally get a dashed
# link from their midpoint to the funnel rule's midpoint. All other rules are
# drawn as a single labelled arrow. Table targets get their node added here.
sub _dataflow_rules {
    my ($self, $all_dataflow_rules) = @_;

    my $graph  = $self->graph();
    my $config = $self->config();

    my $dataflow_colour  = $config->get('Graph', 'Edge', 'Data', 'Colour');
    my $semablock_colour = $config->get('Graph', 'Edge', 'Semablock', 'Colour');
    my $df_edge_fontname = $config->get('Graph', 'Edge', 'Data', 'Font');

        # First pass: find every rule that takes part in a semaphore, on either side.
    my %needs_a_midpoint = ();
    foreach my $rule ( @$all_dataflow_rules ) {
        if(my $funnel_dataflow_rule_id = $rule->funnel_dataflow_rule_id()) {
            $needs_a_midpoint{$rule->dbID()}++;
            $needs_a_midpoint{$funnel_dataflow_rule_id}++;
        }
    }

        # Second pass: draw.
    foreach my $rule ( @$all_dataflow_rules ) {

        my ($rule_id, $from_analysis_id, $branch_code, $funnel_dataflow_rule_id, $to) =
            ($rule->dbID(), $rule->from_analysis_id(), $rule->branch_code(), $rule->funnel_dataflow_rule_id(), $rule->to_analysis());

        my $from_node = _analysis_node_name($from_analysis_id);
        my $to_node;

            # Different treatment for analyses and tables:
        if(check_ref($to, 'Bio::EnsEMBL::Analysis')) {
            $to_node = _analysis_node_name( $to->dbID() );
        } elsif(check_ref($to, 'Bio::EnsEMBL::Hive::NakedTable')) {
            $to_node = $to->table_name();
                # with DuplicateTables every source analysis gets its own copy of the table node
            $to_node .= '_'.$from_analysis_id if $config->get('Graph', 'DuplicateTables');

            $self->_add_table_node($to_node);
        } else {
            warn('Do not know how to handle the type '.ref($to));
            next;
        }

        if($needs_a_midpoint{$rule_id}) {
            my $midpoint_name = _midpoint_name($rule_id);

            $graph->add_node( $midpoint_name,   # midpoint itself
                color       => $dataflow_colour,
                label       => '',
                shape       => 'point',
                fixedsize   => 1,
                width       => 0.01,
                height      => 0.01,
            );
            $graph->add_edge( $from_node => $midpoint_name, # first half of the two-part arrow
                color       => $dataflow_colour,
                arrowhead   => 'none',
                label       => '#'.$branch_code,
                fontname    => $df_edge_fontname,
            );
            $graph->add_edge( $midpoint_name => $to_node,   # second half of the two-part arrow
                color     => $dataflow_colour,
            );
            if($funnel_dataflow_rule_id) {
                $graph->add_edge( $midpoint_name => _midpoint_name($funnel_dataflow_rule_id),   # semaphore inter-rule link
                    color     => $semablock_colour,
                    style     => 'dashed',
                    arrowhead => 'tee',
                    dir       => 'both',
                    arrowtail => 'crow',
                );
            }
        } else {
                # one-part arrow:
            $graph->add_edge( $from_node => $to_node,
                color       => $dataflow_colour,
                label       => '#'.$branch_code,
                fontname    => $df_edge_fontname,
            );
        } # /if($needs_a_midpoint{$rule_id})
    } # /foreach my $rule (@$all_dataflow_rules)
}

396

397
398
# Adds a 'tab'-shaped node for a table to the graph.
#
#   $table - node name. When 'Graph'/'DuplicateTables' is set, a name of the
#            form "<table>_<suffix>" is expected and only the part before the
#            last underscore is used as the visible label.
#
# If the name contains no underscore the full name is used as the label; the
# capture variable is never read after a failed match.
sub _add_table_node {
  my ($self, $table) = @_;

  my $config        = $self->config();
  my $node_fontname = $config->get('Graph', 'Node', 'Table', 'Font');
  my $node_colour   = $config->get('Graph', 'Node', 'Table', 'Colour');

  my $table_name = $table;
  if ($config->get('Graph', 'DuplicateTables')) {
    # strip the trailing "_<suffix>" (everything after the last underscore)
    if ($table =~ /^(.*)_[^_]*$/) {
      $table_name = $1;
    }
  }

  $self->graph()->add_node( $table,
    label    => $table_name.'\n',   # single quotes: the literal backslash-n is passed on to GraphViz
    shape    => 'tab',
    fontname => $node_fontname,
    color    => $node_colour,
  );
}

Leo Gordon's avatar
Leo Gordon committed
416
1;