# Graph.pm -- Bio::EnsEMBL::Hive::Utils::Graph
package Bio::EnsEMBL::Hive::Utils::Graph;

=head1 NAME

Bio::EnsEMBL::Hive::Utils::Graph

=head1 SYNOPSIS

  my $dba = get_hive_dba();
  my $g = Bio::EnsEMBL::Hive::Utils::Graph->new($dba);   # an optional JSON config file name may follow $dba
  my $graphviz = $g->build();
  $graphviz->as_png('location.png');

=head1 DESCRIPTION

This is a module for converting a hive database's flow of analyses, control 
rules and dataflows into the GraphViz model language. This information can
then be converted to an image or to the dot language for further manipulation
in GraphViz.

=head1 METHODS/SUBROUTINES

See inline

=head1 AUTHOR

$Author: lg4 $

=head1 VERSION

$Revision: 1.17 $

=cut

use strict;
use warnings;

use Bio::EnsEMBL::Utils::Scalar qw(check_ref assert_ref);

use Bio::EnsEMBL::Hive::Utils::GraphViz;
use Bio::EnsEMBL::Hive::Utils::Config;

=head2 new()

  Arg [1] : Bio::EnsEMBL::Hive::DBSQL::DBAdaptor $dba;
              The adaptor to get information from
  Arg [2] : (optional) string $config_file_name;
              A JSON file name to initialize the Config object with.
              If one is not given then we don't pass anything into Config's constructor,
              which results in loading configuration from Config's standard locations.
  Returntype : Graph object
  Exceptions : If a defined argument is not of the type required by dba() / config()
  Status     : Beta

=cut

sub new {
  my ($class, $dba, $config_file_name) = @_;

  my $self = bless({}, ref($class) || $class);

  $self->dba($dba);

  # Pass an empty list (not undef) when no file name was given,
  # so that Config's constructor is called without arguments.
  my $config = Bio::EnsEMBL::Hive::Utils::Config->new( $config_file_name ? $config_file_name : () );
  $self->config($config);

  return $self;
}


=head2 graph()

  Description : Getter for the GraphViz instance used by this module.
                The instance is created on first access and reused afterwards.
  Returntype : Bio::EnsEMBL::Hive::Utils::GraphViz
  Exceptions : None
  Status     : Beta

=cut

sub graph {
  my ($self) = @_;

  # Lazily create the GraphViz wrapper the first time it is asked for.
  unless( exists $self->{graph} ) {
    $self->{graph} = Bio::EnsEMBL::Hive::Utils::GraphViz->new( name => 'AnalysisWorkflow', ratio => 'compress' );
  }

  return $self->{graph};
}


=head2 dba()

  Arg [1] : (optional) The DBAdaptor instance to store
  Description : Getter/setter for the hive DBAdaptor. The stored value is only
                replaced when a defined argument is given.
  Returntype : DBAdaptor
  Exceptions : If the given object is not a hive DBAdaptor
  Status     : Beta

=cut

sub dba {
  my $self    = shift;
  my $adaptor = shift;

  # Plain getter call: nothing to validate or store.
  return $self->{dba} unless defined $adaptor;

  assert_ref($adaptor, 'Bio::EnsEMBL::Hive::DBSQL::DBAdaptor');
  $self->{dba} = $adaptor;

  return $self->{dba};
}


=head2 config()

  Arg [1] : (optional) The graph configuration object to store
  Description : Getter/setter for the configuration object. The stored value is
                only replaced when a defined argument is given.
  Returntype : Bio::EnsEMBL::Hive::Utils::Config.
  Exceptions : If the object given is not of the required type
  Status     : Beta

=cut

sub config {
  my ($self, $config) = @_;

  if(defined $config) {
    assert_ref($config, 'Bio::EnsEMBL::Hive::Utils::Config');
    $self->{config} = $config;
  }

  return $self->{config};
}


# Returns the graph node name for the analysis with the given id: 'analysis_<id>'.
# Plain function (not a method): takes the analysis id as its only argument.
sub _analysis_node_name {
    my ($analysis_id) = @_;

    return "analysis_${analysis_id}";
}

# Returns the graph node name for the midpoint of the dataflow rule with the
# given id: 'dfr_<id>_mp'.
# Plain function (not a method): takes the rule id as its only argument.
sub _midpoint_name {
    my ($rule_id) = @_;

    return sprintf('dfr_%s_mp', $rule_id);
}

=head2 build()

  Returntype : The GraphViz object built & populated
  Exceptions : Raised if there are issues with accessing the database
  Description : Builds the graph object and returns it.
                Fetches all analyses, control rules and dataflow rules through the
                DBAdaptor, adds the corresponding nodes and edges, and then applies
                the optional 'DisplayStretched' and 'DisplaySemaphoreBoxes' settings
                of the 'Graph' configuration section.
  Status     : Beta

=cut

sub build {
    my ($self) = @_;

    my $dba                   = $self->dba();
    my $all_analyses          = $dba->get_AnalysisAdaptor()->fetch_all();
    my $all_ctrl_rules        = $dba->get_AnalysisCtrlRuleAdaptor()->fetch_all();
    my $all_dataflow_rules    = $dba->get_DataflowRuleAdaptor()->fetch_all();

    my %inflow_count = ();    # used to detect sources (nodes with zero inflow)
    my %outflow_rules = ();   # maps from analysis_node_name to a list of all dataflow rules that flow out of it
    my %dfr_flows_into= ();   # maps from dfr_id to target analysis_node_name

    foreach my $rule ( @$all_dataflow_rules ) {
        my $to_analysis = $rule->to_analysis;

            # targets without a dbID() method (or with a false dbID) are not counted as inflow
        my $to_id = $to_analysis->can('dbID') ? $to_analysis->dbID() : undef;
        if($to_id) {
            my $to_node_name    = _analysis_node_name( $to_id );
            $inflow_count{$to_node_name}++;
            $dfr_flows_into{$rule->dbID()} = $to_node_name;
        }
        push @{$outflow_rules{ _analysis_node_name($rule->from_analysis_id()) }}, $rule;
    }

    my %subgraph_allocation = ();

        # NB: this is a very approximate algorithm with rough edges!
        # It will not find all start nodes in cyclic components!
    foreach my $analysis_id ( map { $_->dbID } @$all_analyses ) {
        my $analysis_node_name =  _analysis_node_name( $analysis_id );
        unless($inflow_count{$analysis_node_name}) {
            _allocate_to_subgraph(\%outflow_rules, \%dfr_flows_into, $analysis_node_name, \%subgraph_allocation ); # run the recursion in each component that has a non-cyclic start
        }
    }

    $self->_add_hive_details();
    foreach my $analysis (@$all_analyses) {     # not named $a: that would mask sort's special variable
        $self->_add_analysis_node($analysis);
    }
    $self->_control_rules( $all_ctrl_rules );
    $self->_dataflow_rules( $all_dataflow_rules );

    if($self->config->get('Graph', 'DisplayStretched') ) {
        while( my($from, $to) = each %subgraph_allocation) {
            next unless $to;    # nodes without an allocation get no extra edge

            $self->graph->add_edge( $from => $to,
                color     => 'black',
                style     => 'invis',   # toggle visibility by changing 'invis' to 'dashed'
            );
        }
    }

    if($self->config->get('Graph', 'DisplaySemaphoreBoxes') ) {
        $self->graph->subgraphs( \%subgraph_allocation );
        $self->graph->colour_scheme( $self->config->get('Graph', 'Box', 'ColourScheme') );
        $self->graph->colour_offset( $self->config->get('Graph', 'Box', 'ColourOffset') );
    }

    return $self->graph();
}


# Recursively walks the dataflow rules that flow out of $parent_analysis_node_name
# and records, in %$subgraph_allocation, which node each reached analysis node and
# each semaphore midpoint is allocated to (undef when there is none).
#
#   $outflow_rules             : hashref, analysis node name => arrayref of outgoing dataflow rules
#   $dfr_flows_into            : hashref, dataflow rule id   => target analysis node name
#   $parent_analysis_node_name : node name to start from
#   $subgraph_allocation       : hashref filled in place, node name => allocation
#
# Plain function (not a method). An analysis node is allocated only once, by the
# first branch that reaches it, and is not descended into again; this also stops
# the recursion on cycles.
sub _allocate_to_subgraph {
    my ( $outflow_rules, $dfr_flows_into, $parent_analysis_node_name, $subgraph_allocation ) = @_;

    my $parent_allocation = $subgraph_allocation->{ $parent_analysis_node_name };  # for some analyses it will be undef

        # '|| []' : nodes without outgoing rules are simply skipped (and not autovivified)
    foreach my $rule ( @{ $outflow_rules->{$parent_analysis_node_name} || [] } ) {
        my $to_analysis                 = $rule->to_analysis();
        next unless( $to_analysis->can('dbID'));    # skip dataflow-into-tables

        my $this_analysis_node_name     = _analysis_node_name( $to_analysis->dbID() );
        my $funnel_dataflow_rule_id     = $rule->funnel_dataflow_rule_id();

        my $proposed_allocation = $funnel_dataflow_rule_id  # depends on whether we start a new semaphore
            ? $dfr_flows_into->{$funnel_dataflow_rule_id}       # if we do, report to the new funnel
            : $parent_allocation;                               # if we don't, inherit the parent's funnel

        if($funnel_dataflow_rule_id) {
            my $fan_midpoint_name = _midpoint_name( $rule->dbID() );
            $subgraph_allocation->{ $fan_midpoint_name } = $proposed_allocation;

            my $funnel_midpoint_name = _midpoint_name( $funnel_dataflow_rule_id );
            $subgraph_allocation->{ $funnel_midpoint_name } = $parent_allocation;   # draw the funnel's midpoint outside of the box
        }

            # we allocate on first-come basis at the moment:
            # a node that has already been allocated keeps its allocation, whether or not this branch agrees with it
        next if exists $subgraph_allocation->{ $this_analysis_node_name };

        $subgraph_allocation->{ $this_analysis_node_name } = $proposed_allocation;

        _allocate_to_subgraph( $outflow_rules, $dfr_flows_into, $this_analysis_node_name, $subgraph_allocation );
    }
}

# Adds a plain-text 'Details' node labelled '<dbname>@<host>' to the graph.
# Does nothing unless the 'Graph' / 'DisplayDetails' configuration value is true.
sub _add_hive_details {
  my ($self) = @_;

  if($self->config->get('Graph', 'DisplayDetails') ) {
    my $node_fontname = $self->config->get('Graph', 'Node', 'Details', 'Font');

    my $dbc   = $self->dba()->dbc();
    my $label = sprintf('%s@%s', $dbc->dbname, $dbc->host || '-');   # '-' stands in for a missing host

    $self->graph()->add_node( 'Details',
      label     => $label,
      fontname  => $node_fontname,
      shape     => 'plaintext',
    );
  }
}

# Adds one filled node for the given analysis.
#
#   $analysis : analysis object; must provide dbID(), logic_name() and stats()
#
# The label shows 'logic_name (dbID)' followed by 'done+remaining=total' job counts.
# Fill colour and font are looked up in the configuration under the status of the
# analysis; analyses whose stats report can_be_empty are drawn as a double octagon,
# all others as an ellipse.
sub _add_analysis_node {
  my ($self, $analysis) = @_;      # not named $a: that would mask sort's special variable

  my $graph = $self->graph();

  # Ask for the stats object once, so that shape, colour and all three counts
  # are derived from the same object.
  my $stats = $analysis->stats();

  #Check we can invoke it & then check if it was able to be empty
  my $can_be_empty = $stats->can('can_be_empty') && $stats->can_be_empty();
  my $shape = ($can_be_empty) ? 'doubleoctagon' : 'ellipse' ;

  my $status         = $stats->status;
  my $status_colour  = $self->config->get('Graph', 'Node', $status, 'Colour');
  my $node_fontname  = $self->config->get('Graph', 'Node', $status, 'Font');

  # '\n' is single-quoted on purpose: the two characters are passed on verbatim
  # (presumably for GraphViz to interpret as a line break -- confirm against the DOT label rules).
  my $label = $analysis->logic_name().' ('.$analysis->dbID().')\n'
            . $stats->done_job_count().'+'.$stats->remaining_job_count().'='.$stats->total_job_count();

  $graph->add_node( _analysis_node_name( $analysis->dbID() ),
    label       => $label,
    shape       => $shape,
    style       => 'filled',
    fontname    => $node_fontname,
    fillcolor   => $status_colour,
  );
}


# Draws one 'tee'-headed edge per control rule, from the condition analysis
# to the controlled analysis, in the configured control-edge colour.
#
#   $all_ctrl_rules : arrayref of control rule objects
sub _control_rules {
  my ($self, $all_ctrl_rules) = @_;

  my $control_colour = $self->config->get('Graph', 'Edge', 'Control', 'Colour');
  my $graph = $self->graph();

  #The control rules are always from and to an analysis so no need to search for odd cases here
  foreach my $rule ( @$all_ctrl_rules ) {
    my $from = _analysis_node_name( $rule->condition_analysis()->dbID() );
    my $to   = _analysis_node_name( $rule->ctrled_analysis()->dbID() );

    $graph->add_edge( $from => $to,
      color => $control_colour,
      arrowhead => 'tee',
    );
  }
}

# Draws every dataflow rule as an edge labelled with its branch code.
#
#   $all_dataflow_rules : arrayref of dataflow rule objects
#
# Rules that take part in a semaphore (they name a funnel rule, or are named as a
# funnel by another rule) are drawn as a two-part arrow through a small midpoint
# node; a rule that names a funnel also gets a dashed link from its midpoint to the
# funnel's midpoint. All other rules are drawn as a single edge.
# Targets that are NakedTable objects get a table node; targets of any other
# unknown type are reported with warn() and skipped.
sub _dataflow_rules {
    my ($self, $all_dataflow_rules) = @_;

    my $graph = $self->graph();
    my $dataflow_colour  = $self->config->get('Graph', 'Edge', 'Data', 'Colour');
    my $semablock_colour = $self->config->get('Graph', 'Edge', 'Semablock', 'Colour');
    my $df_edge_fontname = $self->config->get('Graph', 'Edge', 'Data', 'Font');

        # First pass: mark both ends of every semaphore (the fan rule and its funnel rule)
    my %needs_a_midpoint = ();
    foreach my $rule ( @$all_dataflow_rules ) {
        if(my $funnel_dataflow_rule_id = $rule->funnel_dataflow_rule_id()) {
            $needs_a_midpoint{$rule->dbID()}++;
            $needs_a_midpoint{$funnel_dataflow_rule_id}++;
        }
    }

        # Second pass: draw the edges
    foreach my $rule ( @$all_dataflow_rules ) {

        my ($rule_id, $from_analysis_id, $branch_code, $funnel_dataflow_rule_id, $to) =
            ($rule->dbID(), $rule->from_analysis_id(), $rule->branch_code(), $rule->funnel_dataflow_rule_id(), $rule->to_analysis());

        my $from_node = _analysis_node_name($from_analysis_id);
        my $to_node;

            # Different treatment for analyses and tables:
        if(check_ref($to, 'Bio::EnsEMBL::Analysis')) {
            $to_node = _analysis_node_name( $to->dbID() );
        } elsif(check_ref($to, 'Bio::EnsEMBL::Hive::NakedTable')) {
            $to_node = $to->table_name();
            $self->_add_table_node($to_node);
        } else {
            warn('Do not know how to handle the type '.ref($to));
            next;
        }

        if($needs_a_midpoint{$rule_id}) {
            my $midpoint_name = _midpoint_name($rule_id);

            $graph->add_node( $midpoint_name,   # midpoint itself
                color       => $dataflow_colour,
                label       => '',
                shape       => 'point',
                fixedsize   => 1,
                width       => 0.01,
                height      => 0.01,
            );
            $graph->add_edge( $from_node => $midpoint_name, # first half of the two-part arrow
                color       => $dataflow_colour,
                arrowhead   => 'none',
                label       => '#'.$branch_code,
                fontname    => $df_edge_fontname,
            );
            $graph->add_edge( $midpoint_name => $to_node,   # second half of the two-part arrow
                color     => $dataflow_colour,
            );
            if($funnel_dataflow_rule_id) {
                $graph->add_edge( $midpoint_name => _midpoint_name($funnel_dataflow_rule_id),   # semaphore inter-rule link
                    color     => $semablock_colour,
                    style     => 'dashed',
                    arrowhead => 'tee',
                    dir       => 'both',
                    arrowtail => 'crow',
                );
            }
        } else {
                # one-part arrow:
            $graph->add_edge( $from_node => $to_node,
                color       => $dataflow_colour,
                label       => '#'.$branch_code,
                fontname    => $df_edge_fontname,
            );
        } # /if($needs_a_midpoint{$rule_id})
    } # /foreach my $rule (@$all_dataflow_rules)

}

# Adds a 'tab'-shaped node for a table that is the target of a dataflow rule.
#
#   $table : table name; used both as the node name and (followed by a literal
#            backslash-n, which is passed on verbatim) as the node label
#
# Font and colour come from the 'Graph' / 'Node' / 'Table' configuration section.
sub _add_table_node {
  my ($self, $table) = @_;

  my $node_fontname = $self->config->get('Graph', 'Node', 'Table', 'Font');
  my $node_colour   = $self->config->get('Graph', 'Node', 'Table', 'Colour');

  $self->graph()->add_node( $table,
    label => $table.'\n',
    shape => 'tab',
    fontname => $node_fontname,
    color => $node_colour,
  );
}

1;