diff --git a/modules/Bio/EnsEMBL/Hive/Utils/Graph.pm b/modules/Bio/EnsEMBL/Hive/Utils/Graph.pm index b223f88a62448669f1ae027930b3b69187a09a4b..e346c8ab1643b99bf7657667e8145c51f2bb0e84 100644 --- a/modules/Bio/EnsEMBL/Hive/Utils/Graph.pm +++ b/modules/Bio/EnsEMBL/Hive/Utils/Graph.pm @@ -1,7 +1,5 @@ package Bio::EnsEMBL::Hive::Utils::Graph; -=pod - =head1 NAME Bio::EnsEMBL::Hive::Utils::Graph @@ -30,13 +28,13 @@ $Author: lg4 $ =head1 VERSION -$Revision: 1.10 $ +$Revision: 1.11 $ =cut use strict; use warnings; -use GraphViz; +use Bio::EnsEMBL::Hive::Utils::GraphViz; use Bio::EnsEMBL::Utils::Argument qw(rearrange); use Bio::EnsEMBL::Utils::Exception qw(throw); @@ -44,7 +42,6 @@ use Bio::EnsEMBL::Utils::Scalar qw(check_ref assert_ref); use Bio::EnsEMBL::Hive::Utils::Graph::Config; -=pod =head2 new() @@ -68,7 +65,6 @@ sub new { return $self; } -=pod =head2 graph() @@ -82,12 +78,11 @@ sub new { sub graph { my ($self) = @_; if(! exists $self->{graph}) { - $self->{graph} = GraphViz->new( name => 'AnalysisWorkflow', ratio => 'compress' ); + $self->{graph} = Bio::EnsEMBL::Hive::Utils::GraphViz->new( name => 'AnalysisWorkflow', ratio => 'compress' ); } return $self->{graph}; } -=pod =head2 dba() @@ -107,7 +102,6 @@ sub dba { return $self->{dba}; } -=pod =head2 config() @@ -130,7 +124,19 @@ sub config { return $self->{config}; } -=pod + +sub _analysis_node_name { + my $analysis_id = shift @_; + + return 'analysis_' . $analysis_id; +} + +sub _midpoint_name { + my $rule_id = shift @_; + + return 'dfr_'.$rule_id.'_mp'; +} + =head2 build() @@ -142,24 +148,111 @@ sub config { =cut sub build { - my ($self) = @_; - $self->_add_hive_details(); - my $analyses = $self->dba()->get_AnalysisAdaptor()->fetch_all(); - foreach my $a (@{$analyses}) { - $self->_add_analysis_node($a); - } - $self->_control_rules(); - $self->_dataflow_rules(); - return $self->graph(); + my ($self, $box, $stretch) = @_; + + my $all_analyses = $self->dba()->get_AnalysisAdaptor()->fetch_all(); + my $all_ctrl_rules = $self->dba()->get_AnalysisCtrlRuleAdaptor()->fetch_all(); + my $all_dataflow_rules = $self->dba()->get_DataflowRuleAdaptor()->fetch_all(); + + my %inflow_count = (); # used to detect sources (nodes with zero inflow) + my %outflow_rules = (); # maps from anlaysis_node_name to a list of all dataflow rules that flow out of it + my %dfr_flows_into= (); # maps from dfr_id to target analysis_node_name + + foreach my $rule ( @$all_dataflow_rules ) { + if(my $to_id = $rule->to_analysis->can('dbID') && $rule->to_analysis->dbID()) { + my $to_node_name = _analysis_node_name( $to_id ); + $inflow_count{$to_node_name}++; + $dfr_flows_into{$rule->dbID()} = $to_node_name; + } + push @{$outflow_rules{ _analysis_node_name($rule->from_analysis_id()) }}, $rule; + } + + my %subgraph_allocation = (); + + # NB: this is a very approximate algorithm with rough edges! + # It will not find all start nodes in cyclic components! + foreach my $analysis_id ( map { $_->dbID } @$all_analyses ) { + my $analysis_node_name = _analysis_node_name( $analysis_id ); + unless($inflow_count{$analysis_node_name}) { + _allocate_to_subgraph(\%outflow_rules, \%dfr_flows_into, $analysis_node_name, \%subgraph_allocation ); # run the recursion in each component that has a non-cyclic start + } + } + + $self->_add_hive_details(); + foreach my $a (@$all_analyses) { + $self->_add_analysis_node($a); + } + $self->_control_rules( $all_ctrl_rules ); + $self->_dataflow_rules( $all_dataflow_rules ); + + if($stretch) { + while( my($from, $to) = each %subgraph_allocation) { + if($to) { + $self->graph->add_edge( $from => $to, + color => 'black', + style => 'invis', # toggle visibility by changing 'invis' to 'dashed' + ); + } + } + } + + if($box) { + $self->graph->subgraphs( \%subgraph_allocation ); + } + + return $self->graph(); +} + + +sub _allocate_to_subgraph { + my ( $outflow_rules, $dfr_flows_into, $parent_analysis_node_name, $subgraph_allocation ) = @_; + + my $parent_allocation = $subgraph_allocation->{ $parent_analysis_node_name }; # for some analyses it will be undef + + foreach my $rule ( @{ $outflow_rules->{$parent_analysis_node_name} } ) { + my $to_analysis = $rule->to_analysis(); + next unless( $to_analysis->can('dbID')); # skip dataflow-into-tables + + my $this_analysis_node_name = _analysis_node_name( $rule->to_analysis->dbID() ); + my $funnel_dataflow_rule_id = $rule->funnel_dataflow_rule_id(); + + my $proposed_allocation = $funnel_dataflow_rule_id # depends on whether we start a new semaphore + ? $dfr_flows_into->{$funnel_dataflow_rule_id} # if we do, report to the new funnel + : $parent_allocation; # it we don't, inherit the parent's funnel + + if($funnel_dataflow_rule_id) { + my $fan_midpoint_name = _midpoint_name( $rule->dbID() ); + $subgraph_allocation->{ $fan_midpoint_name } = $proposed_allocation; + + my $funnel_midpoint_name = _midpoint_name( $funnel_dataflow_rule_id ); + $subgraph_allocation->{ $funnel_midpoint_name } = $parent_allocation; # draw the funnel's midpoint outside of the box + } + if( exists $subgraph_allocation->{ $this_analysis_node_name } ) { # we allocate on first-come basis at the moment + my $known_allocation = $subgraph_allocation->{ $this_analysis_node_name } || ''; + $proposed_allocation ||= ''; + + if( $known_allocation eq $proposed_allocation) { + # warn "analysis '$this_analysis_node_name' has already been allocated to the same '$known_allocation' by another branch"; + } else { + # warn "analysis '$this_analysis_node_name' has already been allocated to '$known_allocation' however this branch would allocate it to '$proposed_allocation'"; + } + + } else { + # warn "allocating analysis '$this_analysis_node_name' to '$proposed_allocation'"; + $subgraph_allocation->{ $this_analysis_node_name } = $proposed_allocation; + + _allocate_to_subgraph( $outflow_rules, $dfr_flows_into, $this_analysis_node_name, $subgraph_allocation ); + } + } } + sub _add_hive_details { my ($self) = @_; if($self->config()->{DisplayDetails}) { my $dbc = $self->dba()->dbc(); my $label = sprintf('%s@%s', $dbc->dbname, $dbc->host || '-'); - $self->graph()->add_node( - 'details', + $self->graph()->add_node( 'details', label => $label, fontname => $self->config()->{Fonts}->{node}, shape => 'plaintext' @@ -179,8 +272,7 @@ sub _add_analysis_node { my $config = $self->config()->{Colours}->{Status}; my $colour = $config->{$a->stats()->status()} || $config->{OTHER}; - $graph->add_node( - $a->dbID(), + $graph->add_node( _analysis_node_name( $a->dbID() ), label => $a->logic_name().' ('.$a->dbID().')\n'.$a->stats()->done_job_count().'+'.$a->stats()->remaining_job_count().'='.$a->stats()->total_job_count(), shape => $shape, style => 'filled', @@ -191,16 +283,15 @@ sub _add_analysis_node { sub _control_rules { - my ($self) = @_; + my ($self, $all_ctrl_rules) = @_; my $config = $self->config()->{Colours}->{Flows}; my $graph = $self->graph(); - my $ctrl_rules = $self->dba()->get_AnalysisCtrlRuleAdaptor()->fetch_all(); #The control rules are always from and to an analysis so no need to search for odd cases here - foreach my $rule (@{$ctrl_rules}) { - my ($from, $to) = ($rule->condition_analysis()->dbID(), $rule->ctrled_analysis()->dbID()); - $graph->add_edge($from => $to, + foreach my $rule ( @$all_ctrl_rules ) { + my ($from, $to) = ( _analysis_node_name( $rule->condition_analysis()->dbID() ), _analysis_node_name( $rule->ctrled_analysis()->dbID() ) ); + $graph->add_edge( $from => $to, color => $config->{control}, fontname => $self->config()->{Fonts}->{edge}, arrowhead => 'tee', @@ -208,37 +299,20 @@ sub _control_rules { } } -sub _midpoint_name { - my $rule_id = shift @_; - - return 'dfr_'.$rule_id.'_mp'; -} - -sub saturated_set { - my ($a2a, $set, $from, $except) = @_; - - foreach my $to ( keys %{$a2a->{$from}} ) { - if( defined($to) and ($to != $except) and !$set->{ $to }++ ) { - saturated_set($a2a, $set, $to, $except); - } - } - return $set; -} - sub _dataflow_rules { - my ($self) = @_; + my ($self, $all_dataflow_rules) = @_; + my $graph = $self->graph(); my $config = $self->config()->{Colours}->{Flows}; - my $dataflow_rules = $self->dba()->get_DataflowRuleAdaptor()->fetch_all(); my %needs_a_midpoint = (); - my %dfr_flows_into = (); - my %aid2aid = (); - foreach my $rule (@{$dataflow_rules}) { + my %aid2aid_nonsem = (); # simply a directed graph between numerical analysis_ids, except for semaphored rules + foreach my $rule ( @$all_dataflow_rules ) { if(my $to_id = $rule->to_analysis->can('dbID') && $rule->to_analysis->dbID()) { - $dfr_flows_into{$rule->dbID()} = $to_id; - $aid2aid{$rule->from_analysis_id()}{$to_id}++; + unless( $rule->funnel_dataflow_rule_id ) { + $aid2aid_nonsem{$rule->from_analysis_id()}{$to_id}++; + } } if(my $funnel_dataflow_rule_id = $rule->funnel_dataflow_rule_id()) { $needs_a_midpoint{$rule->dbID()}++; @@ -246,15 +320,16 @@ sub _dataflow_rules { } } - foreach my $rule (@{$dataflow_rules}) { + foreach my $rule ( @$all_dataflow_rules ) { my ($rule_id, $from_analysis_id, $branch_code, $funnel_dataflow_rule_id, $to) = ($rule->dbID(), $rule->from_analysis_id(), $rule->branch_code(), $rule->funnel_dataflow_rule_id(), $rule->to_analysis()); - my $to_node; + my ($from_node, $to_id, $to_node) = ( _analysis_node_name($from_analysis_id) ); # Different treatment for analyses and tables: if(check_ref($to, 'Bio::EnsEMBL::Analysis')) { - $to_node = $to->dbID(); + $to_id = $to->dbID(); + $to_node = _analysis_node_name($to_id); } elsif(check_ref($to, 'Bio::EnsEMBL::Hive::NakedTable')) { $to_node = $to->table_name(); $self->_add_table_node($to_node); @@ -266,8 +341,7 @@ sub _dataflow_rules { if($needs_a_midpoint{$rule_id}) { my $midpoint_name = _midpoint_name($rule_id); - # midpoint itself: - $graph->add_node( $midpoint_name, + $graph->add_node( $midpoint_name, # midpoint itself color => $config->{data}, label => '', shape => 'point', @@ -275,21 +349,17 @@ sub _dataflow_rules { width => 0.01, height => 0.01, ); - # first half of the two-part arrow: - $graph->add_edge( $from_analysis_id => $midpoint_name, + $graph->add_edge( $from_node => $midpoint_name, # first half of the two-part arrow color => $config->{data}, arrowhead => 'none', label => '#'.$branch_code, fontname => $self->config()->{Fonts}->{edge}, ); - # second half of the two-part arrow: - $graph->add_edge( $midpoint_name => $to_node, + $graph->add_edge( $midpoint_name => $to_node, # second half of the two-part arrow color => $config->{data}, ); if($funnel_dataflow_rule_id) { - - # semaphore inter-rule link: - $graph->add_edge( $midpoint_name => _midpoint_name($funnel_dataflow_rule_id), + $graph->add_edge( $midpoint_name => _midpoint_name($funnel_dataflow_rule_id), # semaphore inter-rule link color => $config->{semablock}, fontname => $self->config()->{Fonts}->{edge}, style => 'dashed', @@ -297,33 +367,23 @@ sub _dataflow_rules { dir => 'both', arrowtail => 'crow', ); - - my $funnel_analysis_id = $dfr_flows_into{$funnel_dataflow_rule_id}; - - # invisible links to enforce semaphore-generated order: - foreach my $dependant ( keys %{ saturated_set(\%aid2aid, {$to_node =>1}, $to_node, $from_analysis_id) } ) { - $graph->add_edge( $dependant => $funnel_analysis_id, - color => 'black', - dir => 'none', - style => 'invis', # toggle visibility by changing 'invis' to 'dashed' - ); - } } } else { # one-part arrow: - $graph->add_edge($from_analysis_id => $to_node, + $graph->add_edge( $from_node => $to_node, color => $config->{data}, label => '#'.$branch_code, fontname => $self->config()->{Fonts}->{edge}, ); - } - } + } # /if($needs_a_midpoint{$rule_id}) + } # /foreach my $rule (@$all_dataflow_rules) + } + sub _add_table_node { my ($self, $table) = @_; - $self->graph()->add_node( - $table, + $self->graph()->add_node( $table, label => $table.'\n', fontname => 'serif', shape => 'tab', diff --git a/modules/Bio/EnsEMBL/Hive/Utils/GraphViz.pm b/modules/Bio/EnsEMBL/Hive/Utils/GraphViz.pm new file mode 100644 index 0000000000000000000000000000000000000000..e1212c3d8eb3c29704997555b9972511e07a9588 --- /dev/null +++ b/modules/Bio/EnsEMBL/Hive/Utils/GraphViz.pm @@ -0,0 +1,103 @@ + +# an extension of GraphViz that supports nested clusters + +package Bio::EnsEMBL::Hive::Utils::GraphViz; + +use strict; +use warnings; +use base ('GraphViz'); + +#my ($colorscheme, $coloroffset) = ('ylorbr9', 1); +#my ($colorscheme, $coloroffset) = ('purples7', 1); +#my ($colorscheme, $coloroffset) = ('orrd8', 1); +#my ($colorscheme, $coloroffset) = ('bugn7', 0); +my ($colorscheme, $coloroffset) = ('blues9', 1); + + +sub subgraphs { + my $self = shift @_; + if(@_) { + $self->{_subgraphs} = shift @_; + } + return $self->{_subgraphs}; +} + + +sub get_top_clusters { + my $self = shift @_; + + my $subgraphs = $self->subgraphs(); + + my %set = (); + foreach my $top_cluster (values %$subgraphs) { + if( $top_cluster and !$subgraphs->{ $top_cluster } ) { # if it's a valid node not mentioned in the keys, it is a top cluster + $set{$top_cluster}++; + } + } + return [ keys %set ]; +} + + +sub get_nodes_that_point_at { + my ($self, $node) = @_; + + my $subgraphs = $self->subgraphs(); + my %set = (); + while( my ($key,$value) = each %$subgraphs) { + if($value and ($value eq $node)) { + $set{$key}++; + } + } + return [ keys %set ]; +} + + +sub generate_subgraph { + my ($self, $cluster_name, $depth) = @_; + + my $subgraphs = $self->subgraphs(); + + my $prefix = "\t" x $depth; + + my $text = ''; + + $text .= $prefix . "subgraph cluster_${cluster_name} {\n"; +# $text .= $prefix . "\tlabel=\"$cluster_name\";\n"; + $text .= $prefix . "\tcolorscheme=$colorscheme;\n"; + $text .= $prefix . "\tstyle=filled;\n"; + $text .= $prefix . "\tcolor=".($depth+$coloroffset).";\n"; + + foreach my $node_name ( @{ $self->get_nodes_that_point_at( $cluster_name ) } ) { + + $text .= $prefix . "\t${node_name};\n"; + if( @{ $self->get_nodes_that_point_at( $node_name ) } ) { + $text .= $self->generate_subgraph( $node_name, $depth+1 ); + } + } + $text .= $prefix . "}\n"; + + return $text; +} + + +sub _as_debug { + my $self = shift @_; + + my $text = $self->SUPER::_as_debug; + + my $subgraphs = $self->subgraphs(); + + $text=~s/^}$//m; + + foreach my $node_name ( @{ $self->get_top_clusters() } ) { + $text .= $self->generate_subgraph( $node_name, 1); + } + $text .= "}\n"; + +# print $text; + + return $text; +} + +1; + diff --git a/scripts/generate_graph.pl b/scripts/generate_graph.pl index e10764689d14136d93affed33ef8e07f48a38cc8..621fac7fd716f56aebc1b0fee1f82167f5bee722 100644 --- a/scripts/generate_graph.pl +++ b/scripts/generate_graph.pl @@ -15,14 +15,13 @@ use Bio::EnsEMBL::Hive::Utils::Graph::Config; my $self = bless({}, __PACKAGE__); -$self->run(); +$self->main(); -sub run { +sub main { my ($self) = @_; $self->_options(); $self->_process_options(); $self->_write_graph(); - return; } sub _options { @@ -40,11 +39,13 @@ sub _options { 'f|format=s' => \$self->{format}, 'o|output=s' => \$self->{output}, 'config' => \$self->{config}, + + 's|stretch!' => \$self->{stretch}, + 'b|box!' => \$self->{box}, 'h|help' => \$self->{help}, 'm|man' => \$self->{man}, ); - return; } sub _process_options { @@ -106,8 +107,6 @@ sub _process_options { my $hash = do $self->{config}; $self->{config_hash} = $hash; } - - return; } sub _write_graph { @@ -119,7 +118,7 @@ sub _write_graph { } my $graph = Bio::EnsEMBL::Hive::Utils::Graph->new(-DBA => $self->{dba}, -CONFIG => $config); - my $graphviz = $graph->build(); + my $graphviz = $graph->build( $self->{box}, $self->{stretch} ); my $call = q{as_}.$self->{format}; @@ -132,8 +131,6 @@ sub _write_graph { -verbose => 1 }); } - - return; } __END__ @@ -235,7 +232,7 @@ $Author: lg4 $ =head1 VERSION -$Revision: 1.5 $ +$Revision: 1.6 $ =head1 REQUIREMENTS