Commit 7e45b86c authored by Leo Gordon's avatar Leo Gordon
Browse files

code for showing semaphores as nested boxes

parent 9ddf14ed
package Bio::EnsEMBL::Hive::Utils::Graph;
=pod
=head1 NAME
Bio::EnsEMBL::Hive::Utils::Graph
......@@ -30,13 +28,13 @@ $Author: lg4 $
=head1 VERSION
$Revision: 1.10 $
$Revision: 1.11 $
=cut
use strict;
use warnings;
use GraphViz;
use Bio::EnsEMBL::Hive::Utils::GraphViz;
use Bio::EnsEMBL::Utils::Argument qw(rearrange);
use Bio::EnsEMBL::Utils::Exception qw(throw);
......@@ -44,7 +42,6 @@ use Bio::EnsEMBL::Utils::Scalar qw(check_ref assert_ref);
use Bio::EnsEMBL::Hive::Utils::Graph::Config;
=pod
=head2 new()
......@@ -68,7 +65,6 @@ sub new {
return $self;
}
=pod
=head2 graph()
......@@ -82,12 +78,11 @@ sub new {
sub graph {
my ($self) = @_;
if(! exists $self->{graph}) {
$self->{graph} = GraphViz->new( name => 'AnalysisWorkflow', ratio => 'compress' );
$self->{graph} = Bio::EnsEMBL::Hive::Utils::GraphViz->new( name => 'AnalysisWorkflow', ratio => 'compress' );
}
return $self->{graph};
}
=pod
=head2 dba()
......@@ -107,7 +102,6 @@ sub dba {
return $self->{dba};
}
=pod
=head2 config()
......@@ -130,7 +124,19 @@ sub config {
return $self->{config};
}
=pod
sub _analysis_node_name {
my $analysis_id = shift @_;
return 'analysis_' . $analysis_id;
}
sub _midpoint_name {
my $rule_id = shift @_;
return 'dfr_'.$rule_id.'_mp';
}
=head2 build()
......@@ -142,24 +148,111 @@ sub config {
=cut
sub build {
my ($self) = @_;
$self->_add_hive_details();
my $analyses = $self->dba()->get_AnalysisAdaptor()->fetch_all();
foreach my $a (@{$analyses}) {
$self->_add_analysis_node($a);
}
$self->_control_rules();
$self->_dataflow_rules();
return $self->graph();
my ($self, $box, $stretch) = @_;
my $all_analyses = $self->dba()->get_AnalysisAdaptor()->fetch_all();
my $all_ctrl_rules = $self->dba()->get_AnalysisCtrlRuleAdaptor()->fetch_all();
my $all_dataflow_rules = $self->dba()->get_DataflowRuleAdaptor()->fetch_all();
my %inflow_count = (); # used to detect sources (nodes with zero inflow)
my %outflow_rules = (); # maps from anlaysis_node_name to a list of all dataflow rules that flow out of it
my %dfr_flows_into= (); # maps from dfr_id to target analysis_node_name
foreach my $rule ( @$all_dataflow_rules ) {
if(my $to_id = $rule->to_analysis->can('dbID') && $rule->to_analysis->dbID()) {
my $to_node_name = _analysis_node_name( $to_id );
$inflow_count{$to_node_name}++;
$dfr_flows_into{$rule->dbID()} = $to_node_name;
}
push @{$outflow_rules{ _analysis_node_name($rule->from_analysis_id()) }}, $rule;
}
my %subgraph_allocation = ();
# NB: this is a very approximate algorithm with rough edges!
# It will not find all start nodes in cyclic components!
foreach my $analysis_id ( map { $_->dbID } @$all_analyses ) {
my $analysis_node_name = _analysis_node_name( $analysis_id );
unless($inflow_count{$analysis_node_name}) {
_allocate_to_subgraph(\%outflow_rules, \%dfr_flows_into, $analysis_node_name, \%subgraph_allocation ); # run the recursion in each component that has a non-cyclic start
}
}
$self->_add_hive_details();
foreach my $a (@$all_analyses) {
$self->_add_analysis_node($a);
}
$self->_control_rules( $all_ctrl_rules );
$self->_dataflow_rules( $all_dataflow_rules );
if($stretch) {
while( my($from, $to) = each %subgraph_allocation) {
if($to) {
$self->graph->add_edge( $from => $to,
color => 'black',
style => 'invis', # toggle visibility by changing 'invis' to 'dashed'
);
}
}
}
if($box) {
$self->graph->subgraphs( \%subgraph_allocation );
}
return $self->graph();
}
sub _allocate_to_subgraph {
my ( $outflow_rules, $dfr_flows_into, $parent_analysis_node_name, $subgraph_allocation ) = @_;
my $parent_allocation = $subgraph_allocation->{ $parent_analysis_node_name }; # for some analyses it will be undef
foreach my $rule ( @{ $outflow_rules->{$parent_analysis_node_name} } ) {
my $to_analysis = $rule->to_analysis();
next unless( $to_analysis->can('dbID')); # skip dataflow-into-tables
my $this_analysis_node_name = _analysis_node_name( $rule->to_analysis->dbID() );
my $funnel_dataflow_rule_id = $rule->funnel_dataflow_rule_id();
my $proposed_allocation = $funnel_dataflow_rule_id # depends on whether we start a new semaphore
? $dfr_flows_into->{$funnel_dataflow_rule_id} # if we do, report to the new funnel
: $parent_allocation; # it we don't, inherit the parent's funnel
if($funnel_dataflow_rule_id) {
my $fan_midpoint_name = _midpoint_name( $rule->dbID() );
$subgraph_allocation->{ $fan_midpoint_name } = $proposed_allocation;
my $funnel_midpoint_name = _midpoint_name( $funnel_dataflow_rule_id );
$subgraph_allocation->{ $funnel_midpoint_name } = $parent_allocation; # draw the funnel's midpoint outside of the box
}
if( exists $subgraph_allocation->{ $this_analysis_node_name } ) { # we allocate on first-come basis at the moment
my $known_allocation = $subgraph_allocation->{ $this_analysis_node_name } || '';
$proposed_allocation ||= '';
if( $known_allocation eq $proposed_allocation) {
# warn "analysis '$this_analysis_node_name' has already been allocated to the same '$known_allocation' by another branch";
} else {
# warn "analysis '$this_analysis_node_name' has already been allocated to '$known_allocation' however this branch would allocate it to '$proposed_allocation'";
}
} else {
# warn "allocating analysis '$this_analysis_node_name' to '$proposed_allocation'";
$subgraph_allocation->{ $this_analysis_node_name } = $proposed_allocation;
_allocate_to_subgraph( $outflow_rules, $dfr_flows_into, $this_analysis_node_name, $subgraph_allocation );
}
}
}
sub _add_hive_details {
my ($self) = @_;
if($self->config()->{DisplayDetails}) {
my $dbc = $self->dba()->dbc();
my $label = sprintf('%s@%s', $dbc->dbname, $dbc->host || '-');
$self->graph()->add_node(
'details',
$self->graph()->add_node( 'details',
label => $label,
fontname => $self->config()->{Fonts}->{node},
shape => 'plaintext'
......@@ -179,8 +272,7 @@ sub _add_analysis_node {
my $config = $self->config()->{Colours}->{Status};
my $colour = $config->{$a->stats()->status()} || $config->{OTHER};
$graph->add_node(
$a->dbID(),
$graph->add_node( _analysis_node_name( $a->dbID() ),
label => $a->logic_name().' ('.$a->dbID().')\n'.$a->stats()->done_job_count().'+'.$a->stats()->remaining_job_count().'='.$a->stats()->total_job_count(),
shape => $shape,
style => 'filled',
......@@ -191,16 +283,15 @@ sub _add_analysis_node {
sub _control_rules {
my ($self) = @_;
my ($self, $all_ctrl_rules) = @_;
my $config = $self->config()->{Colours}->{Flows};
my $graph = $self->graph();
my $ctrl_rules = $self->dba()->get_AnalysisCtrlRuleAdaptor()->fetch_all();
#The control rules are always from and to an analysis so no need to search for odd cases here
foreach my $rule (@{$ctrl_rules}) {
my ($from, $to) = ($rule->condition_analysis()->dbID(), $rule->ctrled_analysis()->dbID());
$graph->add_edge($from => $to,
foreach my $rule ( @$all_ctrl_rules ) {
my ($from, $to) = ( _analysis_node_name( $rule->condition_analysis()->dbID() ), _analysis_node_name( $rule->ctrled_analysis()->dbID() ) );
$graph->add_edge( $from => $to,
color => $config->{control},
fontname => $self->config()->{Fonts}->{edge},
arrowhead => 'tee',
......@@ -208,37 +299,20 @@ sub _control_rules {
}
}
sub _midpoint_name {
my $rule_id = shift @_;
return 'dfr_'.$rule_id.'_mp';
}
sub saturated_set {
my ($a2a, $set, $from, $except) = @_;
foreach my $to ( keys %{$a2a->{$from}} ) {
if( defined($to) and ($to != $except) and !$set->{ $to }++ ) {
saturated_set($a2a, $set, $to, $except);
}
}
return $set;
}
sub _dataflow_rules {
my ($self) = @_;
my ($self, $all_dataflow_rules) = @_;
my $graph = $self->graph();
my $config = $self->config()->{Colours}->{Flows};
my $dataflow_rules = $self->dba()->get_DataflowRuleAdaptor()->fetch_all();
my %needs_a_midpoint = ();
my %dfr_flows_into = ();
my %aid2aid = ();
foreach my $rule (@{$dataflow_rules}) {
my %aid2aid_nonsem = (); # simply a directed graph between numerical analysis_ids, except for semaphored rules
foreach my $rule ( @$all_dataflow_rules ) {
if(my $to_id = $rule->to_analysis->can('dbID') && $rule->to_analysis->dbID()) {
$dfr_flows_into{$rule->dbID()} = $to_id;
$aid2aid{$rule->from_analysis_id()}{$to_id}++;
unless( $rule->funnel_dataflow_rule_id ) {
$aid2aid_nonsem{$rule->from_analysis_id()}{$to_id}++;
}
}
if(my $funnel_dataflow_rule_id = $rule->funnel_dataflow_rule_id()) {
$needs_a_midpoint{$rule->dbID()}++;
......@@ -246,15 +320,16 @@ sub _dataflow_rules {
}
}
foreach my $rule (@{$dataflow_rules}) {
foreach my $rule ( @$all_dataflow_rules ) {
my ($rule_id, $from_analysis_id, $branch_code, $funnel_dataflow_rule_id, $to) =
($rule->dbID(), $rule->from_analysis_id(), $rule->branch_code(), $rule->funnel_dataflow_rule_id(), $rule->to_analysis());
my $to_node;
my ($from_node, $to_id, $to_node) = ( _analysis_node_name($from_analysis_id) );
# Different treatment for analyses and tables:
if(check_ref($to, 'Bio::EnsEMBL::Analysis')) {
$to_node = $to->dbID();
$to_id = $to->dbID();
$to_node = _analysis_node_name($to_id);
} elsif(check_ref($to, 'Bio::EnsEMBL::Hive::NakedTable')) {
$to_node = $to->table_name();
$self->_add_table_node($to_node);
......@@ -266,8 +341,7 @@ sub _dataflow_rules {
if($needs_a_midpoint{$rule_id}) {
my $midpoint_name = _midpoint_name($rule_id);
# midpoint itself:
$graph->add_node( $midpoint_name,
$graph->add_node( $midpoint_name, # midpoint itself
color => $config->{data},
label => '',
shape => 'point',
......@@ -275,21 +349,17 @@ sub _dataflow_rules {
width => 0.01,
height => 0.01,
);
# first half of the two-part arrow:
$graph->add_edge( $from_analysis_id => $midpoint_name,
$graph->add_edge( $from_node => $midpoint_name, # first half of the two-part arrow
color => $config->{data},
arrowhead => 'none',
label => '#'.$branch_code,
fontname => $self->config()->{Fonts}->{edge},
);
# second half of the two-part arrow:
$graph->add_edge( $midpoint_name => $to_node,
$graph->add_edge( $midpoint_name => $to_node, # second half of the two-part arrow
color => $config->{data},
);
if($funnel_dataflow_rule_id) {
# semaphore inter-rule link:
$graph->add_edge( $midpoint_name => _midpoint_name($funnel_dataflow_rule_id),
$graph->add_edge( $midpoint_name => _midpoint_name($funnel_dataflow_rule_id), # semaphore inter-rule link
color => $config->{semablock},
fontname => $self->config()->{Fonts}->{edge},
style => 'dashed',
......@@ -297,33 +367,23 @@ sub _dataflow_rules {
dir => 'both',
arrowtail => 'crow',
);
my $funnel_analysis_id = $dfr_flows_into{$funnel_dataflow_rule_id};
# invisible links to enforce semaphore-generated order:
foreach my $dependant ( keys %{ saturated_set(\%aid2aid, {$to_node =>1}, $to_node, $from_analysis_id) } ) {
$graph->add_edge( $dependant => $funnel_analysis_id,
color => 'black',
dir => 'none',
style => 'invis', # toggle visibility by changing 'invis' to 'dashed'
);
}
}
} else {
# one-part arrow:
$graph->add_edge($from_analysis_id => $to_node,
$graph->add_edge( $from_node => $to_node,
color => $config->{data},
label => '#'.$branch_code,
fontname => $self->config()->{Fonts}->{edge},
);
}
}
} # /if($needs_a_midpoint{$rule_id})
} # /foreach my $rule (@$all_dataflow_rules)
}
sub _add_table_node {
my ($self, $table) = @_;
$self->graph()->add_node(
$table,
$self->graph()->add_node( $table,
label => $table.'\n',
fontname => 'serif',
shape => 'tab',
......
# an extension of GraphViz that supports nested clusters
package Bio::EnsEMBL::Hive::Utils::GraphViz;
use strict;
use warnings;
use base ('GraphViz');
#my ($colorscheme, $coloroffset) = ('ylorbr9', 1);
#my ($colorscheme, $coloroffset) = ('purples7', 1);
#my ($colorscheme, $coloroffset) = ('orrd8', 1);
#my ($colorscheme, $coloroffset) = ('bugn7', 0);
my ($colorscheme, $coloroffset) = ('blues9', 1);
sub subgraphs {
my $self = shift @_;
if(@_) {
$self->{_subgraphs} = shift @_;
}
return $self->{_subgraphs};
}
sub get_top_clusters {
my $self = shift @_;
my $subgraphs = $self->subgraphs();
my %set = ();
foreach my $top_cluster (values %$subgraphs) {
if( $top_cluster and !$subgraphs->{ $top_cluster } ) { # if it's a valid node not mentioned in the keys, it is a top cluster
$set{$top_cluster}++;
}
}
return [ keys %set ];
}
sub get_nodes_that_point_at {
my ($self, $node) = @_;
my $subgraphs = $self->subgraphs();
my %set = ();
while( my ($key,$value) = each %$subgraphs) {
if($value and ($value eq $node)) {
$set{$key}++;
}
}
return [ keys %set ];
}
sub generate_subgraph {
my ($self, $cluster_name, $depth) = @_;
my $subgraphs = $self->subgraphs();
my $prefix = "\t" x $depth;
my $text = '';
$text .= $prefix . "subgraph cluster_${cluster_name} {\n";
# $text .= $prefix . "\tlabel=\"$cluster_name\";\n";
$text .= $prefix . "\tcolorscheme=$colorscheme;\n";
$text .= $prefix . "\tstyle=filled;\n";
$text .= $prefix . "\tcolor=".($depth+$coloroffset).";\n";
foreach my $node_name ( @{ $self->get_nodes_that_point_at( $cluster_name ) } ) {
$text .= $prefix . "\t${node_name};\n";
if( @{ $self->get_nodes_that_point_at( $node_name ) } ) {
$text .= $self->generate_subgraph( $node_name, $depth+1 );
}
}
$text .= $prefix . "}\n";
return $text;
}
sub _as_debug {
my $self = shift @_;
my $text = $self->SUPER::_as_debug;
my $subgraphs = $self->subgraphs();
$text=~s/^}$//m;
foreach my $node_name ( @{ $self->get_top_clusters() } ) {
$text .= $self->generate_subgraph( $node_name, 1);
}
$text .= "}\n";
# print $text;
return $text;
}
1;
......@@ -15,14 +15,13 @@ use Bio::EnsEMBL::Hive::Utils::Graph::Config;
my $self = bless({}, __PACKAGE__);
$self->run();
$self->main();
sub run {
sub main {
my ($self) = @_;
$self->_options();
$self->_process_options();
$self->_write_graph();
return;
}
sub _options {
......@@ -40,11 +39,13 @@ sub _options {
'f|format=s' => \$self->{format},
'o|output=s' => \$self->{output},
'config' => \$self->{config},
's|stretch!' => \$self->{stretch},
'b|box!' => \$self->{box},
'h|help' => \$self->{help},
'm|man' => \$self->{man},
);
return;
}
sub _process_options {
......@@ -106,8 +107,6 @@ sub _process_options {
my $hash = do $self->{config};
$self->{config_hash} = $hash;
}
return;
}
sub _write_graph {
......@@ -119,7 +118,7 @@ sub _write_graph {
}
my $graph = Bio::EnsEMBL::Hive::Utils::Graph->new(-DBA => $self->{dba}, -CONFIG => $config);
my $graphviz = $graph->build();
my $graphviz = $graph->build( $self->{box}, $self->{stretch} );
my $call = q{as_}.$self->{format};
......@@ -132,8 +131,6 @@ sub _write_graph {
-verbose => 1
});
}
return;
}
__END__
......@@ -235,7 +232,7 @@ $Author: lg4 $
=head1 VERSION
$Revision: 1.5 $
$Revision: 1.6 $
=head1 REQUIREMENTS
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment