Commit c245f10b authored by Leo Gordon's avatar Leo Gordon
Browse files

start using Hive.pm for global storage of collections; Storable first checks...

start using Hive.pm for global storage of collections; Storable first checks these collections, then resorts to fetching
parent 0f7bd948
......@@ -57,8 +57,53 @@
=cut
package Bio::EnsEMBL::Hive;
use strict;
use warnings;
use Bio::EnsEMBL::Hive::Utils::Collection;
our %hash_of_collections;
sub collection {
my $key = pop @_;
return $hash_of_collections{$key} ||= Bio::EnsEMBL::Hive::Utils::Collection->new();
}
sub load_collections_from_dba {
my $hive_dba = pop @_;
foreach my $AdaptorType ('ResourceClass', 'ResourceDescription', 'Analysis', 'AnalysisStats', 'AnalysisCtrlRule', 'DataflowRule') {
my $adaptor = $hive_dba->get_adaptor( $AdaptorType );
$hash_of_collections{$AdaptorType} = Bio::EnsEMBL::Hive::Utils::Collection->new( $adaptor->fetch_all );
}
}
sub save_collections_to_dba {
my $hive_dba = pop @_;
foreach my $AdaptorType ('ResourceClass', 'ResourceDescription', 'Analysis', 'AnalysisStats', 'AnalysisCtrlRule', 'DataflowRule') {
my $adaptor = $hive_dba->get_adaptor( $AdaptorType );
foreach my $storable_object ( Bio::EnsEMBL::Hive->collection( $AdaptorType )->list ) {
$adaptor->store_or_update_one( $storable_object );
# warn "Stored/updated ".$storable_object->toString()."\n";
}
}
my $job_adaptor = $hive_dba->get_AnalysisJobAdaptor;
foreach my $analysis ( Bio::EnsEMBL::Hive->collection('Analysis')->list ) {
if(my $our_jobs = $analysis->jobs_collection ) {
$job_adaptor->store_jobs_and_adjust_counters( $our_jobs );
foreach my $job (@$our_jobs) {
# warn "Stored ".$job->toString()."\n";
}
}
}
}
1;
......@@ -60,6 +60,7 @@ package Bio::EnsEMBL::Hive::PipeConfig::HiveGeneric_conf;
use strict;
use warnings;
use Bio::EnsEMBL::Hive;
use Bio::EnsEMBL::Hive::Utils::Collection;
use Bio::EnsEMBL::Hive::Utils::URL;
use Bio::EnsEMBL::Hive::DBSQL::DBAdaptor;
......@@ -443,15 +444,7 @@ sub run {
my $hive_dba = Bio::EnsEMBL::Hive::DBSQL::DBAdaptor->new( -url => $pipeline_url, -no_sql_schema_version_check => 1 );
my $resource_class_adaptor = $hive_dba->get_ResourceClassAdaptor;
my $all_rc_coll = Bio::EnsEMBL::Hive::Utils::Collection->new( $resource_class_adaptor->fetch_all ); # load all previously stored RCs
my $resource_description_adaptor = $hive_dba->get_ResourceDescriptionAdaptor;
my $all_rd_coll = Bio::EnsEMBL::Hive::Utils::Collection->new( $resource_description_adaptor->fetch_all ); # load all previously stored RDs
foreach my $resource_description ( $all_rd_coll->list ) { # pre-mating of RC and RD objects
$resource_description->resource_class( $all_rc_coll->find_one_by('dbID', $resource_description->resource_class_id ) );
}
Bio::EnsEMBL::Hive->load_collections_from_dba( $hive_dba );
unless($job_topup) {
my $meta_adaptor = $hive_dba->get_MetaAdaptor; # the new adaptor for 'hive_meta' table
......@@ -485,14 +478,14 @@ sub run {
my $resource_class;
if( $resource_class = $all_rc_coll->find_one_by('name', $rc_name) ) {
if( $resource_class = Bio::EnsEMBL::Hive->collection('ResourceClass')->find_one_by('name', $rc_name) ) {
warn "Found an already existing resource_class '$rc_name'.\n";
} else {
warn "Creating a new resource_class '$rc_name'.\n";
$resource_class = Bio::EnsEMBL::Hive::ResourceClass->new(
'name' => $rc_name,
);
push @{ $all_rc_coll->listref }, $resource_class;
Bio::EnsEMBL::Hive->collection('ResourceClass')->add( $resource_class );
}
while( my($meadow_type, $resource_param_list) = each %{ $resource_classes_hash->{$rc_name} } ) {
......@@ -500,7 +493,7 @@ sub run {
my $resource_description;
if( $resource_description = $all_rd_coll->find_one_by('resource_class', $resource_class, 'meadow_type', $meadow_type) ) {
if( $resource_description = Bio::EnsEMBL::Hive->collection('ResourceDescription')->find_one_by('resource_class', $resource_class, 'meadow_type', $meadow_type) ) {
warn "Attempting to redefine an existing description for '$rc_name/$meadow_type' resource class\n";
$resource_description->submission_cmd_args( $resource_param_list->[0] );
$resource_description->worker_cmd_args( $resource_param_list->[1] );
......@@ -512,25 +505,22 @@ sub run {
'submission_cmd_args' => $resource_param_list->[0],
'worker_cmd_args' => $resource_param_list->[1],
);
push @{ $all_rd_coll->listref }, $resource_description;
Bio::EnsEMBL::Hive->collection('ResourceDescription')->add( $resource_description );
}
}
}
unless(my $default_rc = $all_rc_coll->find_one_by('name', 'default') ) {
unless(my $default_rc = Bio::EnsEMBL::Hive->collection('ResourceClass')->find_one_by('name', 'default') ) {
warn "\tNB:'default' resource class is not in the database (did you forget to inherit from SUPER::resource_classes ?) - creating it for you\n";
$default_rc = Bio::EnsEMBL::Hive::ResourceClass->new(
'name' => 'default',
);
push @{ $all_rc_coll->listref }, $default_rc;
Bio::EnsEMBL::Hive->collection('ResourceClass')->add( $default_rc );
}
warn "Done.\n\n";
}
my $valley = Bio::EnsEMBL::Hive::Valley->new( {}, 'LOCAL' );
my $analysis_adaptor = $hive_dba->get_AnalysisAdaptor;
my $all_analyses_coll = Bio::EnsEMBL::Hive::Utils::Collection->new( $analysis_adaptor->fetch_all ); # load all previously stored analyses
my %seen_logic_name = ();
foreach my $aha (@{$self->pipeline_analyses}) {
......@@ -551,7 +541,7 @@ sub run {
die "(-rc_id => $rc_id) syntax is deprecated, please use (-rc_name => 'your_resource_class_name')";
}
my $analysis = $all_analyses_coll->find_one_by('logic_name', $logic_name); # the analysis with this logic_name may have already been stored in the db
my $analysis = Bio::EnsEMBL::Hive->collection('Analysis')->find_one_by('logic_name', $logic_name); # the analysis with this logic_name may have already been stored in the db
if( $analysis ) {
if($analysis_topup) {
......@@ -568,7 +558,7 @@ sub run {
warn "Creating analysis '$logic_name'.\n";
$rc_name ||= 'default';
my $resource_class = $all_rc_coll->find_one_by('name', $rc_name)
my $resource_class = Bio::EnsEMBL::Hive->collection('ResourceClass')->find_one_by('name', $rc_name)
or die "Could not find local resource with name '$rc_name', please check that resource_classes() method of your PipeConfig either contains or inherits it from the parent class";
if ($meadow_type and not exists $valley->available_meadow_hash()->{$meadow_type}) {
......@@ -610,9 +600,8 @@ sub run {
'sync_lock' => 0,
);
$analysis->stats( $stats ); # link stats to the analysis
push @{ $all_analyses_coll->listref }, $analysis;
Bio::EnsEMBL::Hive->collection('Analysis')->add( $analysis );
Bio::EnsEMBL::Hive->collection('AnalysisStats')->add( $stats );
}
# now create the corresponding jobs (if there are any):
......@@ -633,7 +622,7 @@ sub run {
my ($logic_name, $wait_for, $flow_into)
= @{$aha}{qw(-logic_name -wait_for -flow_into)}; # slicing a hash reference
my $analysis = $all_analyses_coll->find_one_by('logic_name', $logic_name);
my $analysis = Bio::EnsEMBL::Hive->collection('Analysis')->find_one_by('logic_name', $logic_name);
$wait_for ||= [];
$wait_for = [ $wait_for ] unless(ref($wait_for) eq 'ARRAY'); # force scalar into an arrayref
......@@ -641,14 +630,14 @@ sub run {
# create control rules:
foreach my $condition_url (@$wait_for) {
unless ($condition_url =~ m{^\w*://}) {
my $condition_analysis = $all_analyses_coll->find_one_by('logic_name', $condition_url)
my $condition_analysis = Bio::EnsEMBL::Hive->collection('Analysis')->find_one_by('logic_name', $condition_url)
or die "Could not find a local analysis '$condition_url' to create a control rule (in '".($analysis->logic_name)."')\n";
}
my $c_rule = Bio::EnsEMBL::Hive::AnalysisCtrlRule->new(
'condition_analysis_url' => $condition_url,
'ctrled_analysis' => $analysis,
);
push @{ $analysis->control_rules_collection }, $c_rule;
Bio::EnsEMBL::Hive->collection('AnalysisCtrlRule')->add( $c_rule );
}
$flow_into ||= {};
......@@ -690,7 +679,7 @@ sub run {
while(my ($heir_url, $input_id_template_list) = each %$heirs) {
unless ($heir_url =~ m{^\w*://}) {
my $heir_analysis = $all_analyses_coll->find_one_by('logic_name', $heir_url)
my $heir_analysis = Bio::EnsEMBL::Hive->collection('Analysis')->find_one_by('logic_name', $heir_url)
or die "Could not find a local analysis named '$heir_url' (dataflow from analysis '".($analysis->logic_name)."')\n";
}
......@@ -705,7 +694,7 @@ sub run {
'funnel_dataflow_rule' => $funnel_dataflow_rule,
'input_id_template' => $input_id_template,
);
push @{ $analysis->dataflow_rules_collection }, $df_rule;
Bio::EnsEMBL::Hive->collection('DataflowRule')->add( $df_rule );
if($group_role eq 'funnel') {
if($group_tag_to_funnel_dataflow_rule{$group_tag}) {
......@@ -720,40 +709,7 @@ sub run {
} # /for all pipeline_analyses
} # /unless($job_topup)
# now storing all this stuff:
my $analysis_stats_adaptor = $hive_dba->get_AnalysisStatsAdaptor;
my $job_adaptor = $hive_dba->get_AnalysisJobAdaptor;
my $ctrl_rule_adaptor = $hive_dba->get_AnalysisCtrlRuleAdaptor;
my $dataflow_rule_adaptor = $hive_dba->get_DataflowRuleAdaptor;
foreach my $resource_class ( $all_rc_coll->list ) {
$resource_class_adaptor->store_or_update_one( $resource_class );
foreach my $resource_description (@{ $all_rd_coll->find_all_by('resource_class', $resource_class) }) {
$resource_description_adaptor->store_or_update_one( $resource_description );
}
}
foreach my $analysis ( $all_analyses_coll->list ) {
$analysis_adaptor->store_or_update_one( $analysis );
$analysis_stats_adaptor->store_or_update_one( $analysis->stats ); # should be taking the value cached previously
if(my $our_jobs = $analysis->jobs_collection ) {
$job_adaptor->store_jobs_and_adjust_counters( $our_jobs );
}
}
foreach my $analysis ( $all_analyses_coll->list ) {
foreach my $c_rule (@{ $analysis->control_rules_collection }) {
$ctrl_rule_adaptor->store_or_update_one( $c_rule );
warn $c_rule->toString."\n";
}
foreach my $df_rule (@{ $analysis->dataflow_rules_collection }) {
$dataflow_rule_adaptor->store_or_update_one( $df_rule );
warn $df_rule->toString."\n";
}
}
Bio::EnsEMBL::Hive->save_collections_to_dba( $hive_dba );
print "\n\n# --------------------[Useful commands]--------------------------\n";
print "\n";
......
......@@ -45,6 +45,7 @@ use strict;
use warnings;
use Scalar::Util qw(weaken);
use Bio::EnsEMBL::Hive;
use Bio::EnsEMBL::Hive::DBSQL::DBAdaptor;
......@@ -135,13 +136,14 @@ sub AUTOLOAD {
if(@_) {
$self->{$foo_id_method_name} = shift @_;
if( $self->{$foo_obj_method_name} ) {
warn "setting $foo_id_method_name in an object that had $foo_obj_method_name defined";
# warn "setting $foo_id_method_name in an object that had $foo_obj_method_name defined";
$self->{$foo_obj_method_name} = undef;
}
# attempt to lazy-load:
} elsif( !$self->{$foo_id_method_name} and my $foo_object=$self->{$foo_obj_method_name}) {
$self->{$foo_id_method_name} = $foo_object->dbID;
# warn "Lazy-loaded dbID (".$self->{$foo_id_method_name}.") from $AdaptorType object\n";
}
return $self->{$foo_id_method_name};
......@@ -153,10 +155,13 @@ sub AUTOLOAD {
# attempt to lazy-load:
} elsif( !$self->{$foo_obj_method_name} and my $foo_object_id = $self->{$foo_id_method_name}) {
if(my $adaptor = $self->adaptor) {
if( $self->{$foo_obj_method_name} = Bio::EnsEMBL::Hive->collection($AdaptorType)->find_one_by('dbID', $foo_object_id) ) { # careful: $AdaptorType may not be unique (aliases)
# warn "Lazy-loading object from $AdaptorType collection\n";
} elsif(my $adaptor = $self->adaptor) {
# warn "Lazy-loading object from $AdaptorType adaptor\n";
$self->{$foo_obj_method_name} = $adaptor->db->get_adaptor( $AdaptorType )->fetch_by_dbID( $foo_object_id );
} else {
warn "Cannot lazy-load $foo_obj_method_name because the ".ref($self)." is not attached to an adaptor";
# warn "Cannot lazy-load $foo_obj_method_name because the ".ref($self)." is not attached to an adaptor";
}
}
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment