Commit f1c83bdb authored by Matthieu Muffato's avatar Matthieu Muffato
Browse files

Initial guiHive version for eHive database v72

This is a mere copy of v62
parent 372576ce
......@@ -231,16 +231,19 @@ func main() {
http.HandleFunc("/versions/", unknown)
http.Handle("/versions/56/", http.FileServer(http.Dir(relPath)))
http.Handle("/versions/62/", http.FileServer(http.Dir(relPath)))
http.Handle("/versions/72/", http.FileServer(http.Dir(relPath)))
http.Handle("/styles/", http.FileServer(http.Dir(relPath)))
http.Handle("/javascript/", http.FileServer(http.Dir(relPath)))
http.Handle("/versions/56/javascript/", http.FileServer(http.Dir(relPath)))
http.Handle("/versions/62/javascript/", http.FileServer(http.Dir(relPath)))
http.Handle("/versions/72/javascript/", http.FileServer(http.Dir(relPath)))
http.Handle("/images/", http.FileServer(http.Dir(relPath)))
http.HandleFunc("/scripts/", scriptHandler)
http.HandleFunc("/versions/56/scripts/", scriptHandler)
http.HandleFunc("/versions/62/scripts/", scriptHandler)
http.HandleFunc("/versions/72/scripts/", scriptHandler)
debug("Listening to port: %s", port)
err := http.ListenAndServe(":"+port, nil)
checkError("ListenAndServe ", err)
......
# Travis CI configuration: the test script is run under Perl 5.10 and Perl 5.14.
language: "perl"
perl:
- "5.10"
- "5.14"
# Two COVERALLS settings are declared; the matrix 'exclude' section at the bottom
# keeps exactly one of them per Perl version.
env:
- COVERALLS=true
- COVERALLS=false
before_install:
- sudo apt-get -y update
- sudo apt-get -y install unzip python3
# Download and unpack the BioPerl 1.2.3 release archive.
- wget https://github.com/bioperl/bioperl-live/archive/bioperl-release-1-2-3.zip
- unzip bioperl-release-1-2-3.zip
# On the first line of the psql executable found on PATH, replace "usr/bin/perl -w" with "usr/bin/env perl".
- sudo sed -i '1s/usr\/bin\/perl -w/usr\/bin\/env perl/' `which psql`
# Make the system-wide PgCommon.pm visible through a symlink in modules/.
- ln -s /usr/share/perl5/PgCommon.pm modules/
install:
# Install the dependencies of this distribution without running their tests.
- cpanm -v --installdeps --notest .
- cpanm -n Devel::Cover::Report::Coveralls
script: "./scripts/travis_run_tests.sh"
# E-mail notifications are sent for both successful and failed builds.
notifications:
email:
on_success: always
on_failure: always
# Get the matrix to only build coveralls support when on 5.10
matrix:
exclude:
- perl: "5.10"
env: COVERALLS=false
- perl: "5.14"
env: COVERALLS=true
d6950e073c941cb791c49a43c7ffcdfde9c24d10
{
"VERSION" : 0.01,
"Valley" : {
"SubmitWorkersMax" : 50
},
"Meadow" : {
"LOCAL" : {
"TotalRunningWorkersMax" : 2,
"my-quadcore-desktop" : {
"TotalRunningWorkersMax" : 4
},
"your-weak-laptop" : {
"TotalRunningWorkersMax" : 1
}
},
"LSF" : {
"SubmissionOptions" : "",
"TotalRunningWorkersMax" : 2000,
"farm3" : {
"SubmissionOptions" : "",
"TotalRunningWorkersMax" : 5000
},
"EBI" : {
"SubmissionOptions" : ""
}
},
"SGE" : {
"SubmissionOptions" : "",
"TotalRunningWorkersMax" : 2000
},
"CONDOR" : {
"SubmissionOptions" : "",
"TotalRunningWorkersMax" : 500
}
},
"Graph" : {
"Pad" : "1.0",
"Node" : {
"Font" : "Times-Roman",
"Colour" : "cyan",
"Table" : { "Colour" : "black", "Font" : "Courier" },
"Details" : { "Font" : "Helvetica" },
"JobStatus" : {
"SEMAPHORED" : { "Colour" : "grey" },
"READY" : { "Colour" : "green" },
"INPROGRESS" : { "Colour" : "yellow" },
"DONE" : { "Colour" : "DeepSkyBlue" },
"FAILED" : { "Colour" : "red" }
},
"AnalysisStatus" : {
"EMPTY" : { "Colour" : "white" },
"BLOCKED" : { "Colour" : "grey" },
"LOADING" : { "Colour" : "green" },
"ALL_CLAIMED" : { "Colour" : "grey" },
"SYNCHING" : { "Colour" : "green" },
"READY" : { "Colour" : "green" },
"WORKING" : { "Colour" : "yellow" },
"DONE" : { "Colour" : "DeepSkyBlue" },
"FAILED" : { "Colour" : "red" }
}
},
"Edge" : {
"Font" : "Helvetica",
"Data" : { "Colour" : "blue" },
"Control" : { "Colour" : "red" },
"Semablock" : { "Colour" : "red" },
"Accu" : { "Colour" : "darkgreen" }
},
"Box" : {
"ColourScheme" : "blues9",
"ColourOffset" : 1
},
"DisplayStats" : "barchart",
"DisplayJobs" : 0,
"DisplayData" : 0,
"DisplayInputIDTemplate": 1,
"DisplayDetails" : 1,
"DisplayStretched" : 0,
"DisplaySemaphoreBoxes" : 1,
"DuplicateTables" : 0
}
}
=pod
=head1 NAME
Bio::EnsEMBL::Hive
=head1 DESCRIPTION
Hive based processing is a concept based on a more controlled version
of an autonomous agent type system. Each worker is not told what to do
(like a centralized control system - like the current pipeline system)
but rather queries a central database for jobs (give me jobs).
Each worker is linked to an analysis_id, registers itself on creation
into the Hive, creates a RunnableDB instance of the Analysis->module,
gets relevant configuration information from the database, does its
work, creates the next layer of job entries by interfacing to
the DataflowRuleAdaptor to determine the analyses it needs to pass its
output data to and creates jobs on the database of the next analysis.
It repeats this cycle until it has lived its lifetime or until there are no
more jobs left to process.
The lifetime limit is a safety limit to prevent these from 'infecting'
a system and sitting on a compute node for longer than is socially acceptable.
This is primarily needed on compute resources like an LSF system where jobs
are not preempted and run until they are done.
The Queen's primary job is to create Workers to get the work done.
As part of this, she is also responsible for summarizing the status of the
analyses by querying the jobs, summarizing, and updating the
analysis_stats table. From this she is also responsible for monitoring and
'unblocking' analyses via the analysis_ctrl_rules.
The Queen is also responsible for freeing up jobs that were claimed by Workers
that died unexpectedly so that other workers can take over the work.
The Beekeeper is in charge of interfacing between the Queen and a compute resource
or 'compute farm'. Its job is to query Queens if they need any workers and to
send the requested number of workers to open machines via the runWorker.pl script.
It is also responsible for interfacing with the Queen to identify workers which died
unexpectedly so that she can free the dead workers' unfinished jobs.
=head1 LICENSE
Copyright [1999-2015] Wellcome Trust Sanger Institute and the EMBL-European Bioinformatics Institute
Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software distributed under the License
is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and limitations under the License.
=head1 CONTACT
Please subscribe to the Hive mailing list: http://listserver.ebi.ac.uk/mailman/listinfo/ehive-users to discuss Hive-related questions or to be notified of our updates
=cut
package Bio::EnsEMBL::Hive;
use strict;
use warnings;
1;
=pod
=head1 NAME
Bio::EnsEMBL::Hive::Accumulator
=head1 DESCRIPTION
A data container object that defines parameters for accumulated dataflow.
This object is generated from specially designed dataflow URLs.
=head1 LICENSE
Copyright [1999-2015] Wellcome Trust Sanger Institute and the EMBL-European Bioinformatics Institute
Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software distributed under the License
is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and limitations under the License.
=head1 CONTACT
Please subscribe to the Hive mailing list: http://listserver.ebi.ac.uk/mailman/listinfo/ehive-users to discuss Hive-related questions or to be notified of our updates
=cut
package Bio::EnsEMBL::Hive::Accumulator;
use strict;
use warnings;
use Bio::EnsEMBL::Hive::Utils ('stringify');
use base ( 'Bio::EnsEMBL::Hive::Storable' );
# Combined getter/setter for the '_struct_name' attribute.
#   Arg [1] (optional) : new value to store
#   Returns            : the value held after any update
sub struct_name {
    my ($self, @new_value) = @_;

    $self->{'_struct_name'} = $new_value[0] if @new_value;

    return $self->{'_struct_name'};
}
# Combined getter/setter for the '_signature_template' attribute.
#   Arg [1] (optional) : new template to store
#   Returns            : the template held after any update
sub signature_template {
    my $self = shift;

    # Pure read access: nothing was passed besides the object itself.
    return $self->{'_signature_template'} unless @_;

    $self->{'_signature_template'} = shift;
    return $self->{'_signature_template'};
}
# Builds the URL of this accumulator: <db part>/accu?<struct_name>=<signature_template>
#   Arg [1] (optional) : reference dba; when it is the same as this object's own dba
#                        (or this object has no dba) the db part is the short ':///'
#   Returns            : string
sub url {
    my ($self, $ref_dba) = @_;

    my $my_dba = $self->adaptor && $self->adaptor->db;

    my $db_part = ':///';
    if ($my_dba and $my_dba ne ($ref_dba // '')) {
        $db_part = $my_dba->dbc->url;       # a different database: spell out its full URL
    }

    return $db_part . '/accu?' . $self->struct_name . '=' . $self->signature_template;
}
# Builds a display label: struct_name immediately followed by signature_template,
# prefixed by '<dbname>/' when this object's dba differs from the reference dba.
#   Arg [1] (optional) : reference dba
#   Returns            : string
sub display_name {
    my ($self, $ref_dba) = @_;

    my $my_dba = $self->adaptor && $self->adaptor->db;

    my $db_prefix = '';
    if ($my_dba and $my_dba ne ($ref_dba // '')) {
        $db_prefix = $my_dba->dbc->dbname . '/';
    }

    return $db_prefix . $self->struct_name . $self->signature_template;
}
# Stores one accumulator row per output_id through this object's adaptor.
#   Arg [1] : arrayref of output_ids
#   Arg [2] : the emitting job; it must have a semaphored_job_id, otherwise this dies
#   Returns : whatever the adaptor's store() returns
sub dataflow {
    my ( $self, $output_ids, $emitting_job ) = @_;

    my $sender_id   = $emitting_job->dbID();
    my $receiver_id = $emitting_job->semaphored_job_id()
        or die "No semaphored job, cannot perform accumulated dataflow";

    my $accu_name = $self->struct_name();
    my $template  = $self->signature_template();

    my @accu_rows;
    for my $output_id (@$output_ids) {

        # Every \w+ run in the template is replaced by the value the emitting job
        # reports for a parameter of that name (given this particular output_id).
        (my $signature = $template) =~ s/(\w+)/$emitting_job->_param_possibly_overridden($1,$output_id)/eg;

        push @accu_rows, {
            'sending_job_id'   => $sender_id,
            'receiving_job_id' => $receiver_id,
            'struct_name'      => $accu_name,
            'key_signature'    => $signature,
            'value'            => stringify( $emitting_job->_param_possibly_overridden($accu_name, $output_id) ),
        };
    }

    return $self->adaptor->store( \@accu_rows );
}
1;
=pod
=head1 NAME
Bio::EnsEMBL::Hive::Analysis
=head1 DESCRIPTION
An Analysis object represents a "stage" of the Hive pipeline that groups together
all jobs that share the same module and the same common parameters.
Individual Jobs are said to "belong" to an Analysis.
Control rules unblock when their condition Analyses are done.
=head1 LICENSE
Copyright [1999-2015] Wellcome Trust Sanger Institute and the EMBL-European Bioinformatics Institute
Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software distributed under the License
is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and limitations under the License.
=head1 CONTACT
Please subscribe to the Hive mailing list: http://listserver.ebi.ac.uk/mailman/listinfo/ehive-users to discuss Hive-related questions or to be notified of our updates
=cut
package Bio::EnsEMBL::Hive::Analysis;
use strict;
use warnings;
use Bio::EnsEMBL::Hive::Utils ('stringify');
use Bio::EnsEMBL::Hive::AnalysisCtrlRule;
use Bio::EnsEMBL::Hive::DataflowRule;
use Bio::EnsEMBL::Hive::GuestProcess;
use base ( 'Bio::EnsEMBL::Hive::Cacheable', 'Bio::EnsEMBL::Hive::Storable' );
# Overrides the default from the Cacheable parent.
#   Returns : a new arrayref holding the single field name 'logic_name'
sub unikey {
    my @key_fields = ('logic_name');
    return \@key_fields;
}
=head1 AUTOLOADED
resource_class_id / resource_class
=cut
# Combined getter/setter for the '_logic_name' attribute.
#   Arg [1] (optional) : new value to store
#   Returns            : the value held after any update
sub logic_name {
    my ($self, @args) = @_;

    if (@args) {
        $self->{'_logic_name'} = $args[0];
    }

    return $self->{'_logic_name'};
}
# Synonym for logic_name(): every argument is passed through and its result is returned.
sub name {
    my ($self, @args) = @_;

    return $self->logic_name(@args);
}
# Combined getter/setter for the '_module' attribute.
#   Arg [1] (optional) : new value to store
#   Returns            : the value held after any update
sub module {
    my ($self, @args) = @_;

    if (@args) {
        $self->{'_module'} = $args[0];
    }

    return $self->{'_module'};
}
# Combined getter/setter for the '_language' attribute.
#   Arg [1] (optional) : new value to store
#   Returns            : the value held after any update
sub language {
    my $self = shift;

    return $self->{'_language'} unless @_;

    $self->{'_language'} = shift;
    return $self->{'_language'};
}
# Combined getter/setter for the '_parameters' attribute.
#   Arg [1] (optional) : new parameters; a reference is passed through stringify()
#                        before being stored, anything else is stored as given
#   Returns            : the stored value
sub parameters {
    my ($self, @args) = @_;

    if (@args) {
        my $value = $args[0];
        $value = stringify($value) if ref($value);
        $self->{'_parameters'} = $value;
    }

    return $self->{'_parameters'};
}
# Combined getter/setter for the '_failed_job_tolerance' attribute.
# An undefined value is replaced by the default 0, which is also written back into the object.
#   Arg [1] (optional) : new value to store
#   Returns            : the stored (or defaulted) value
sub failed_job_tolerance {
    my ($self, @args) = @_;

    $self->{'_failed_job_tolerance'}   = $args[0] if @args;
    $self->{'_failed_job_tolerance'} //= 0;

    return $self->{'_failed_job_tolerance'};
}
# Combined getter/setter for the '_max_retry_count' attribute.
# An undefined value is replaced by the default 3, which is also written back into the object.
#   Arg [1] (optional) : new value to store
#   Returns            : the stored (or defaulted) value
sub max_retry_count {
    my $self = shift;

    if (@_) {
        $self->{'_max_retry_count'} = shift;
    }
    if (not defined $self->{'_max_retry_count'}) {
        $self->{'_max_retry_count'} = 3;
    }

    return $self->{'_max_retry_count'};
}
# Combined getter/setter for the '_can_be_empty' attribute.
# An undefined value is replaced by the default 0, which is also written back into the object.
#   Arg [1] (optional) : new value to store
#   Returns            : the stored (or defaulted) value
sub can_be_empty {
    my ($self, @args) = @_;

    $self->{'_can_be_empty'}   = $args[0] if @args;
    $self->{'_can_be_empty'} //= 0;

    return $self->{'_can_be_empty'};
}
# Combined getter/setter for the '_priority' attribute.
# An undefined value is replaced by the default 0, which is also written back into the object.
#   Arg [1] (optional) : new value to store
#   Returns            : the stored (or defaulted) value
sub priority {
    my $self = shift;

    if (@_) {
        $self->{'_priority'} = shift;
    }
    if (not defined $self->{'_priority'}) {
        $self->{'_priority'} = 0;
    }

    return $self->{'_priority'};
}
# Combined getter/setter for the '_meadow_type' attribute (no default is applied).
#   Arg [1] (optional) : new value to store
#   Returns            : the value held after any update
sub meadow_type {
    my ($self, @args) = @_;

    if (@args) {
        $self->{'_meadow_type'} = $args[0];
    }

    return $self->{'_meadow_type'};
}
# Combined getter/setter for the '_analysis_capacity' attribute (no default is applied).
#   Arg [1] (optional) : new value to store
#   Returns            : the value held after any update
sub analysis_capacity {
    my $self = shift;

    return $self->{'_analysis_capacity'} unless @_;

    $self->{'_analysis_capacity'} = shift;
    return $self->{'_analysis_capacity'};
}
# Checks that the Runnable of this analysis can be loaded and returns the name of
# the Perl class to use for it.
#
#   Returns : 'Bio::EnsEMBL::Hive::GuestProcess' when the analysis has a language set
#             (after the language wrapper has been run with 'compile <module>'),
#             otherwise the module name itself (after it has been require'd).
#   Dies    : - when no module is defined for the analysis
#             - when the language wrapper cannot be run or reports a failure
#               (the message says whether it failed to start, was killed by a
#               signal, or exited with a non-zero status)
#             - when the Perl module cannot be require'd, does not inherit from
#               Bio::EnsEMBL::Hive::Process, or still provides strict_hash_format()
sub get_compiled_module_name {
    my $self = shift;

    my $runnable_module_name = $self->module
        or die "Analysis '".$self->logic_name."' does not have its 'module' defined";

    my $language = $self->language;
    if ($language) {
        my $wrapper = Bio::EnsEMBL::Hive::GuestProcess::_get_wrapper_for_language($language);

        # List-form system(): the module name is never interpreted by a shell.
        if (system($wrapper, 'compile', $runnable_module_name)) {
            # Decode $? as documented for system(), so the message says what went wrong.
            my $reason = ($? == -1) ? "failed to execute '$wrapper': $!"
                       : ($? & 127) ? "'$wrapper' died with signal ".($? & 127)
                       :              "'$wrapper' exited with status ".($? >> 8);
            die "The runnable module '$runnable_module_name' cannot be loaded or compiled: $reason\n";
        }
        return 'Bio::EnsEMBL::Hive::GuestProcess';
    }

    # NOTE(review): string-eval is used so that a bareword package name can be require'd;
    # this assumes the module name comes from trusted pipeline configuration - confirm.
    eval "require $runnable_module_name";
    die "The runnable module '$runnable_module_name' cannot be loaded or compiled:\n$@" if($@);

    die "Problem accessing methods in '$runnable_module_name'. Please check that it inherits from Bio::EnsEMBL::Hive::Process and is named correctly.\n"
        unless($runnable_module_name->isa('Bio::EnsEMBL::Hive::Process'));

    die "DEPRECATED: the strict_hash_format() method is no longer supported in Runnables - the input_id() in '$runnable_module_name' has to be a hash now.\n"
        if($runnable_module_name->can('strict_hash_format'));

    return $runnable_module_name;
}
=head2 url
Arg [1] : none
Example : $url = $analysis->url;
Description: Constructs a URL string for this database connection
Follows the general URL rules.
Returntype : string of format
mysql://<user>:<pass>@<host>:<port>/<dbname>/analysis?logic_name=<name>
Exceptions : none
Caller : general
=cut
# Builds the URL of this analysis.
#   Arg [1] (optional) : reference dba; when it is the same as this object's own dba
#                        (or this object has no dba) only the logic_name is returned
#   Returns            : '<dbc url>/analysis?logic_name=<logic_name>' or just '<logic_name>'
sub url {
    my ($self, $ref_dba) = @_;

    my $my_dba = $self->adaptor && $self->adaptor->db;

    my $location = '';
    if ($my_dba and $my_dba ne ($ref_dba // '')) {
        $location = $my_dba->dbc->url . '/analysis?logic_name=';
    }

    return $location . $self->logic_name;
}
# Builds a display label for this analysis: its logic_name, prefixed by '<dbname>/'
# when this object's dba differs from the reference dba.
#   Arg [1] (optional) : reference dba
#   Returns            : string
sub display_name {
    my ($self, $ref_dba) = @_;

    my $my_dba = $self->adaptor && $self->adaptor->db;

    my $db_prefix = '';
    if ($my_dba and $my_dba ne ($ref_dba // '')) {
        $db_prefix = $my_dba->dbc->dbname . '/';
    }

    return $db_prefix . $self->logic_name;
}
=head2 stats
Arg [1] : none
Example : $stats = $analysis->stats;
Description: returns either the previously cached AnalysisStats object, or if it is missing - pulls a fresh one from the DB.
Returntype : Bio::EnsEMBL::Hive::AnalysisStats object
Exceptions : none
Caller : general
=cut
# Returns the AnalysisStats object of this analysis: the one found in the
# AnalysisStats collection when there is a collection and it has a (true) match,
# otherwise the one fetched from the database by this analysis' dbID.
sub stats {
    my ($self) = @_;

    my $collection = Bio::EnsEMBL::Hive::AnalysisStats->collection();

    if ($collection) {
        my $cached_stats = $collection->find_one_by('analysis', $self);
        return $cached_stats if $cached_stats;
    }

    return $self->adaptor->db->get_AnalysisStatsAdaptor->fetch_by_analysis_id( $self->dbID );
}
# Combined getter/setter for the '_jobs_collection' attribute.
# A false value is replaced by a new empty arrayref, which is also stored in the object.
#   Arg [1] (optional) : new collection to store
#   Returns            : the stored collection
sub jobs_collection {
    my ($self, @args) = @_;

    $self->{'_jobs_collection'} = $args[0] if @args;

    unless ($self->{'_jobs_collection'}) {
        $self->{'_jobs_collection'} = [];
    }

    return $self->{'_jobs_collection'};
}
# Returns the control rules in which this analysis is the controlled one: taken from
# the AnalysisCtrlRule collection when there is one, otherwise fetched from the
# database by this analysis' dbID.
sub control_rules_collection {
    my ($self) = @_;

    my $collection = Bio::EnsEMBL::Hive::AnalysisCtrlRule->collection();

    if ($collection) {
        return $collection->find_all_by('ctrled_analysis', $self);
    }

    return $self->adaptor->db->get_AnalysisCtrlRuleAdaptor->fetch_all_by_ctrled_analysis_id( $self->dbID );
}
# Returns the dataflow rules of the DataflowRule collection whose 'from_analysis'
# is this analysis.
# NOTE(review): there is no fallback to an adaptor here, so the DataflowRule
# collection has to exist when this is called - confirm that this is intended.
sub dataflow_rules_collection {
    my ($self) = @_;

    return Bio::EnsEMBL::Hive::DataflowRule->collection()->find_all_by('from_analysis', $self);
}
sub dataflow_rules_by_branch {
my $self = shift @_;
if (not $self->{'_dataflow_rules_by_branch'}) {
my %dataflow_rules_by_branch = ();
foreach my $dataflow (@{$self->dataflow_rules_collection}) {
push @{$dataflow_rules_by_branch{$dataflow->branch_code}}, $dataflow;
}
$self->{'_dataflow_rules_by_branch'} = \%dataflow_rules_by_branch;
}