Skip to content
Snippets Groups Projects
Commit 5cf5d02e authored by Dan Staines's avatar Dan Staines
Browse files

files for running handover pipeline over EG databases

parent 16056298
No related branches found
No related tags found
No related merge requests found
package Bio::EnsEMBL::Pipeline::PipeConfig::EG_misc_tasks_conf;
use strict;
use warnings;
use base ('Bio::EnsEMBL::Hive::PipeConfig::HiveGeneric_conf');
use Bio::EnsEMBL::ApiVersion qw/software_version/;
# Returns the default option set for this pipeline configuration.
#
# The defaults inherited from the base class come first so that every entry
# listed here takes precedence over an inherited entry of the same name.
sub default_options {
    my $self = shift;

    my %options = (
        # Everything the base class already defines.
        %{ $self->SUPER::default_options() },

        # Optional overrides.
        species   => [],
        division  => [],
        release   => software_version(),
        run_all   => 0,
        bin_count => '150',
        max_run   => '100',

        # Values derived from other options.
        pipeline_name => 'misc_tasks_' . $self->o('release'),
        email         => $self->o( 'ENV', 'USER' ) . '@ebi.ac.uk',
    );

    return \%options;
}
# Returns the commands used to create the pipeline. Nothing is added here:
# the result is a fresh array reference holding exactly the commands
# inherited from the base class.
sub pipeline_create_commands {
    my $self = shift;

    my @commands = @{ $self->SUPER::pipeline_create_commands };

    return \@commands;
}
## See diagram for pipeline structure
# Returns the array reference of analysis definitions for this pipeline.
#
# Settings that several analyses repeat verbatim are gathered into local
# helpers below; the resulting definitions, and their order, are the same
# as if every analysis were written out in full.
sub pipeline_analyses {
    my $self = shift;

    # Retry/capacity pairs shared by groups of analyses.
    my %core_limits      = ( -max_retry_count => 3, -hive_capacity => 100 );
    my %variation_limits = ( -max_retry_count => 1, -hive_capacity => 10 );

    # Builds a parameter hash from an analysis' own settings plus the
    # binning options common to all density/percentage analyses.
    my $binned = sub {
        my (%own) = @_;
        return {
            %own,
            bin_count => $self->o('bin_count'),
            max_run   => $self->o('max_run'),
        };
    };

    my @analyses = (
        {
            -logic_name => 'ScheduleSpecies',
            -module     => 'Bio::EnsEMBL::Pipeline::Production::EGSpeciesFactory',
            -parameters => {
                map { ( $_ => $self->o($_) ) } qw(species division run_all max_run)
            },
            -input_ids       => [ {} ],
            -max_retry_count => 1,
            -flow_into       => {
                '3->B' => ['PercentRepeat'],
                'B->3' => ['PercentGC'],
                '3->C' => ['CodingDensity'],
                'C->3' => ['NonCodingDensity'],
                '3->A' => [ 'PercentRepeat', 'CodingDensity', 'NonCodingDensity', 'PercentGC' ],
                '2->A' => [ 'GeneGC', 'GeneCount', 'ConstitutiveExons' ],    # Should include 'PepStats'
                'A->1' => ['NotifyCore'],
                '4->D' => [ 'SnpDensity', 'SnpCount' ],
                'D->1' => ['NotifyVariation'],
            },
        },

        {
            -logic_name => 'ConstitutiveExons',
            -module     => 'Bio::EnsEMBL::Pipeline::Production::ConstitutiveExons',
            -parameters => { dbtype => 'core' },
            %core_limits,
            -rc_name => 'normal',
        },

        {
            -logic_name => 'PepStats',
            -module     => 'Bio::EnsEMBL::Pipeline::Production::PepStats',
            -parameters => {
                tmpdir  => '/tmp',
                binpath => '/nfs/panda/ensemblgenomes/external/EMBOSS',
                dbtype  => 'core',
            },
            %core_limits,
            -rc_name => 'mem',
        },

        {
            -logic_name => 'GeneCount',
            -module     => 'Bio::EnsEMBL::Pipeline::Production::GeneCount',
            %core_limits,
            -rc_name => 'normal',
        },

        {
            -logic_name => 'NonCodingDensity',
            -module     => 'Bio::EnsEMBL::Pipeline::Production::NonCodingDensity',
            -parameters => $binned->( logic_name => 'noncodingdensity', value_type => 'sum' ),
            %core_limits,
            -rc_name      => 'normal',
            -can_be_empty => 1,
            -flow_into    => ['PseudogeneDensity'],
        },

        {
            -logic_name => 'PseudogeneDensity',
            -module     => 'Bio::EnsEMBL::Pipeline::Production::PseudogeneDensity',
            -parameters => $binned->( logic_name => 'pseudogenedensity', value_type => 'sum' ),
            %core_limits,
            -rc_name      => 'normal',
            -can_be_empty => 1,
        },

        {
            -logic_name => 'CodingDensity',
            -module     => 'Bio::EnsEMBL::Pipeline::Production::CodingDensity',
            -parameters => $binned->( logic_name => 'codingdensity', value_type => 'sum' ),
            %core_limits,
            -rc_name      => 'normal',
            -can_be_empty => 1,
        },

        {
            -logic_name => 'GeneGC',
            -module     => 'Bio::EnsEMBL::Pipeline::Production::GeneGC',
            %core_limits,
            -rc_name => 'normal',
        },

        {
            -logic_name => 'PercentGC',
            -module     => 'Bio::EnsEMBL::Pipeline::Production::PercentGC',
            -parameters => $binned->(
                table      => 'repeat',
                logic_name => 'percentgc',
                value_type => 'ratio',
            ),
            %core_limits,
            -rc_name      => 'normal',
            -can_be_empty => 1,
        },

        {
            -logic_name => 'PercentRepeat',
            -module     => 'Bio::EnsEMBL::Pipeline::Production::PercentRepeat',
            -parameters => $binned->( logic_name => 'percentagerepeat', value_type => 'ratio' ),
            %core_limits,
            -rc_name      => 'mem',
            -can_be_empty => 1,
        },

        {
            -logic_name => 'SnpCount',
            -module     => 'Bio::EnsEMBL::Pipeline::Production::SnpCount',
            %variation_limits,
            -rc_name      => 'normal',
            -can_be_empty => 1,
        },

        {
            -logic_name => 'SnpDensity',
            -module     => 'Bio::EnsEMBL::Pipeline::Production::SnpDensity',
            -parameters => $binned->(
                table      => 'gene',
                logic_name => 'snpdensity',
                value_type => 'sum',
            ),
            %variation_limits,
            -rc_name      => 'normal',
            -can_be_empty => 1,
        },

        ####### NOTIFICATION
        {
            -logic_name => 'NotifyCore',
            -module     => 'Bio::EnsEMBL::Pipeline::Production::EmailSummaryCore',
            -parameters => {
                email   => $self->o('email'),
                subject => $self->o('pipeline_name') . ' (core) has finished',
            },
        },

        {
            -logic_name => 'NotifyVariation',
            -module     => 'Bio::EnsEMBL::Pipeline::Production::EmailSummaryVariation',
            -parameters => {
                email   => $self->o('email'),
                subject => $self->o('pipeline_name') . ' (variation) has finished',
            },
        },
    );

    return \@analyses;
}
# Returns the parameters made available to every analysis in the pipeline:
# whatever the base class provides, plus the release, species and division
# options of this configuration.
sub pipeline_wide_parameters {
    my ($self) = @_;

    return {
        # Inherit other stuff from the base class.
        %{ $self->SUPER::pipeline_wide_parameters() },
        release => $self->o('release'),
        species => $self->o('species'),

        # Each option needs its own key: a repeated hash key silently keeps
        # only the last value given for it.
        division => $self->o('division'),
    };
}
# override the default method, to force an automatic loading of the registry in all workers
sub beekeeper_extra_cmdline_options {
    my ($self) = @_;

    # The extra command line is the registry option followed by the value
    # of the 'registry' pipeline option.
    my $registry = $self->o('registry');

    return '-reg_conf ' . $registry;
}
# Returns the resource classes referred to by the analyses' -rc_name
# settings. Each class maps 'LSF' to the submission options used for it.
sub resource_classes {
    my ($self) = @_;

    my %lsf_options_for = (
        'default' => '',
        'normal'  => '-q production-rh6 -n 4 -M 4000 -R "rusage[mem=4000]"',
        'mem'     => '-q production-rh6 -n 4 -M 12000 -R "rusage[mem=12000]"',
    );

    my %classes;
    for my $class_name ( keys %lsf_options_for ) {
        $classes{$class_name} = { 'LSF' => $lsf_options_for{$class_name} };
    }

    return \%classes;
}
1;
=pod
=head1 LICENSE
Copyright (c) 1999-2012 The European Bioinformatics Institute and
Genome Research Limited. All rights reserved.
This software is distributed under a modified Apache license.
For license details, please see
http://www.ensembl.org/info/about/code_licence.html
=head1 CONTACT
Please email comments or questions to the public Ensembl
developers list at <dev@ensembl.org>.
Questions may also be sent to the Ensembl help desk at
<helpdesk@ensembl.org>.
=head1 NAME
Bio::EnsEMBL::Pipeline::Production::EGSpeciesFactory
=head1 DESCRIPTION
An extension of the ClassSpeciesFactory code, for use with
EnsemblGenomes, which uses the production database differently
and thus needs a simpler 'is_run' function.
=cut
package Bio::EnsEMBL::Pipeline::Production::EGSpeciesFactory;
use strict;
use warnings;
use base qw/Bio::EnsEMBL::Pipeline::Production::ClassSpeciesFactory/;
# Decides whether the given class of analysis should run for a database.
#
# Arguments:
#   $dba   - database adaptor of the species under consideration
#   $class - name of the analysis class being scheduled
#
# Returns the result of has_karyotype($dba) when $class matches 'karyotype'
# and 1 for every other class.
sub is_run {
    my ( $self, $dba, $class ) = @_;

    my $is_run = 1;
    if ( $class =~ /karyotype/ ) {
        $is_run = $self->has_karyotype($dba);
    }

    # Release the connection on every path, including the karyotype one.
    $dba->dbc()->disconnect_if_idle();

    return $is_run;
}
# Decides whether a database should be processed.
#
# The decision of the parent class is taken first. When the parent accepts
# the database (returns 1) and the 'division' parameter names at least one
# division, the database is kept only if its own division is one of them.
#
# Arguments:
#   $dba - database adaptor of the species under consideration
#
# Returns the parent's result, narrowed to 0 when a division filter is
# given and the database's division is not in it.
sub process_dba {
    my ( $self, $dba ) = @_;

    my $result = $self->SUPER::process_dba($dba);
    return $result unless $result == 1;

    # Accept the usual array reference, and also a single division name, so
    # that a scalar or missing parameter does not abort on dereferencing.
    my $requested = $self->param('division');
    my @divisions;
    if ( ref $requested eq 'ARRAY' ) {
        @divisions = @{$requested};
    }
    elsif ( defined $requested && length $requested ) {
        @divisions = ($requested);
    }
    return $result unless @divisions;

    # The database's division does not change between comparisons, so it is
    # looked up once rather than once per requested division.
    my $db_division = $dba->get_MetaContainer()->get_division();
    $dba->dbc()->disconnect_if_idle();

    # A database without a division cannot match any requested division.
    return 0 unless defined $db_division;

    my $matches = grep { defined && $_ eq $db_division } @divisions;
    return $matches ? 1 : 0;
}
1;
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment