Commit 5cf5d02e authored by Dan Staines's avatar Dan Staines
Browse files

files for running handover pipeline over EG databases

parent 16056298
package Bio::EnsEMBL::Pipeline::PipeConfig::EG_misc_tasks_conf;
use strict;
use warnings;
use base ('Bio::EnsEMBL::Hive::PipeConfig::HiveGeneric_conf');
use Bio::EnsEMBL::ApiVersion qw/software_version/;
# Default option values for the pipeline.
#
# Returns a hash reference made of the base class defaults, followed by the
# optional overrides and the derived defaults declared here; later groups
# take precedence over earlier ones when keys collide.
sub default_options {
    my ($self) = @_;

    # Everything the base class already provides.
    my %inherited = %{ $self->SUPER::default_options() };

    # Optional overrides
    my %overrides = (
        species   => [],
        division  => [],
        release   => software_version(),
        run_all   => 0,
        bin_count => '150',
        max_run   => '100',
    );

    # Defaults derived from other options
    my %derived = (
        pipeline_name => 'misc_tasks_' . $self->o('release'),
        email         => $self->o( 'ENV', 'USER' ) . '@ebi.ac.uk',
    );

    return { %inherited, %overrides, %derived };
}
# Commands executed when the pipeline database is created.
#
# Nothing is added on top of the inherited database and hive table creation
# commands; they are returned in a new array reference.
sub pipeline_create_commands {
    my ($self) = @_;

    my @commands = @{ $self->SUPER::pipeline_create_commands };

    return \@commands;
}
## See diagram for pipeline structure
# Analysis definitions for the pipeline.
#
# Returns an array reference holding one hash per analysis: the species
# scheduler first, then the core statistics analyses, the variation
# analyses and finally the two e-mail notification analyses.
sub pipeline_analyses {
    my ($self) = @_;

    my $production_ns = 'Bio::EnsEMBL::Pipeline::Production::';

    # Binning options passed to every density / percentage analysis.
    my @binning = (
        bin_count => $self->o('bin_count'),
        max_run   => $self->o('max_run'),
    );

    # Retry and capacity limits used by the core and variation analyses.
    my @core_limits      = ( -max_retry_count => 3, -hive_capacity => 100 );
    my @variation_limits = ( -max_retry_count => 1, -hive_capacity => 10 );

    my $scheduler = {
        -logic_name => 'ScheduleSpecies',
        -module     => $production_ns . 'EGSpeciesFactory',
        -parameters => {
            species  => $self->o('species'),
            division => $self->o('division'),
            run_all  => $self->o('run_all'),
            max_run  => $self->o('max_run'),
        },
        -input_ids       => [ {} ],
        -max_retry_count => 1,
        -flow_into       => {
            '3->B' => ['PercentRepeat'],
            'B->3' => ['PercentGC'],
            '3->C' => ['CodingDensity'],
            'C->3' => ['NonCodingDensity'],
            '3->A' => [ 'PercentRepeat', 'CodingDensity', 'NonCodingDensity', 'PercentGC' ],
            # Should include 'PepStats'
            '2->A' => [ 'GeneGC', 'GeneCount', 'ConstitutiveExons' ],
            'A->1' => ['NotifyCore'],
            '4->D' => [ 'SnpDensity', 'SnpCount' ],
            'D->1' => ['NotifyVariation'],
        },
    };

    my @core_analyses = (
        {
            -logic_name => 'ConstitutiveExons',
            -module     => $production_ns . 'ConstitutiveExons',
            -parameters => { dbtype => 'core' },
            @core_limits,
            -rc_name => 'normal',
        },
        {
            -logic_name => 'PepStats',
            -module     => $production_ns . 'PepStats',
            -parameters => {
                tmpdir  => '/tmp',
                binpath => '/nfs/panda/ensemblgenomes/external/EMBOSS',
                dbtype  => 'core',
            },
            @core_limits,
            -rc_name => 'mem',
        },
        {
            -logic_name => 'GeneCount',
            -module     => $production_ns . 'GeneCount',
            @core_limits,
            -rc_name => 'normal',
        },
        {
            -logic_name => 'NonCodingDensity',
            -module     => $production_ns . 'NonCodingDensity',
            -parameters => {
                logic_name => 'noncodingdensity',
                value_type => 'sum',
                @binning,
            },
            @core_limits,
            -rc_name      => 'normal',
            -can_be_empty => 1,
            -flow_into    => ['PseudogeneDensity'],
        },
        {
            -logic_name => 'PseudogeneDensity',
            -module     => $production_ns . 'PseudogeneDensity',
            -parameters => {
                logic_name => 'pseudogenedensity',
                value_type => 'sum',
                @binning,
            },
            @core_limits,
            -rc_name      => 'normal',
            -can_be_empty => 1,
        },
        {
            -logic_name => 'CodingDensity',
            -module     => $production_ns . 'CodingDensity',
            -parameters => {
                logic_name => 'codingdensity',
                value_type => 'sum',
                @binning,
            },
            @core_limits,
            -rc_name      => 'normal',
            -can_be_empty => 1,
        },
        {
            -logic_name => 'GeneGC',
            -module     => $production_ns . 'GeneGC',
            @core_limits,
            -rc_name => 'normal',
        },
        {
            -logic_name => 'PercentGC',
            -module     => $production_ns . 'PercentGC',
            -parameters => {
                table      => 'repeat',
                logic_name => 'percentgc',
                value_type => 'ratio',
                @binning,
            },
            @core_limits,
            -rc_name      => 'normal',
            -can_be_empty => 1,
        },
        {
            -logic_name => 'PercentRepeat',
            -module     => $production_ns . 'PercentRepeat',
            -parameters => {
                logic_name => 'percentagerepeat',
                value_type => 'ratio',
                @binning,
            },
            @core_limits,
            -rc_name      => 'mem',
            -can_be_empty => 1,
        },
    );

    my @variation_analyses = (
        {
            -logic_name => 'SnpCount',
            -module     => $production_ns . 'SnpCount',
            @variation_limits,
            -rc_name      => 'normal',
            -can_be_empty => 1,
        },
        {
            -logic_name => 'SnpDensity',
            -module     => $production_ns . 'SnpDensity',
            -parameters => {
                table      => 'gene',
                logic_name => 'snpdensity',
                value_type => 'sum',
                @binning,
            },
            @variation_limits,
            -rc_name      => 'normal',
            -can_be_empty => 1,
        },
    );

    ####### NOTIFICATION
    my @notifications = (
        {
            -logic_name => 'NotifyCore',
            -module     => $production_ns . 'EmailSummaryCore',
            -parameters => {
                email   => $self->o('email'),
                subject => $self->o('pipeline_name') . ' (core) has finished',
            },
        },
        {
            -logic_name => 'NotifyVariation',
            -module     => $production_ns . 'EmailSummaryVariation',
            -parameters => {
                email   => $self->o('email'),
                subject => $self->o('pipeline_name') . ' (variation) has finished',
            },
        },
    );

    return [ $scheduler, @core_analyses, @variation_analyses, @notifications ];
}
# Parameters made available to every analysis of the pipeline.
#
# Returns a hash reference containing the base class parameters plus the
# release, species and division options of this configuration.
sub pipeline_wide_parameters {
    my ($self) = @_;

    return {
        # inherit other stuff from the base class
        %{ $self->SUPER::pipeline_wide_parameters() },
        release => $self->o('release'),
        species => $self->o('species'),

        # This key used to be a second 'species', which silently replaced
        # the species list with the division list and never exported
        # 'division' at all.
        division => $self->o('division'),
    };
}
# override the default method, to force an automatic loading of the registry in all workers
# Extra command line options for beekeeper.
#
# Returns the string '-reg_conf ' followed by the value of the 'registry'
# option.
sub beekeeper_extra_cmdline_options {
    my ($self) = @_;

    my $registry = $self->o('registry');

    return "-reg_conf $registry";
}
# Resource classes referenced by the -rc_name entries of the analyses.
#
# Returns a hash reference mapping each class name ('default', 'normal',
# 'mem') to a hash holding its LSF submission options.
sub resource_classes {
    my ($self) = @_;

    my %lsf_options_for = (
        default => '',
        normal  => '-q production-rh6 -n 4 -M 4000 -R "rusage[mem=4000]"',
        mem     => '-q production-rh6 -n 4 -M 12000 -R "rusage[mem=12000]"',
    );

    my %classes;
    for my $class_name ( keys %lsf_options_for ) {
        $classes{$class_name} = { 'LSF' => $lsf_options_for{$class_name} };
    }

    return \%classes;
}
1;
=pod
=head1 LICENSE
Copyright (c) 1999-2012 The European Bioinformatics Institute and
Genome Research Limited. All rights reserved.
This software is distributed under a modified Apache license.
For license details, please see
http://www.ensembl.org/info/about/code_licence.html
=head1 CONTACT
Please email comments or questions to the public Ensembl
developers list at <dev@ensembl.org>.
Questions may also be sent to the Ensembl help desk at
<helpdesk@ensembl.org>.
=head1 NAME
Bio::EnsEMBL::Pipeline::Production::EGSpeciesFactory
=head1 DESCRIPTION
An extension of the ClassSpeciesFactory code, for use with
EnsemblGenomes, which uses the production database differently
and thus needs a simpler 'is_run' function.
=cut
package Bio::EnsEMBL::Pipeline::Production::EGSpeciesFactory;
use strict;
use warnings;
use base qw/Bio::EnsEMBL::Pipeline::Production::ClassSpeciesFactory/;
# Decide whether an analysis class should be run for a database.
#
# Arguments : $dba   - database adaptor; it is handed to has_karyotype() and
#                      its dbc() is asked to disconnect if idle
#             $class - name of the analysis class being considered
# Returns   : the result of has_karyotype($dba) when $class contains
#             'karyotype', otherwise 1
sub is_run {
    my ( $self, $dba, $class ) = @_;

    # The production name was previously fetched here but never used, so
    # that lookup has been dropped.
    my $should_run = 1;

    # Substring match, so any class name containing 'karyotype' qualifies;
    # the defined() guard avoids a warning when no class is given.
    if ( defined $class && $class =~ /karyotype/ ) {
        $should_run = $self->has_karyotype($dba);
    }

    # Release the connection on every path; the karyotype branch used to
    # return before reaching this call.
    $dba->dbc()->disconnect_if_idle();

    return $should_run;
}
# Decide whether a database should be processed, adding a division filter
# on top of the parent class decision.
#
# Arguments : $dba - database adaptor; its meta container supplies the
#                    division and its dbc() is asked to disconnect if idle
# Returns   : the parent's result unchanged when that result is not 1 or
#             when no division is requested; otherwise 1 if the database's
#             division equals one of the requested divisions and 0 if not
sub process_dba {
    my ( $self, $dba ) = @_;

    my $result = $self->SUPER::process_dba($dba);

    # The 'division' parameter is normally an array reference. Dereferencing
    # it blindly dies under strict refs when it is undefined or a plain
    # string, so an undefined or empty value is treated as "no filter" and a
    # plain string as a single division.
    my $requested = $self->param('division');
    my @divisions =
        !defined $requested       ? ()
      : ref $requested eq 'ARRAY' ? @{$requested}
      : length $requested         ? ($requested)
      :                             ();

    if ( defined $result && $result == 1 && @divisions ) {

        # Look the division up once instead of once per requested division.
        my $db_division = $dba->get_MetaContainer()->get_division();

        # A database without a division cannot match any requested one.
        $result = 0;
        if ( defined $db_division ) {
            for my $division (@divisions) {
                if ( $db_division eq $division ) {
                    $result = 1;
                    last;
                }
            }
        }

        $dba->dbc()->disconnect_if_idle();
    }

    return $result;
}
1;
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment