diff --git a/modules/Bio/EnsEMBL/Pipeline/Production/CodingDensity.pm b/modules/Bio/EnsEMBL/Pipeline/Production/CodingDensity.pm new file mode 100644 index 0000000000000000000000000000000000000000..ed5455ad5816c943c56a5e9b4beb54b6ca6b4e08 --- /dev/null +++ b/modules/Bio/EnsEMBL/Pipeline/Production/CodingDensity.pm @@ -0,0 +1,22 @@ +package Bio::EnsEMBL::Pipeline::Production::CodingDensity; + +use base qw/Bio::EnsEMBL::Pipeline::Production::DensityGenerator/; + + +use strict; +use warnings; + + +sub get_density { + my ($self, $block) = @_; + my @biotypes = $self->get_biotype_group("coding"); + my $count = 0; + foreach my $biotype (@biotypes) { + $count += scalar(@{ $block->get_all_Genes_by_type($biotype) }); + } + return $count; +} + +1; + + diff --git a/modules/Bio/EnsEMBL/Pipeline/Production/DensityGenerator.pm b/modules/Bio/EnsEMBL/Pipeline/Production/DensityGenerator.pm index 77b4629effe20030aecf99e3e4e3642f35879052..f71a0278892f55c612eb660ad4f75c323cb5e3fd 100644 --- a/modules/Bio/EnsEMBL/Pipeline/Production/DensityGenerator.pm +++ b/modules/Bio/EnsEMBL/Pipeline/Production/DensityGenerator.pm @@ -162,5 +162,20 @@ sub get_analysis { return $analysis; } +sub get_biotype_group { + my ($self, $biotype) = @_; + my $prod_dba = $self->get_production_DBAdaptor(); + my $helper = $prod_dba->dbc()->sql_helper(); + my $sql = q{ + SELECT name + FROM biotype + WHERE object_type = 'gene' + AND is_current = 1 + AND biotype_group = ? + AND db_type like '%core%' }; + my @biotypes = @{ $helper->execute_simple(-SQL => $sql, -PARAMS => [$biotype]) }; + return @biotypes; +} + 1; diff --git a/modules/Bio/EnsEMBL/Pipeline/Production/EmailSummaryCore.pm b/modules/Bio/EnsEMBL/Pipeline/Production/EmailSummaryCore.pm index ff199478edff6ae2b387d3df403e9206136712d4..df7020175b8ce4d8f527f8c8e4bc305942ebc490 100644 --- a/modules/Bio/EnsEMBL/Pipeline/Production/EmailSummaryCore.pm +++ b/modules/Bio/EnsEMBL/Pipeline/Production/EmailSummaryCore.pm @@ -14,6 +14,10 @@ sub fetch_input { my $gene_gc = $self->jobs('GeneGC'); my $percent_gc = $self->jobs('PercentGC'); my $percent_repeat = $self->jobs('PercentRepeat'); + my $coding_density = $self->jobs('CodingDensity'); + my $pseudogene_density = $self->jobs('PseudogeneDensity'); + my $non_coding_density = $self->jobs('NonCodingDensity'); + my @args = ( $pep_stats->{successful_jobs}, @@ -24,11 +28,20 @@ sub fetch_input { $percent_gc->{failed_jobs}, $percent_repeat->{successful_jobs}, $percent_repeat->{failed_jobs}, + $coding_density->{successful_jobs}, + $coding_density->{failed_jobs}, + $pseudogene_density->{successful_jobs}, + $pseudogene_density->{failed_jobs}, + $non_coding_density->{successful_jobs}, + $non_coding_density->{failed_jobs}, $self->failed(), $self->summary($pep_stats), $self->summary($gene_gc), $self->summary($percent_gc), $self->summary($percent_repeat), + $self->summary($coding_density), + $self->summary($pseudogene_density), + $self->summary($non_coding_density), ); my $msg = sprintf(<<'MSG', @args); @@ -38,6 +51,9 @@ Your FASTA Pipeline has finished. We have: * %d species with gene gc (%d failed) * %d species with percent gc (%d failed) * %d species with percent repeat (%d failed) + * %d species with coding density (%d failed) + * %d species with pseudogene density (%d failed) + * %d species with non coding density (%d failed) %s diff --git a/modules/Bio/EnsEMBL/Pipeline/Production/NonCodingDensity.pm b/modules/Bio/EnsEMBL/Pipeline/Production/NonCodingDensity.pm new file mode 100644 index 0000000000000000000000000000000000000000..d9c7fff6deca9feb12353d00a0e3612f73e08d0a --- /dev/null +++ b/modules/Bio/EnsEMBL/Pipeline/Production/NonCodingDensity.pm @@ -0,0 +1,22 @@ +package Bio::EnsEMBL::Pipeline::Production::NonCodingDensity; + +use base qw/Bio::EnsEMBL::Pipeline::Production::DensityGenerator/; + + +use strict; +use warnings; + + +sub get_density { + my ($self, $block) = @_; + my @biotypes = $self->get_biotype_group("non-coding"); + my $count = 0; + foreach my $biotype (@biotypes) { + $count += scalar(@{ $block->get_all_Genes_by_type($biotype) }); + } + return $count; +} + +1; + + diff --git a/modules/Bio/EnsEMBL/Pipeline/Production/PepStats.pm b/modules/Bio/EnsEMBL/Pipeline/Production/PepStats.pm index dda3b87a26b8406f6a023324c63513128b33526c..0f98b2e7101d8c9ff69f6d8aee7cd3640cddf2c8 100644 --- a/modules/Bio/EnsEMBL/Pipeline/Production/PepStats.pm +++ b/modules/Bio/EnsEMBL/Pipeline/Production/PepStats.pm @@ -122,8 +122,7 @@ sub dump_translation { WHERE tr.transcript_id = tl.transcript_id AND tr.seq_region_id = s.seq_region_id AND s.coord_system_id = cs.coord_system_id - AND cs.species_id = ? - ORDER by tl.translation_id }; + AND cs.species_id = ? }; my @translation_ids = @{ $helper->execute_simple(-SQL => $sql, -PARAMS => [$dba->species_id()]) }; for my $dbid (@translation_ids) { my $translation = $ta->fetch_by_dbID($dbid); diff --git a/modules/Bio/EnsEMBL/Pipeline/Production/PseudogeneDensity.pm b/modules/Bio/EnsEMBL/Pipeline/Production/PseudogeneDensity.pm new file mode 100644 index 0000000000000000000000000000000000000000..a95843c3230f752456775635f1048b6b031dd6be --- /dev/null +++ b/modules/Bio/EnsEMBL/Pipeline/Production/PseudogeneDensity.pm @@ -0,0 +1,22 @@ +package Bio::EnsEMBL::Pipeline::Production::PseudogeneDensity; + +use base qw/Bio::EnsEMBL::Pipeline::Production::DensityGenerator/; + + +use strict; +use warnings; + + +sub get_density { + my ($self, $block) = @_; + my @biotypes = $self->get_biotype_group("pseudogene"); + my $count = 0; + foreach my $biotype (@biotypes) { + $count += scalar(@{ $block->get_all_Genes_by_type($biotype) }); + } + return $count; +} + +1; + +