Skip to content
Snippets Groups Projects
Commit eca922b5 authored by Dan Staines's avatar Dan Staines
Browse files

Efficiency improvement: retrieve the attrib type details once at the start and reuse them

parent cc546ef9
No related branches found
No related tags found
No related merge requests found
......@@ -5,29 +5,37 @@ use warnings;
use base qw/Bio::EnsEMBL::Pipeline::Production::StatsGenerator/;
# Compute the GC percentage of every gene of the configured species and store
# it as a 'GeneGC' gene attribute.
#
# Reads the 'species' parameter. Existing 'GeneGC' attributes are removed via
# delete_old_attrib() first. The attribute type's name and description are read
# once from the production database and reused for every gene.
#
# Dies if the production database has no attrib_type row for 'GeneGC'.
sub run {
  my ($self) = @_;
  my $species = $self->param('species');
  my $dba = Bio::EnsEMBL::Registry->get_DBAdaptor($species, 'core');
  my $attrib_code = 'GeneGC';
  $self->delete_old_attrib($dba, $attrib_code);
  my $genes = Bio::EnsEMBL::Registry->get_adaptor($species, 'core', 'gene')->fetch_all();
  my $aa = Bio::EnsEMBL::Registry->get_adaptor($species, 'core', 'Attribute');
  my $prod_helper = $self->get_production_DBAdaptor()->dbc()->sql_helper();

  # Select exactly the columns unpacked below, in the same order. Selecting
  # 'code' as well would shift every value by one position, so the code would
  # be stored as the name and the name as the description.
  my $rows = $prod_helper->execute(
    -SQL => q{
SELECT name, description
FROM attrib_type
WHERE code = ? },
    -PARAMS => [$attrib_code]);
  die "No attrib_type row found for code '$attrib_code'\n" unless @$rows;
  my ($name, $description) = @{$rows->[0]};

  # shift() empties the gene list as it is processed.
  while (my $gene = shift @$genes) {
    my $count = $gene->feature_Slice()->get_base_count->{'%gc'};
    # Only values greater than zero are stored.
    if ($count > 0) {
      $self->store_attrib($aa, $gene, $count, $attrib_code, $name, $description);
    }
  }
  return;
} ## end sub run
# Remove previously stored gene attributes for one attribute code.
#
# Runs a DELETE on gene_attrib (joined to attrib_type, gene, seq_region and
# coord_system) through the core adaptor's SQL helper, binding the adaptor's
# species id and $attrib_code.
#
# NOTE(review): this span comes from a rendered diff. The "my $sql" line
# appears twice (removed and added copies), and the hunk boundary below hides
# the rest of the WHERE clause, so the full filter cannot be confirmed here.
sub delete_old_attrib {
my ($self, $dba, $attrib_code) = @_;
my $helper = $dba->dbc()->sql_helper();
my $sql = q{
my $sql = q{
DELETE ga
FROM gene_attrib ga, attrib_type at, gene g, seq_region s, coord_system cs
WHERE s.seq_region_id = g.seq_region_id
......@@ -39,27 +47,14 @@ sub delete_old_attrib {
$helper->execute_update(-SQL => $sql, -PARAMS => [$dba->species_id(), $attrib_code]);
}
# Build one Bio::EnsEMBL::Attribute and store it on a gene.
#
# Arguments:
#   $aa          - attribute adaptor used for the store (store_on_Gene is called on it)
#   $gene        - gene the attribute is attached to
#   $count       - attribute value
#   $code        - attribute type code
#   $name        - attribute type name
#   $description - attribute type description
#
# The adaptor and the type details are passed in by the caller, so this sub
# issues no lookups of its own and is cheap to call once per gene.
sub store_attrib {
  my ($self, $aa, $gene, $count, $code, $name, $description) = @_;
  my $attrib = Bio::EnsEMBL::Attribute->new(-NAME        => $name,
                                            -CODE        => $code,
                                            -VALUE       => $count,
                                            -DESCRIPTION => $description);
  $aa->store_on_Gene($gene, [$attrib]);
}
1;
......@@ -7,17 +7,16 @@ use base qw/Bio::EnsEMBL::Pipeline::Base/;
use Bio::EnsEMBL::Attribute;
# Entry point: recompute the pepstats-derived translation attributes.
#
# Reads the 'species' and 'dbtype' parameters and fetches the matching database
# adaptor. When dbtype matches 'vega' or 'otherf', the core adaptor is attached
# as the DNA database. Attributes for the codes returned by get_attrib_codes()
# are deleted, translations are dumped to $tmpfile, pepstats is run on that
# file, and the parsed results are stored once per translation id.
#
# NOTE(review): this span comes from a rendered diff, so removed and added
# copies of several statements appear back to back. The hunk boundary below
# hides the lines between the delete and the dump, including where $tmpfile
# is assigned.
sub run {
my ($self) = @_;
my ($self) = @_;
my $species = $self->param('species');
my $dbtype = $self->param('dbtype');
my $dba = Bio::EnsEMBL::Registry->get_DBAdaptor($species, $dbtype);
if ( $dbtype =~ 'vega' || $dbtype =~ 'otherf' ) {
my $core_dba = Bio::EnsEMBL::Registry->get_DBAdaptor($species, 'core');
$dba->dnadb($core_dba);
my $dba = Bio::EnsEMBL::Registry->get_DBAdaptor($species, $dbtype);
if ($dbtype =~ 'vega' || $dbtype =~ 'otherf') {
my $core_dba = Bio::EnsEMBL::Registry->get_DBAdaptor($species, 'core');
$dba->dnadb($core_dba);
}
my $helper = $dba->dbc()->sql_helper();
my $helper = $dba->dbc()->sql_helper();
my @attrib_codes = $self->get_attrib_codes();
$self->delete_old_attrib($dba, @attrib_codes);
......@@ -26,81 +25,80 @@ sub run {
$self->dump_translation($dba, $tmpfile);
my $results = $self->run_pepstats($tmpfile);
my $attrib_types = $self->get_attrib_types();
foreach my $translation (keys %$results) {
$self->store_attrib($translation, $results->{$translation});
$self->store_attrib($attrib_types, $translation, $results->{$translation});
}
}
# Fetch the name and description of the pepstats attribute types from the
# production database in a single query.
#
# Returns a hash reference keyed by attribute code:
#   { $code => { name => $name, description => $description }, ... }
# Codes with no row in attrib_type are simply absent from the result.
sub get_attrib_types {
  my ($self) = @_;
  my $prod_helper = $self->get_production_DBAdaptor()->dbc()->sql_helper();
  my $rows = $prod_helper->execute(
    -SQL => q{
SELECT code,name,description
FROM attrib_type
WHERE code in ('NumResidues','MolecularWeight','AvgResWeight','Charge','IsoPoint') });
  my $attrib_types = {};
  for my $row (@$rows) {
    # Row layout follows the SELECT list: code, name, description.
    my ($code, $name, $description) = @$row;
    $attrib_types->{$code} = {name => $name, description => $description};
  }
  return $attrib_types;
}
# NOTE(review): neither of these file-scoped hash references is read or written
# by any code visible in this view; they look like leftovers. Confirm against
# the complete file before removing them.
my $key_names = {};
my $key_descriptions = {};
# Store the pepstats results of one translation as translation attributes.
#
# Arguments:
#   $attrib_types - hash ref: code => { name => ..., description => ... }
#   $translation  - passed through unchanged to store_on_Translation
#   $results      - hash ref: attribute code => value, for this translation
#
# One Bio::EnsEMBL::Attribute is built per key of $results, and all of them
# are stored with a single store_on_Translation call on the Attribute adaptor
# for the 'species' / 'dbtype' parameters.
sub store_attrib {
  my ($self, $attrib_types, $translation, $results) = @_;
  my $group = $self->param('dbtype');
  my $attribute_adaptor =
    Bio::EnsEMBL::Registry->get_adaptor($self->param('species'), $group, 'Attribute');
  my @attribs = map {
    my $code = $_;
    Bio::EnsEMBL::Attribute->new(-NAME        => $attrib_types->{$code}{name},
                                 -CODE        => $code,
                                 -VALUE       => $results->{$code},
                                 -DESCRIPTION => $attrib_types->{$code}{description})
  } keys %$results;
  $attribute_adaptor->store_on_Translation($translation, \@attribs);
} ## end sub store_attrib
# Run the pepstats executable on a peptide file and parse its report.
#
# The executable is expected at <binpath>/bin/pepstats, where 'binpath' is a
# job parameter. It is invoked with -filter, reading $tmpfile on stdin; stderr
# is merged into stdout so everything it prints is scanned.
#
# Returns a hash reference keyed by the identifier found on each
# "PEPSTATS of <id>" line. Each value is a hash with whichever of
# NumResidues, MolecularWeight, AvgResWeight, Charge and IsoPoint were found
# in that record.
#
# Dies if the command cannot be started; warns if it ends unsuccessfully.
sub run_pepstats {
  my ($self, $tmpfile) = @_;
  my $PEPSTATS = $self->param('binpath') . '/bin/pepstats';

  # The redirections need a shell, so the command stays a single string; the
  # three-argument open and lexical handle replace the bareword two-argument form.
  my $command = "$PEPSTATS -filter < $tmpfile 2>&1";
  open(my $out, '-|', $command) or die "Cannot run '$command': $!";
  my @lines = <$out>;
  close($out)
    or warn "'$command' "
    . ($! ? "could not be closed: $!" : 'exited with status ' . ($? >> 8)) . "\n";

  my $attribs = {};
  my $tid;
  foreach my $line (@lines) {
    if ($line =~ /PEPSTATS of ([^ ]+)/) {
      # Start of a new record; the lines that follow belong to this identifier.
      $tid = $1;
    } elsif (defined $tid) {
      if ($line =~ /^Molecular weight = (\S+)\s+Residues = (\d+)/) {
        $attribs->{$tid}{'MolecularWeight'} = $1;
        $attribs->{$tid}{'NumResidues'}     = $2;
      } elsif ($line =~ /^Average\s+\S+\s+\S+\s+=\s+(\S+)\s+\S+\s+=\s+(\S+)/) {
        # Two "label = value" pairs on one line: average residue weight, then charge.
        $attribs->{$tid}{'AvgResWeight'} = $1;
        $attribs->{$tid}{'Charge'}       = $2;
      } elsif ($line =~ /^Isoelectric\s+\S+\s+=\s+(\S+)/) {
        $attribs->{$tid}{'IsoPoint'} = $1;
      }
    }
  }
  return $attribs;
} ## end sub run_pepstats
# Remove previously stored translation attributes for each given code.
#
# For every code in @attrib_codes, runs a DELETE on translation_attrib (joined
# to attrib_type, translation, transcript, seq_region and coord_system) through
# the adaptor's SQL helper, binding the adaptor's species id and the code.
#
# NOTE(review): this span comes from a rendered diff. The "my $sql" and
# execute_update lines each appear twice (removed and added copies), and the
# hunk boundary below hides part of the WHERE clause.
sub delete_old_attrib {
my ($self, $dba, @attrib_codes) = @_;
my $helper = $dba->dbc()->sql_helper();
my $sql = q{
my $sql = q{
DELETE ta
FROM translation_attrib ta, attrib_type at, translation tl, transcript tr, seq_region s, coord_system c
WHERE at.attrib_type_id = ta.attrib_type_id
......@@ -111,24 +109,22 @@ sub delete_old_attrib {
AND c.species_id = ?
AND at.code = ? };
foreach my $code (@attrib_codes) {
$helper->execute_update(-SQL => $sql, -PARAMS => [$dba->species_id(), $code]) ;
$helper->execute_update(-SQL => $sql, -PARAMS => [$dba->species_id(), $code]);
}
}
# Return the list of attribute codes whose attrib_type description in the
# production database is exactly 'Pepstats attributes'.
sub get_attrib_codes {
  my ($self) = @_;
  my $query = q{
SELECT code
FROM attrib_type
WHERE description = 'Pepstats attributes' };
  my $codes = $self->get_production_DBAdaptor()->dbc()->sql_helper()->execute_simple(-SQL => $query);
  return @$codes;
}
# Write the peptide sequence of every selected translation to the TMP
# filehandle as FASTA-style records: a ">dbID" header line, then the sequence,
# with a trailing newline added if the sequence lacks one.
#
# The translation ids come from an SQL query bound to the adaptor's species id;
# each translation is then fetched by dbID through $ta. TMP is closed at the end.
#
# NOTE(review): this span comes from a rendered diff, so removed and added
# copies of the loop body appear back to back. The hunk boundary below hides
# where $ta and $sql are defined and where TMP is opened.
sub dump_translation {
my ($self, $dba, $tmpfile) = @_;
my $helper = $dba->dbc()->sql_helper();
......@@ -142,20 +138,17 @@ sub dump_translation {
AND tr.seq_region_id = s.seq_region_id
AND s.coord_system_id = cs.coord_system_id
AND cs.species_id = ? };
my @translation_ids = @{ $helper->execute_simple(-SQL => $sql, -PARAMS => [$dba->species_id()]) };
my @translation_ids = @{$helper->execute_simple(-SQL => $sql, -PARAMS => [$dba->species_id()])};
for my $dbid (@translation_ids) {
my $translation = $ta->fetch_by_dbID($dbid);
my $peptide_seq = $translation->seq();
if ( $peptide_seq !~ /\n$/ ) {
$peptide_seq .= "\n";
}
print TMP ">$dbid\n$peptide_seq";
my $translation = $ta->fetch_by_dbID($dbid);
my $peptide_seq = $translation->seq();
if ($peptide_seq !~ /\n$/) {
$peptide_seq .= "\n";
}
print TMP ">$dbid\n$peptide_seq";
}
close(TMP);
}
} ## end sub dump_translation
1;
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment