PepStats.pm 4.83 KB
Newer Older
1 2 3 4 5 6 7 8 9 10
package Bio::EnsEMBL::Pipeline::Production::PepStats;

use strict;
use warnings;

use base qw/Bio::EnsEMBL::Pipeline::Base/;

use Bio::EnsEMBL::Attribute;

sub run {
11
  my ($self)  = @_;
12 13
  my $species = $self->param('species');
  my $dbtype  = $self->param('dbtype');
14 15 16 17
  my $dba = Bio::EnsEMBL::Registry->get_DBAdaptor($species, $dbtype);
  if ($dbtype =~ 'vega' || $dbtype =~ 'otherf') {
	my $core_dba = Bio::EnsEMBL::Registry->get_DBAdaptor($species, 'core');
	$dba->dnadb($core_dba);
18
  }
19
  my $helper = $dba->dbc()->sql_helper();
20

Magali Ruffier's avatar
Magali Ruffier committed
21 22
  my @attrib_codes = $self->get_attrib_codes();
  $self->delete_old_attrib($dba, @attrib_codes);
23 24 25 26 27

  my $tmpfile = $self->param('tmpdir') . "/$$.pep";
  $self->dump_translation($dba, $tmpfile);
  my $results = $self->run_pepstats($tmpfile);

28 29
  my $attrib_types = $self->get_attrib_types();

30
  foreach my $translation (keys %$results) {
31
	$self->store_attrib($attrib_types, $translation, $results->{$translation});
32 33 34
  }
}

35 36 37 38 39 40 41 42
sub get_attrib_types {
  my ($self)       = @_;
  my $prod_helper  = $self->get_production_DBAdaptor()->dbc()->sql_helper();
  my $attrib_types = {};
  for my $row (
	@{$prod_helper->execute(
		-SQL => q{
    SELECT code,name,description
43
    FROM attrib_type
44 45 46 47
    WHERE code in ('NumResidues','MolecularWeight','AvgResWeight','Charge','IsoPoint') })})
  {
	$attrib_types->{$row->[0]} = {name        => $row->[1],
								  description => $row->[2]};
48
  }
49
  return $attrib_types;
50 51
}

52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69
my $key_names        = {};
my $key_descriptions = {};

sub store_attrib {
  my ($self, $attrib_types, $translation, $results) = @_;
  my $dbtype = $self->param('dbtype');
  my $aa = Bio::EnsEMBL::Registry->get_adaptor($self->param('species'), $dbtype, 'Attribute');
  my @attribs = ();
  foreach my $key (keys %$results) {
	my $value = $results->{$key};
	my $attrib = Bio::EnsEMBL::Attribute->new(-NAME        => $attrib_types->{$key}{name},
											  -CODE        => $key,
											  -VALUE       => $value,
											  -DESCRIPTION => $attrib_types->{$key}{description});
	push(@attribs, $attrib);
  } ## end foreach my $key (keys %$results)
  $aa->store_on_Translation($translation, \@attribs);
} ## end sub store_attrib
70 71 72 73

sub run_pepstats {
  my ($self, $tmpfile) = @_;
  my $PEPSTATS = $self->param('binpath') . '/bin/pepstats';
74 75
  open(my $fh, "$PEPSTATS -filter < $tmpfile 2>&1 |"); ## no critic
  my @lines   = <$fh>;
76 77
  my $attribs = {};
  my $tid;
78
  close($fh);
79
  foreach my $line (@lines) {
80 81 82 83 84 85 86 87 88 89 90 91 92 93

	if ($line =~ /PEPSTATS of ([^ ]+)/) {
	  $tid = $1;
	} elsif (defined $tid) {
	  if ($line =~ /^Molecular weight = (\S+)(\s+)Residues = (\d+).*/) {
		$attribs->{$tid}{'NumResidues'}     = $3;
		$attribs->{$tid}{'MolecularWeight'} = $1;
	  } elsif ($line =~ /^Average(\s+)(\S+)(\s+)(\S+)(\s+)=(\s+)(\S+)(\s+)(\S+)(\s+)=(\s+)(\S+)/) {
		$attribs->{$tid}{'AvgResWeight'} = $7;
		$attribs->{$tid}{'Charge'}       = $12;
	  } elsif ($line =~ /^Isoelectric(\s+)(\S+)(\s+)=(\s+)(\S+)/) {
		$attribs->{$tid}{'IsoPoint'} = $5;
	  }
	}
94 95
  }
  return $attribs;
96
} ## end sub run_pepstats
97 98

sub delete_old_attrib {
Magali Ruffier's avatar
Magali Ruffier committed
99
  my ($self, $dba, @attrib_codes) = @_;
100
  my $helper = $dba->dbc()->sql_helper();
101
  my $sql    = q{
102 103 104 105 106 107 108 109 110
     DELETE ta
     FROM translation_attrib ta, attrib_type at, translation tl, transcript tr, seq_region s, coord_system c
     WHERE at.attrib_type_id = ta.attrib_type_id
     AND ta.translation_id = tl.translation_id
     AND tl.transcript_id = tr.transcript_id
     AND tr.seq_region_id = s.seq_region_id
     AND s.coord_system_id = c.coord_system_id
     AND c.species_id = ?
     AND at.code = ? };
Magali Ruffier's avatar
Magali Ruffier committed
111
  foreach my $code (@attrib_codes) {
112
	$helper->execute_update(-SQL => $sql, -PARAMS => [$dba->species_id(), $code]);
113 114 115 116
  }
}

sub get_attrib_codes {
117 118 119 120
  my ($self)      = @_;
  my $prod_dba    = $self->get_production_DBAdaptor();
  my $prod_helper = $prod_dba->dbc()->sql_helper();
  my $sql         = q{
Magali Ruffier's avatar
Magali Ruffier committed
121
    SELECT code
122 123
    FROM attrib_type
    WHERE description = 'Pepstats attributes' };
124
  my @attrib_codes = @{$prod_helper->execute_simple(-SQL => $sql)};
Magali Ruffier's avatar
Magali Ruffier committed
125
  return @attrib_codes;
126 127 128 129 130
}

sub dump_translation {
  my ($self, $dba, $tmpfile) = @_;
  my $helper = $dba->dbc()->sql_helper();
131 132
  my $dbtype = $self->param('dbtype');
  my $ta     = Bio::EnsEMBL::Registry->get_adaptor($self->param('species'), $dbtype, 'translation');
133
  open(my $fh, '>', $tmpfile);
134 135 136 137 138 139
  my $sql = q{
    SELECT tl.translation_id
    FROM translation tl, transcript tr, seq_region s, coord_system cs
    WHERE tr.transcript_id = tl.transcript_id
    AND tr.seq_region_id = s.seq_region_id
    AND s.coord_system_id = cs.coord_system_id
140
    AND cs.species_id = ? };
141 142
  my @translation_ids = @{$helper->execute_simple(-SQL => $sql, -PARAMS => [$dba->species_id()])};

143
  for my $dbid (@translation_ids) {
144 145 146 147 148
	my $translation = $ta->fetch_by_dbID($dbid);
	my $peptide_seq = $translation->seq();
	if ($peptide_seq !~ /\n$/) {
	  $peptide_seq .= "\n";
	}
149
	print $fh ">$dbid\n$peptide_seq";
150
  }
151
  close($fh);
152
} ## end sub dump_translation
153 154

1;