PepStats.pm 4.86 KB
Newer Older
1
2
3
4
5
6
7
8
9
10
11
12
package Bio::EnsEMBL::Pipeline::Production::PepStats;

use strict;
use warnings;

use base qw/Bio::EnsEMBL::Pipeline::Base/;

use Bio::EnsEMBL::Attribute;


sub run {
  my ($self) = @_;
13
14
15
  my $species = $self->param('species');
  my $dbtype  = $self->param('dbtype');
  my $dba     = Bio::EnsEMBL::Registry->get_DBAdaptor($species, $dbtype);
16
17
18
19
  if ( $dbtype =~ 'vega' || $dbtype =~ 'otherf' ) {
    my $core_dba = Bio::EnsEMBL::Registry->get_DBAdaptor($species, 'core');
    $dba->dnadb($core_dba);
  }
20
  my $helper  = $dba->dbc()->sql_helper();
21

Magali Ruffier's avatar
Magali Ruffier committed
22
23
  my @attrib_codes = $self->get_attrib_codes();
  $self->delete_old_attrib($dba, @attrib_codes);
24
25
26
27
28
29
30
31
32
33
34
35
36

  my $tmpfile = $self->param('tmpdir') . "/$$.pep";
  $self->dump_translation($dba, $tmpfile);
  my $results = $self->run_pepstats($tmpfile);

  foreach my $translation (keys %$results) {
    $self->store_attrib($translation, $results->{$translation});
  }
}


sub store_attrib {
  my ($self, $translation, $results) = @_;
37
38
  my $dbtype      = $self->param('dbtype');
  my $aa          = Bio::EnsEMBL::Registry->get_adaptor($self->param('species'), $dbtype, 'Attribute');
39
40
41
  my $prod_dba    = $self->get_production_DBAdaptor();
  my $prod_helper = $prod_dba->dbc()->sql_helper();
  my @attribs;
Magali Ruffier's avatar
Magali Ruffier committed
42
43
44
45
46
47
  my $sqlName = q{
    SELECT name
    FROM attrib_type
    WHERE code = ? };
  my $sqlDesc = q{
    SELECT description
48
49
50
    FROM attrib_type
    WHERE code = ? };
  foreach my $key (keys %$results) {
Magali Ruffier's avatar
Magali Ruffier committed
51
52
53
54
55
56
57
58
59
    my ($name, $description);
    my @names = @{ $prod_helper->execute_simple(-SQL => $sqlName, -PARAMS => [$key]) };
    foreach my $bit (@names) {
      $name .= $bit . " ";
    }
    my @descriptions = @{ $prod_helper->execute_simple(-SQL => $sqlDesc, -PARAMS => [$key]) };
    foreach my $bit (@descriptions) {
      $description .= $bit . " ";
    }
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
    my $value = $results->{$key};
    my $attrib = Bio::EnsEMBL::Attribute->new(
    -NAME        => $name,
    -CODE        => $key,
    -VALUE       => $value,
    -DESCRIPTION => $description, 
    );
    push(@attribs, $attrib);
  }
  $aa->store_on_Translation($translation, \@attribs);
}


sub run_pepstats {
  my ($self, $tmpfile) = @_;
  my $PEPSTATS = $self->param('binpath') . '/bin/pepstats';
  open(OUT, "$PEPSTATS -filter < $tmpfile 2>&1 |" );
  my @lines = <OUT>;
  my $attribs = {};
  my $tid;
  close(OUT);
  foreach my $line (@lines) {
    if ( $line =~ /PEPSTATS of ([^ ]+)/ ) {
      $tid = $1;
    } elsif (defined $tid) {
      if ( $line =~ /^Molecular weight = (\S+)(\s+)Residues = (\d+).*/ ) {
        $attribs->{$tid}{'NumResidues'}   = $3;
        $attribs->{$tid}{'MolecularWeight'}     = $1;
      } elsif ( $line =~ /^Average(\s+)(\S+)(\s+)(\S+)(\s+)=(\s+)(\S+)(\s+)(\S+)(\s+)=(\s+)(\S+)/ ) {
        $attribs->{$tid}{'AvgResWeight'} = $7;
        $attribs->{$tid}{'Charge'}              = $12;
      } elsif ( $line =~ /^Isoelectric(\s+)(\S+)(\s+)=(\s+)(\S+)/ ) {
        $attribs->{$tid}{'IsoPoint'}   = $5;
      }
    }
  }
  return $attribs;
}


sub delete_old_attrib {
Magali Ruffier's avatar
Magali Ruffier committed
101
  my ($self, $dba, @attrib_codes) = @_;
102
103
104
105
106
107
108
109
110
111
112
  my $helper = $dba->dbc()->sql_helper();
  my $sql = q{
     DELETE ta
     FROM translation_attrib ta, attrib_type at, translation tl, transcript tr, seq_region s, coord_system c
     WHERE at.attrib_type_id = ta.attrib_type_id
     AND ta.translation_id = tl.translation_id
     AND tl.transcript_id = tr.transcript_id
     AND tr.seq_region_id = s.seq_region_id
     AND s.coord_system_id = c.coord_system_id
     AND c.species_id = ?
     AND at.code = ? };
Magali Ruffier's avatar
Magali Ruffier committed
113
114
  foreach my $code (@attrib_codes) {
    $helper->execute_update(-SQL => $sql, -PARAMS => [$dba->species_id(), $code]) ;
115
116
117
118
119
120
121
122
123
  }
}


sub get_attrib_codes {
  my ($self) = @_;
  my $prod_dba   = $self->get_production_DBAdaptor();
  my $prod_helper     = $prod_dba->dbc()->sql_helper();
  my $sql = q{
Magali Ruffier's avatar
Magali Ruffier committed
124
    SELECT code
125
126
    FROM attrib_type
    WHERE description = 'Pepstats attributes' };
Magali Ruffier's avatar
Magali Ruffier committed
127
128
  my @attrib_codes = @{ $prod_helper->execute_simple(-SQL => $sql) };
  return @attrib_codes;
129
130
131
132
133
134
}


sub dump_translation {
  my ($self, $dba, $tmpfile) = @_;
  my $helper = $dba->dbc()->sql_helper();
135
136
  my $dbtype = $self->param('dbtype');
  my $ta     = Bio::EnsEMBL::Registry->get_adaptor($self->param('species'), $dbtype, 'translation');
137
138
139
140
141
142
143
  open(TMP, "> $tmpfile");
  my $sql = q{
    SELECT tl.translation_id
    FROM translation tl, transcript tr, seq_region s, coord_system cs
    WHERE tr.transcript_id = tl.transcript_id
    AND tr.seq_region_id = s.seq_region_id
    AND s.coord_system_id = cs.coord_system_id
144
    AND cs.species_id = ? };
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
  my @translation_ids = @{ $helper->execute_simple(-SQL => $sql, -PARAMS => [$dba->species_id()]) };
  for my $dbid (@translation_ids) {
    my $translation = $ta->fetch_by_dbID($dbid);
    my $peptide_seq = $translation->seq();
    if ( !($peptide_seq =~ m/[BZX]/ig ) ) {
      if ( $peptide_seq !~ /\n$/ ) {
        $peptide_seq .= "\n";
      }
      $peptide_seq =~ s/\*$//;
      print TMP ">$dbid\n$peptide_seq";
    }
  }
  close(TMP);
}





1;