# EnsEMBL PredictionTranscript reading/writing adaptor for MySQL
#
# Author: Arne Stabenau
#
# Date : 22.11.2001
#

=head1 NAME

Bio::EnsEMBL::DBSQL::PredictionTranscriptAdaptor -
MySQL database queries to load and store prediction transcripts

=head1 SYNOPSIS

  #get a prediction transcript adaptor from the database
  $pta = $database_adaptor->get_PredictionTranscriptAdaptor();

  #get a slice on a region of chromosome 1
  $sa = $database_adaptor->get_SliceAdaptor();
  $slice = $sa->fetch_by_chr_start_end('1', 100000, 200000);

  #get all the prediction transcripts from the slice region
  @prediction_transcripts = @{$pta->fetch_all_by_Slice($slice)};

=head1 CONTACT

Email questions to the EnsEMBL developer list: <ensembl-dev@ebi.ac.uk>

=cut

package Bio::EnsEMBL::DBSQL::PredictionTranscriptAdaptor;

use vars qw( @ISA );
use strict;

use Bio::EnsEMBL::DBSQL::BaseFeatureAdaptor;
use Bio::EnsEMBL::DBSQL::DBAdaptor;
use Bio::EnsEMBL::DBSQL::AnalysisAdaptor;
use Bio::EnsEMBL::PredictionTranscript;

# This adaptor is a subclass of the base feature adaptor.
@ISA = qw( Bio::EnsEMBL::DBSQL::BaseFeatureAdaptor );


=head2 _tables

  Arg [1]    : none
  Example    : none
  Description: Implements abstract superclass method to define the table used
               to retrieve prediction transcripts from the database
  Returntype : listref of two strings: the table name and its alias
  Exceptions : none
  Caller     : generic_fetch

=cut

# Names the table queried for prediction transcripts.
# Returns an array reference holding the table name followed by the alias
# that the column names in _columns are qualified with.
sub _tables {
  my $self = shift;

  return ['prediction_transcript', 'p'];
}


=head2 _columns

  Arg [1]    : none
  Example    : none
  Description: Implements abstract superclass method to define the columns
               retrieved in database queries used to create prediction
               transcripts.
  Returntype : list of strings
  Exceptions : none
  Caller     : generic_fetch

=cut

# Lists the columns selected for each prediction transcript row.
# The order matters: _objs_from_sth binds its variables to the statement
# handle in exactly this order.
sub _columns {
  my $self = shift;

  return qw( p.prediction_transcript_id
             p.contig_id
             p.contig_start
             p.contig_end
             p.contig_strand
             p.start_phase
             p.exon_rank
             p.score
             p.p_value
             p.analysis_id
             p.exon_count );
}


=head2 _final_clause

  Arg [1]    : none
  Example    : none
  Description: Overrides superclass method to supply an ORDER BY clause
               before the SQL query is performed, so that rows are sorted
               by prediction transcript id and then by exon rank.
  Returntype : string
  Exceptions : none
  Caller     : generic_fetch

=cut

# Supplies the trailing SQL clause for fetch queries.
# Sorting by transcript id and then exon rank keeps all rows of one
# prediction transcript adjacent, which _objs_from_sth relies on when it
# starts a new transcript each time the id changes.
sub _final_clause {
  my $self = shift;

  return  'order by p.prediction_transcript_id, p.exon_rank';
}


=head2 _objs_from_sth

  Arg [1]    : DBI:st $sth
               An executed DBI statement handle
  Arg [2]    : (optional) Bio::EnsEMBL::Mapper $mapper
               A mapper to be used to convert contig coordinates
               to assembly coordinates.
  Arg [3]    : (optional) Bio::EnsEMBL::Slice $slice
               A slice to map the prediction transcript to.
  Example    : $p_transcripts = $self->_objs_from_sth($sth);
  Description: Creates a list of Prediction transcripts from an executed DBI
               statement handle.  The columns retrieved via the statement
               handle must be in the same order as the columns defined by the
               _columns method.  If the slice argument is provided then the
               prediction transcripts will be returned in the coordinate
               system of the $slice argument.  Otherwise the prediction
               transcripts will be returned in the RawContig coordinate system.
  Returntype : reference to a list of Bio::EnsEMBL::PredictionTranscripts
  Exceptions : none
  Caller     : superclass generic_fetch

=cut

# Builds prediction transcripts from the rows of an executed statement
# handle.  Every row describes one exon; consecutive rows sharing a
# prediction_transcript_id are collected into one transcript.
#
#   $sth    - executed statement handle whose columns follow _columns
#   $mapper - (optional) object providing fast_to_assembly; only used
#             when $slice is given
#   $slice  - (optional) slice whose coordinate system the exons are
#             converted to; without it contig coordinates are kept
#
# Returns a reference to the list of transcripts.  When a slice is given,
# transcripts lying entirely outside it (or entirely in gaps) are dropped.
sub _objs_from_sth {
  my ($self, $sth, $mapper, $slice) = @_;

  my @out = ();

  # One bound variable per selected column, in _columns order.
  my ($prediction_transcript_id,
      $contig_id, $contig_start, $contig_end, $contig_strand,
      $start_phase, $exon_rank, $score, $p_value, $analysis_id,
      $exon_count );

  $sth->bind_columns(\$prediction_transcript_id,
                     \$contig_id, \$contig_start, \$contig_end,
                     \$contig_strand,
                     \$start_phase, \$exon_rank, \$score, \$p_value,
                     \$analysis_id, \$exon_count);

  my $rca = $self->db->get_RawContigAdaptor;
  my $aa  = $self->db->get_AnalysisAdaptor;

  my ($analysis, $contig, $pre_trans, $ptid,
      $chr, $start, $end, $strand,
      $slice_start, $slice_end, $slice_strand,
      $exon, $exon_start, $exon_end, $exon_strand,
      $stable_start, $stable_end, $stable_ctg,
      $transcript_slice_start, $transcript_slice_end );

  # Caches so each analysis / contig is fetched only once per call.
  my (%analysis_hash, %contig_hash);

  if($slice) {
    $slice_start  = $slice->chr_start;
    $slice_end    = $slice->chr_end;
    $slice_strand = $slice->strand;
  }

  # Finalises the most recently pushed transcript: it is discarded when a
  # slice was requested and none of its exons landed on the slice (no exon
  # mapped at all, or the mapped span lies wholly before/after the slice);
  # otherwise it receives its "contig.start.end" stable id.
  my $finish_previous = sub {
    return unless @out;

    my $off_slice = $slice
      && ( !defined $transcript_slice_end
           || $transcript_slice_end < 1
           || $transcript_slice_start > $slice->length() );

    if($off_slice) {
      pop @out;
    } else {
      $out[-1]->stable_id("$stable_ctg.$stable_start.$stable_end");
    }
    return;
  };

  while($sth->fetch) {
    #create a new transcript for each new prediction transcript id
    unless(defined $pre_trans && $ptid == $prediction_transcript_id) {
      #the previous transcript is complete now, so settle its fate first
      $finish_previous->();

      $pre_trans = Bio::EnsEMBL::PredictionTranscript->new;

      $ptid = $prediction_transcript_id;
      $pre_trans->dbID($ptid);

      unless($analysis = $analysis_hash{$analysis_id}) {
        $analysis = $aa->fetch_by_dbID($analysis_id);
        $analysis_hash{$analysis_id} = $analysis;
      }

      $pre_trans->analysis($analysis);
      $pre_trans->set_exon_count($exon_count);

      push( @out, $pre_trans );

      #reset values used for last predtrans
      $stable_start = -1;
      $stable_end   = -1;
      $stable_ctg   = '';

      $transcript_slice_end   = undef;
      $transcript_slice_start = undef;
    }

    #recalculate stable id values (always in contig coordinates)
    if($stable_start == -1 || $contig_start < $stable_start) {
      $stable_start = $contig_start;
    }
    if($contig_end > $stable_end) {
      $stable_end = $contig_end;
    }
    unless($contig = $contig_hash{$contig_id}) {
      $contig = $rca->fetch_by_dbID($contig_id);
      $contig_hash{$contig_id} = $contig;
    }
    $stable_ctg = $contig->name;

    if($slice) {
      #a slice was passed in so we want slice coords

      #convert contig coords to assembly coords
      ($chr, $start, $end, $strand) =
        $mapper->fast_to_assembly($contig_id, $contig_start,
                                  $contig_end, $contig_strand);

      #if mapped to gap skip
      next unless(defined $start);

      #convert to slice coordinates
      if($slice_strand == -1) {
        $exon_start  = $slice_end - $end   + 1;
        $exon_end    = $slice_end - $start + 1;
        $exon_strand = $strand * -1;
      } else {
        $exon_start  = $start - $slice_start + 1;
        $exon_end    = $end   - $slice_start + 1;
        $exon_strand = $strand;
      }

      #track the span of the whole transcript in slice coordinates
      if( !defined $transcript_slice_start ||
          $transcript_slice_start > $exon_start ) {
        $transcript_slice_start = $exon_start;
      }

      if( !defined $transcript_slice_end ||
          $transcript_slice_end < $exon_end ) {
        $transcript_slice_end = $exon_end;
      }

      #use slice as the contig instead of the raw contig
      $contig = $slice;
    } else {
      #we just want plain old contig coords
      $exon_start  = $contig_start;
      $exon_end    = $contig_end;
      $exon_strand = $contig_strand;
    }

    #create an exon and add it to the prediction transcript
    $exon = Bio::EnsEMBL::Exon->new_fast($contig,
                                         $exon_start,
                                         $exon_end,
                                         $exon_strand);
    $exon->phase( $start_phase );
    $exon->end_phase( ($exon_end - $exon_start + 1 + $start_phase) % 3 );
    $exon->score( $score );
    $exon->p_value( $p_value );

    $pre_trans->add_Exon($exon, $exon_rank);
  }

  #the last transcript has no successor row to trigger its finalisation
  $finish_previous->();

  return \@out;
}



=head2 store

  Arg [1]    : list of Bio::EnsEMBL::PredictionTranscript @pre_transcripts
  Example    : $prediction_transcript_adaptor->store(@pre_transcripts);
  Description: Stores a list of given prediction transcripts in database.
               Puts dbID and adaptor into each stored object.
  Returntype : none
  Exceptions : on wrong argument type
  Caller     : general

=cut

# Writes each given prediction transcript to the prediction_transcript
# table, one row per exon, then records the new dbID and this adaptor on
# the transcript object.
#
#   @pre_transcripts - Bio::EnsEMBL::PredictionTranscript objects
#
# Throws (via $self->throw) when an argument is not a PredictionTranscript.
# A transcript that already carries a dbID and this adaptor is skipped with
# a warning rather than being inserted a second time.
sub store {
  my ( $self, @pre_transcripts ) = @_;

  my $exon_sql = q{
      INSERT INTO prediction_transcript ( prediction_transcript_id, exon_rank, 
					  contig_id, contig_start, contig_end, 
					  contig_strand, start_phase, score, 
					  p_value, analysis_id, exon_count )
	VALUES ( ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ? )
      };

  my $exonst = $self->prepare($exon_sql);

  foreach my $pre_trans (@pre_transcripts) {
    # eval guards the method call so that plain scalars and unblessed
    # references reach the intended throw instead of a raw Perl error.
    unless( eval { $pre_trans->isa('Bio::EnsEMBL::PredictionTranscript') } ) {
      $self->throw("$pre_trans is not a EnsEMBL PredictionTranscript "
                   . "- not dumping!");
    }

    if( $pre_trans->dbID
        && defined $pre_trans->adaptor
        && $pre_trans->adaptor == $self ) {
      $self->warn("Already stored");
      # inserting again would create a duplicate under a fresh id
      next;
    }

    my $exons      = $pre_trans->get_all_Exons();
    my $exon_count = scalar( @{$exons} );
    my $dbID       = undef;
    my $rank       = 0;

    for my $exon ( @$exons ) {
      $rank++;

      # undefined slots keep their rank but produce no row
      next unless defined $exon;

      my $contig_id     = $exon->contig->dbID();
      my $contig_start  = $exon->start();
      my $contig_end    = $exon->end();
      my $contig_strand = $exon->strand();

      my $start_phase = $exon->phase();

      # this is only in PredictionExon
      my $score   = $exon->score();
      my $p_value = $exon->p_value();

      my $analysis = $pre_trans->analysis->dbID;

      # The first row written goes in with an undefined id so the database
      # assigns one; every later row of this transcript reuses that id.
      # Keying on $dbID (not on rank 1) keeps the rows together even when
      # the first exon slot is undefined.
      $exonst->execute( $dbID, $rank, $contig_id, $contig_start,
                        $contig_end, $contig_strand,
                        $start_phase, $score, $p_value, $analysis,
                        $exon_count );

      $dbID = $exonst->{'mysql_insertid'} unless defined $dbID;
    }

    $pre_trans->dbID( $dbID );
    $pre_trans->adaptor( $self );
  }

  $exonst->finish;
}


=head2 remove

  Arg [1]    : Bio::EnsEMBL::PredictionTranscript $pt
  Example    : $prediction_transcript_adaptor->remove($pt);
  Description: Removes given prediction transcript $pt from database.
  Returntype : none
  Exceptions : none
  Caller     : general

=cut

# Deletes every row of the given prediction transcript from the
# prediction_transcript table and clears the object's dbID and adaptor.
#
#   $pre_trans - object providing dbID(); nothing happens if dbID is undef
#
# Returns nothing.
sub remove {
  my $self = shift;
  my $pre_trans = shift;

  # an object without a dbID has no rows to delete
  return unless defined $pre_trans->dbID();

  my $sth = $self->prepare( "DELETE FROM prediction_transcript 
                             WHERE prediction_transcript_id = ?" );
  $sth->execute( $pre_trans->dbID );
  $sth->finish;

  # uhh, didnt know another way of resetting to undef ...
  $pre_trans->{dbID} = undef;
  $pre_trans->{adaptor} = undef;

  return;
}


# Perl modules must return a true value when loaded.
1;