# EnsEMBL PredictionTranscript reading/writing adaptor for MySQL
#
# Author: Arne Stabenau
#
# Date : 22.11.2001
#

=head1 NAME

Bio::EnsEMBL::DBSQL::PredictionTranscriptAdaptor -
MySQL Database queries to load and store PredictionTranscripts and their exons

=head1 SYNOPSIS

  #get a prediction transcript adaptor from the database
  $pta = $database_adaptor->get_PredictionTranscriptAdaptor();

  #get a slice on a region of chromosome 1
  $sa = $database_adaptor->get_SliceAdaptor();
  $slice = $sa->fetch_by_chr_start_end('1', 100000, 200000);

  #get all the prediction transcripts from the slice region
  #(fetch_all_by_Slice returns a list reference, so dereference into an array)
  @prediction_transcripts = @{$pta->fetch_all_by_Slice($slice)};

=head1 CONTACT

Email questions to the EnsEMBL developer list: <ensembl-dev@ebi.ac.uk>

=cut

package Bio::EnsEMBL::DBSQL::PredictionTranscriptAdaptor;

use vars qw( @ISA );
use strict;

36
use Bio::EnsEMBL::DBSQL::BaseFeatureAdaptor;
37
use Bio::EnsEMBL::DBSQL::DBAdaptor;
38
use Bio::EnsEMBL::DBSQL::AnalysisAdaptor;
39
40
use Bio::EnsEMBL::PredictionTranscript;

41
@ISA = qw( Bio::EnsEMBL::DBSQL::BaseFeatureAdaptor );
42
43


44
=head2 _tables

  Arg [1]    : none
  Example    : none
  Description: Implements abstract superclass method to define the table used
               to retrieve prediction transcripts from the database
  Returntype : reference to a two element list: the table name and the short
               name ('p') that the columns returned by _columns are
               prefixed with
  Exceptions : none
  Caller     : generic_fetch

=cut

sub _tables {
  my $self = shift;

  return ['prediction_transcript', 'p'];
}
61

62
63


64
=head2 _columns

  Arg [1]    : none
  Example    : none
  Description: Implements abstract superclass method to define the columns
               retrieved in database queries used to create prediction
               transcripts.  _objs_from_sth binds its variables in exactly
               this order, so the two must be changed together.
  Returntype : list of strings
  Exceptions : none
  Caller     : generic_fetch

=cut

sub _columns {
  my $self = shift;

  return qw( p.prediction_transcript_id
             p.contig_id
             p.contig_start
             p.contig_end
             p.contig_strand
             p.start_phase
             p.exon_rank
             p.score
             p.p_value
             p.analysis_id
             p.exon_count );
}

93

94

95
=head2 _final_clause

  Arg [1]    : none
  Example    : none
  Description: Overrides superclass method to provide an ORDER BY clause for
               the SQL query.  Rows are ordered by prediction transcript id
               and then by exon rank; _objs_from_sth starts a new prediction
               transcript whenever the id changes from one row to the next,
               so it depends on rows of one transcript being consecutive.
  Returntype : string
  Exceptions : none
  Caller     : generic_fetch

=cut

sub _final_clause {
  my $self = shift;

  return 'order by p.prediction_transcript_id, p.exon_rank';
}


114

115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
=head2 _objs_from_sth

  Arg [1]    : DBI:st $sth
               An executed DBI statement handle
  Arg [2]    : (optional) Bio::EnsEMBL::Mapper $mapper
               A mapper to be used to convert contig coordinates
               to assembly coordinates.  Required when $slice is given.
  Arg [3]    : (optional) Bio::EnsEMBL::Slice $slice
               A slice to map the prediction transcript to.
  Example    : $p_transcripts = $self->_objs_from_sth($sth);
  Description: Creates a list of Prediction transcripts from an executed DBI
               statement handle.  The columns retrieved via the statement
               handle must be in the same order as the columns defined by the
               _columns method, and rows belonging to one prediction
               transcript must be consecutive.  If the slice argument is
               provided then the prediction transcripts will be returned in
               the coordinate system of the $slice argument, exons that map
               to a gap are skipped, and prediction transcripts that do not
               overlap the slice at all are left out.  Otherwise the
               prediction transcripts will be returned in the RawContig
               coordinate system.  The stable id of each prediction
               transcript is "<contig name>.<start>.<end>", built from the
               contig coordinates of its rows.
  Returntype : reference to a list of Bio::EnsEMBL::PredictionTranscripts
  Exceptions : none
  Caller     : superclass generic_fetch

=cut

sub _objs_from_sth {
  my ($self, $sth, $mapper, $slice) = @_;

  my @out = ();

  #one bound variable per column, in the order defined by _columns
  my ($prediction_transcript_id,
      $contig_id, $contig_start, $contig_end, $contig_strand,
      $start_phase, $exon_rank, $score, $p_value, $analysis_id,
      $exon_count );

  $sth->bind_columns(\$prediction_transcript_id,
                     \$contig_id, \$contig_start, \$contig_end,
                     \$contig_strand,
                     \$start_phase, \$exon_rank, \$score, \$p_value,
                     \$analysis_id, \$exon_count);

  my $rca = $self->db->get_RawContigAdaptor;
  my $aa  = $self->db->get_AnalysisAdaptor;

  my ($analysis, $contig, $pre_trans, $ptid,
      $chr, $start, $end, $strand,
      $slice_start, $slice_end, $slice_strand,
      $exon, $exon_start, $exon_end, $exon_strand,
      $stable_start, $stable_end, $stable_ctg,
      $transcript_slice_start, $transcript_slice_end );

  #caches so that each analysis and contig is only fetched once
  my (%analysis_hash, %contig_hash);

  if($slice) {
    $slice_start  = $slice->chr_start;
    $slice_end    = $slice->chr_end;
    $slice_strand = $slice->strand;
  }

  my $prev_exon;
  my $already_merged;

  #Finishes the prediction transcript that is currently last in @out: it is
  #thrown away if a slice was requested and none of its exons or introns are
  #on the slice, otherwise it is given its stable id.  This has to run before
  #the per-transcript values below are reset for the next transcript.
  my $finish_last = sub {
    return unless @out;

    #the slice extent is undefined when every exon mapped to a gap
    if($slice && ( !defined $transcript_slice_end ||
                   $transcript_slice_end < 1 ||
                   $transcript_slice_start > $slice->length() )) {
      pop @out;
    } else {
      $out[$#out]->stable_id("$stable_ctg.$stable_start.$stable_end");
    }
  };

  while($sth->fetch) {
    #create a new transcript for each new prediction transcript id
    unless(defined $pre_trans && $ptid == $prediction_transcript_id) {
      $pre_trans = Bio::EnsEMBL::PredictionTranscript->new;

      $ptid = $prediction_transcript_id;
      $pre_trans->dbID($ptid);

      unless($analysis = $analysis_hash{$analysis_id}) {
        $analysis = $aa->fetch_by_dbID($analysis_id);
        $analysis_hash{$analysis_id} = $analysis;
      }

      $pre_trans->analysis($analysis);
      $pre_trans->set_exon_count($exon_count);

      $prev_exon = undef;
      $already_merged = 0;

      #deal with the previous transcript before starting on the new one
      $finish_last->();

      push( @out, $pre_trans );

      #reset values used for last predtrans
      $stable_start = -1;
      $stable_end   = -1;
      $stable_ctg = '';

      $transcript_slice_end = undef;
      $transcript_slice_start = undef;
    }

    #recalculate stable id values
    if($stable_start == -1 || $contig_start < $stable_start) {
      $stable_start = $contig_start;
    }
    if($contig_end > $stable_end) {
      $stable_end = $contig_end;
    }
    unless($contig = $contig_hash{$contig_id}) {
      $contig = $rca->fetch_by_dbID($contig_id);
      $contig_hash{$contig_id} = $contig;
    }
    $stable_ctg = $contig->name;

    if($slice) {
      #a slice was passed in so we want slice coords

      #convert contig coords to assembly coords
      ($chr, $start, $end, $strand) =
        $mapper->fast_to_assembly($contig_id, $contig_start,
                                  $contig_end, $contig_strand);

      #if mapped to gap skip
      next unless(defined $start);

      #convert to slice coordinates
      #
      #NOTE(review): in both merge tests below $exon is still the exon that
      #was built for the previous row (the exon for the current row is only
      #created further down), so $exon and $prev_exon are the same object and
      #the tests cannot succeed for an exon whose start <= end.  Merging of
      #adjacent exons therefore never happens.  The tests presumably should
      #use $exon_start/$exon_end of the current row, but the phase and exon
      #count handling of a merged exon is not defined here, so the behaviour
      #is left unchanged - confirm the intent before fixing.
      if($slice_strand == -1) {
        $exon_start  = $slice_end - $end   + 1;
        $exon_end    = $slice_end - $start + 1;
        $exon_strand = $strand * -1;

        #merge adjacent exons into a single exon
        if($prev_exon && $prev_exon->start == $exon->end + 1) {
          $exon->end($prev_exon->end);
          $already_merged++;
        }
      } else {
        $exon_start  = $start - $slice_start + 1;
        $exon_end    = $end   - $slice_start + 1;
        $exon_strand = $strand;

        #merge adjacent exons into a single exon
        if($prev_exon && $exon->start == $prev_exon->end + 1) {
          $exon->start($prev_exon->start);
          $already_merged++;
        }
      }

      #keep track of the extent of the transcript in slice coordinates
      if( !defined $transcript_slice_start ||
          $transcript_slice_start > $exon_start ) {
        $transcript_slice_start = $exon_start;
      }

      if( !defined $transcript_slice_end ||
          $transcript_slice_end < $exon_end ) {
        $transcript_slice_end = $exon_end;
      }

      #use slice as the contig instead of the raw contig
      $contig = $slice;
    } else {
      #we just want plain old contig coords
      $exon_start  = $contig_start;
      $exon_end    = $contig_end;
      $exon_strand = $contig_strand;
    }

    #create an exon and add it to the prediction transcript
    $exon = Bio::EnsEMBL::Exon->new_fast($contig,
                                         $exon_start,
                                         $exon_end,
                                         $exon_strand);
    $exon->phase( $start_phase );
    $exon->end_phase( ($exon_end - $exon_start + 1 + $start_phase) % 3 );
    $exon->score( $score );
    $exon->p_value( $p_value );

    $prev_exon = $exon;
    $pre_trans->add_Exon($exon, $exon_rank - $already_merged);
  }

  #the last prediction transcript has not been dealt with yet: throw it away
  #if it had no exons overlapping the slice, otherwise set its stable id
  $finish_last->();

  return \@out;
}



=head2 store

  Arg [1]    : list of Bio::EnsEMBL::PredictionTranscript @pre_transcripts
  Example    : $prediction_transcript_adaptor->store(@pre_transcripts);
  Description: Stores a list of given prediction transcripts in database.
               One row is inserted per exon; sticky exons are stored as
               their component exons.  The id generated for the first
               inserted row is used as the prediction transcript id of all
               following rows, and is set as the dbID of the stored object
               together with this adaptor.
  Returntype : none
  Exceptions : thrown if an argument is not a
               Bio::EnsEMBL::PredictionTranscript, or if one of its exons
               has no contig or is attached to a Bio::EnsEMBL::Slice
  Caller     : general

=cut

sub store {
  my ( $self, @pre_transcripts ) = @_;

  my $exon_sql = q{
      INSERT INTO prediction_transcript ( prediction_transcript_id, exon_rank, 
					  contig_id, contig_start, contig_end, 
					  contig_strand, start_phase, score, 
					  p_value, analysis_id, exon_count )
	VALUES ( ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ? )
      };

  my $exonst = $self->prepare($exon_sql);

  foreach my $pre_trans (@pre_transcripts) {
    #the eval makes plain scalars and unblessed references (on which the
    #isa method call itself dies) end up in the documented exception as well
    unless( eval { $pre_trans->isa('Bio::EnsEMBL::PredictionTranscript') } ) {
      $self->throw("$pre_trans is not a EnsEMBL PredictionTranscript " 
                   . "- not dumping!");
    }

    #NOTE(review): this only warns; the prediction transcript is still
    #inserted again below and receives a new dbID.  Confirm whether already
    #stored prediction transcripts should be skipped instead.
    if( $pre_trans->dbID && $pre_trans->adaptor == $self ) {
      $self->warn("Already stored");
    }

    my @pt_exons = @{$pre_trans->get_all_Exons()};

    #flatten sticky exons into their components; undefined entries are kept
    #so that the ranks of the remaining exons are preserved
    my @exons;
    foreach my $e (@pt_exons) {
      if($e && (!$e->contig || $e->contig->isa('Bio::EnsEMBL::Slice'))) {
        $self->throw('PredictionTranscript must be in contig coords to store');
      }

      if($e && $e->isa('Bio::EnsEMBL::StickyExon')) {
        push @exons, @{$e->get_all_component_Exons};
      } else {
        push @exons, $e;
      }
    }

    my $exon_count = scalar( @exons );
    my $dbID = undef;
    my $rank = 1;

    for my $exon ( @exons ) {
      if( ! defined $exon ) { $rank++; next; }

      my $contig_id = $exon->contig->dbID();
      my $contig_start = $exon->start();
      my $contig_end = $exon->end();
      my $contig_strand = $exon->strand();
      my $start_phase = $exon->phase();
      my $score = $exon->score();
      my $p_value = $exon->p_value();
      my $analysis = $pre_trans->analysis->dbID;

      if( ! defined $dbID ) {
        #first row stored for this prediction transcript: let the database
        #generate the id and reuse it for the rows that follow.  Testing
        #$dbID rather than the rank keeps all rows under one id even when
        #the leading exons are undefined.
        $exonst->execute( undef, $rank, $contig_id, $contig_start,
                          $contig_end, $contig_strand,
                          $start_phase, $score, $p_value, $analysis,
                          $exon_count );
        $dbID = $exonst->{'mysql_insertid'};
      } else {
        $exonst->execute( $dbID, $rank, $contig_id, $contig_start,
                          $contig_end, $contig_strand,
                          $start_phase, $score, $p_value, $analysis,
                          $exon_count );
      }
      $rank++;
    }

    $pre_trans->dbID( $dbID );
    $pre_trans->adaptor( $self );
  }

  $exonst->finish;

  return;
}


402
403
404

=head2 remove

  Arg [1]    : Bio::EnsEMBL::PredictionTranscript $pt
  Example    : $prediction_transcript_adaptor->remove($pt);
  Description: Removes given prediction transcript $pt from database by
               deleting all rows with its prediction transcript id, and
               resets the dbID and adaptor of $pt.  Nothing is done if $pt
               has no dbID.
  Returntype : none
  Exceptions : none
  Caller     : general

=cut

sub remove {
  my $self = shift;
  my $pre_trans = shift;

  #a prediction transcript without dbID has nothing to delete
  if ( ! defined $pre_trans->dbID() ) {
    return;
  }

  my $sth = $self->prepare( "DELETE FROM prediction_transcript 
                             WHERE prediction_transcript_id = ?" );
  $sth->execute( $pre_trans->dbID );
  $sth->finish;

  # uhh, didnt know another way of resetting to undef ...
  $pre_trans->{dbID} = undef;
  $pre_trans->{adaptor} = undef;

  return;
}


432

433
1;