# EnsEMBL Exon reading writing adaptor for mySQL
#
# Author: Arne Stabenau
# 
# Date : 22.11.2001
#

=head1 NAME

Bio::EnsEMBL::DBSQL::PredictionTranscriptAdaptor - 
MySQL Database queries to load and store PredictionExons
12
13
14

=head1 SYNOPSIS

15
16
17
18
19
20
21
22
#get a prediction transcript adaptor from the database
$pta = $database_adaptor->get_PredictionTranscriptAdaptor();

#get a slice on a region of chromosome 1
$sa = $database_adaptor->get_SliceAdaptor();
$slice = $sa->fetch_by_chr_start_end('1', 100000, 200000);

#get all the prediction transcripts from the slice region
23
@prediction_transcripts = @{$pta->fetch_all_by_Slice($slice)};
24

25
26
=head1 CONTACT

27
Email questions to the EnsEMBL developer list: <ensembl-dev@ebi.ac.uk>
28
29
30
31
32
33
34
35

=cut

package Bio::EnsEMBL::DBSQL::PredictionTranscriptAdaptor;

use vars qw( @ISA );
use strict;

36
use Bio::EnsEMBL::DBSQL::BaseFeatureAdaptor;
37
use Bio::EnsEMBL::DBSQL::DBAdaptor;
38
use Bio::EnsEMBL::DBSQL::AnalysisAdaptor;
39
40
use Bio::EnsEMBL::PredictionTranscript;

41
@ISA = qw( Bio::EnsEMBL::DBSQL::BaseFeatureAdaptor );
42
43


44
=head2 _tablename
45

46
47
48
49
50
  Arg [1]    : none
  Example    : none
  Description: Implements abstract superclass method to define the table used
               to retrieve prediction transcripts from the database
  Returntype : string
  Exceptions : none
  Caller     : generic_fetch
53
54
55

=cut

56
57
# Names the table (with its SQL alias) that prediction transcripts are read
# from.  The alias 'p' is the prefix used by the column names returned from
# _columns and by the ordering in _final_clause.
#
# Arg [1]    : none
# Returntype : string
sub _tablename {
  my ($self) = @_;

  return 'prediction_transcript p';
}
61

62
63


64
=head2 _columns
65

66
67
68
69
70
71
72
73
  Arg [1]    : none
  Example    : none
  Description: Implements abstract superclass method to define the columns
               retrieved in database queries used to create prediction 
               transcripts.
  Returntype : list of strings
  Exceptions : none
  Caller     : generic_fetch
74
75
76

=cut

77
# Lists the columns selected when building prediction transcripts.
#
# Arg [1]    : none
# Returntype : list of strings
sub _columns {
  my ($self) = @_;

  # NOTE: the order matters -- _objs_from_sth binds the result columns
  # positionally, so the two lists must be kept in step.
  return qw( p.prediction_transcript_id
             p.contig_id
             p.contig_start
             p.contig_end
             p.contig_strand
             p.start_phase
             p.exon_rank
             p.score
             p.p_value
             p.analysis_id
             p.exon_count );
}

93

94

95
=head2 _final_clause
96

97
98
99
100
101
  Arg [1]    : none
  Example    : none
  Description: Overrides superclass method to provide a final clause for the
               SQL query: an ORDER BY that keeps the rows of each prediction
               transcript together, sorted by exon rank.
  Returntype : string
  Exceptions : none
  Caller     : generic_fetch
104
105
106

=cut

107
108
109
110
# Supplies the trailing ORDER BY for the query.  _objs_from_sth starts a new
# transcript whenever the prediction_transcript_id changes, so rows of one
# transcript have to arrive together; within a transcript they are ordered
# by exon rank.
#
# Arg [1]    : none
# Returntype : string
sub _final_clause {
  my ($self) = @_;

  return 'order by p.prediction_transcript_id, p.exon_rank';
}


114

115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
=head2 _objs_from_sth

  Arg [1]    : DBI:st $sth 
               An executed DBI statement handle
  Arg [2]    : (optional) Bio::EnsEMBL::Mapper $mapper 
               A mapper to be used to convert contig coordinates
               to assembly coordinates.
  Arg [3]    : (optional) Bio::EnsEMBL::Slice $slice
               A slice to map the prediction transcript to.   
  Example    : $p_transcripts = $self->_objs_from_sth($sth);
  Description: Creates a list of Prediction transcripts from an executed DBI
               statement handle.  The columns retrieved via the statement 
               handle must be in the same order as the columns defined by the
               _columns method.  If the slice argument is provided then the
               prediction transcripts will be returned in the coordinate
               system of the $slice argument.  Otherwise the prediction 
               transcripts will be returned in the RawContig coordinate system.
  Returntype : reference to a list of Bio::EnsEMBL::PredictionTranscripts
  Exceptions : none
  Caller     : superclass generic_fetch
135

136
=cut
137

138
139
# Builds PredictionTranscript objects from an executed statement handle whose
# columns are in the order given by _columns.  Each row is one exon; rows with
# the same prediction_transcript_id are collected into one transcript.
#
# Arg [1]    : executed DBI statement handle
# Arg [2]    : (optional) mapper providing fast_to_assembly; required when a
#              slice is given
# Arg [3]    : (optional) slice; when given, exons are returned in slice
#              coordinates and transcripts with no exon or intron overlapping
#              the slice are dropped.  Otherwise exons stay in contig
#              coordinates.
# Returntype : reference to a list of prediction transcripts
sub _objs_from_sth {
  my ($self, $sth, $mapper, $slice) = @_;

  my @out = ();

  my ($prediction_transcript_id,
      $contig_id, $contig_start, $contig_end, $contig_strand,
      $start_phase, $exon_rank, $score, $p_value, $analysis_id,
      $exon_count);

  # positional binding: must match the order of _columns
  $sth->bind_columns(\$prediction_transcript_id,
                     \$contig_id, \$contig_start, \$contig_end,
                     \$contig_strand,
                     \$start_phase, \$exon_rank, \$score, \$p_value,
                     \$analysis_id, \$exon_count);

  my $rca = $self->db->get_RawContigAdaptor;
  my $aa  = $self->db->get_AnalysisAdaptor;

  # per-call caches so each analysis / contig is fetched only once
  my (%analysis_hash, %contig_hash);

  my ($slice_start, $slice_end, $slice_strand);
  if($slice) {
    $slice_start  = $slice->chr_start;
    $slice_end    = $slice->chr_end;
    $slice_strand = $slice->strand;
  }

  my ($pre_trans, $ptid);
  # assembly coordinates of the previous exon of the current transcript,
  # used to detect an intron that spans the slice
  my ($last_start, $last_end);
  my $on_slice_flag = 0;

  while($sth->fetch) {
    #create a new transcript for each new prediction transcript id
    unless(defined $pre_trans && $ptid == $prediction_transcript_id) {
      $pre_trans = Bio::EnsEMBL::PredictionTranscript->new;

      $ptid = $prediction_transcript_id;
      $pre_trans->dbID($ptid);

      my $analysis = $analysis_hash{$analysis_id};
      unless($analysis) {
        $analysis = $aa->fetch_by_dbID($analysis_id);
        $analysis_hash{$analysis_id} = $analysis;
      }

      $pre_trans->analysis($analysis);
      $pre_trans->set_exon_count($exon_count);

      #throw away last pred_transcript if none of the exons were on the slice
      if(@out && $slice && $on_slice_flag == 0) {
        pop @out;
      }

      push( @out, $pre_trans );

      $on_slice_flag = 0;
      $last_start    = undef;
      $last_end      = undef;
    }

    my ($contig, $exon_start, $exon_end, $exon_strand);

    if($slice) {
      #a slice was passed in so we want slice coords

      #convert contig coords to assembly coords
      my (undef, $start, $end, $strand) =
        $mapper->fast_to_assembly($contig_id, $contig_start,
                                  $contig_end, $contig_strand);

      #if mapped to gap skip
      next unless(defined $start);

      #determine if any portion of the transcript is on the slice.
      #Exons arrive in rank order, so on the reverse strand the previous
      #exon lies *after* the current one; both orientations of the
      #intron-spans-slice case must be checked.
      if(($start <= $slice_end && $end >= $slice_start) ||    #exon overlaps?
         (defined $last_end &&
          (($last_end   < $slice_start && $start > $slice_end) ||  #intron, fwd
           ($last_start > $slice_end   && $end   < $slice_start)))) { #intron, rev
        $on_slice_flag = 1;
      }

      $last_start = $start;
      $last_end   = $end;

      #convert to slice coordinates
      if($slice_strand == -1) {
        $exon_start  = $slice_end - $end   + 1;
        $exon_end    = $slice_end - $start + 1;
        $exon_strand = $strand * -1;
      } else {
        $exon_start  = $start - $slice_start + 1;
        $exon_end    = $end   - $slice_start + 1;
        $exon_strand = $strand;
      }

      $contig = $slice;
    } else {
      #we just want plain old contig coords
      $exon_start  = $contig_start;
      $exon_end    = $contig_end;
      $exon_strand = $contig_strand;

      $contig = $contig_hash{$contig_id};
      unless($contig) {
        $contig = $rca->fetch_by_dbID($contig_id);
        $contig_hash{$contig_id} = $contig;
      }
    }

    #create an exon and add it to the prediction transcript
    my $exon = Bio::EnsEMBL::Exon->new_fast($contig,
                                            $exon_start,
                                            $exon_end,
                                            $exon_strand);
    $exon->phase( $start_phase );
    $exon->end_phase( ($exon_end - $exon_start + 1 + $start_phase) % 3 );
    $exon->score( $score );
    $exon->p_value( $p_value );

    $pre_trans->add_Exon($exon, $exon_rank);
  }

  #throw away last  pred_transcript if it had no exons overlapping the slice
  if(@out && $slice && $on_slice_flag == 0) {
    pop @out;
  }

  return \@out;
}



=head2 store

266
267
268
269
270
271
272
  Arg [1]    : Bio::EnsEMBL::PredictionTranscript $pre_trans 
  Example    : $prediction_transcript_adaptor->store($pre_trans);
  Description: Stores the given $pre_trans in the database. Puts dbID and
               adaptor into the $pre_trans object. Returns the dbID.
  Returntype : int 
  Exceptions : on wrong argument type 
  Caller     : general 
273
274
275
276
277
278

=cut

# Writes a prediction transcript to the prediction_transcript table, one row
# per exon, then records the generated dbID and this adaptor on the object.
#
# Arg [1]    : Bio::EnsEMBL::PredictionTranscript $pre_trans
# Returntype : the dbID of the stored transcript (undef if it had no exons)
# Exceptions : throws if $pre_trans is not a PredictionTranscript
#
# A transcript that already carries a dbID from this adaptor is not written
# again; a warning is issued and its existing dbID is returned.
sub store {
  my ( $self, $pre_trans ) = @_;

  # eval guards against undef / unblessed arguments, on which ->isa would die
  # with a raw Perl error instead of the intended exception
  unless( ref($pre_trans)
          && eval { $pre_trans->isa('Bio::EnsEMBL::PredictionTranscript') } ) {
    my $what = defined $pre_trans ? $pre_trans : 'undef';
    $self->throw("$what is not a EnsEMBL PredictionTranscript "
		 . "- not dumping!");
  }

  if( $pre_trans->dbID
      && defined $pre_trans->adaptor
      && $pre_trans->adaptor == $self ) {
    $self->warn("Already stored");
    # do not insert a second copy of the same transcript
    return $pre_trans->dbID;
  }

  my $exon_sql = q{
    INSERT INTO prediction_transcript ( prediction_transcript_id, exon_rank, 
					contig_id, contig_start, contig_end, 
					contig_strand, start_phase, score, 
					p_value, analysis_id, exon_count )
    VALUES ( ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ? )
  };

  my $exonst = $self->prepare($exon_sql);

  my $exons      = $pre_trans->get_all_Exons();
  my $exon_count = scalar( @{$exons} );
  my $dbID       = undef;
  my $rank       = 0;

  for my $exon ( @{$exons} ) {
    # rank follows the position in the exon list, so undefined slots still
    # consume a rank
    $rank++;
    next unless defined $exon;

    my $contig_id     = $exon->contig->dbID();
    my $contig_start  = $exon->start();
    my $contig_end    = $exon->end();
    my $contig_strand = $exon->strand();

    my $start_phase   = $exon->phase();

    # this is only in PredictionExon
    my $score   = $exon->score();
    my $p_value = $exon->p_value();

    my $analysis = $pre_trans->analysis->dbID;

    # The first row inserted passes an undef id so the database assigns one;
    # every later row reuses that id.
    $exonst->execute( $dbID, $rank, $contig_id, $contig_start,
                      $contig_end, $contig_strand,
                      $start_phase, $score, $p_value, $analysis,
                      $exon_count );

    # Capture the generated id after the first *actual* insert rather than
    # when rank == 1: the first slot of the exon list may be undefined.
    $dbID = $exonst->{'mysql_insertid'} unless defined $dbID;
  }

  $pre_trans->dbID( $dbID );
  $pre_trans->adaptor( $self );

  return $dbID;
}


343
344
345

=head2 remove

346
347
348
349
350
351
  Arg [1]    : Bio::EnsEMBL::PredictionTranscript $pt 
  Example    : $prediction_transcript_adaptor->remove($pt);
  Description: removes given prediction transcript $pt from database. 
  Returntype : none
  Exceptions : none 
  Caller     : general
352
353
354

=cut

355
356
357
358
359
360
361
362
# Deletes every row of the given prediction transcript from the
# prediction_transcript table and detaches the object from the database by
# clearing its dbID and adaptor.  Does nothing if the object has no dbID.
#
# Arg [1]    : Bio::EnsEMBL::PredictionTranscript $pre_trans
# Returntype : none
sub remove {
  my ( $self, $pre_trans ) = @_;

  # never stored (or already removed): nothing to delete
  return unless defined $pre_trans->dbID();

  my $sth = $self->prepare( "DELETE FROM prediction_transcript 
                             WHERE prediction_transcript_id = ?" );
  $sth->execute( $pre_trans->dbID );

  # The hash slots are cleared directly rather than through the accessors.
  # NOTE(review): this relies on the object being a hash with 'dbID' and
  # 'adaptor' keys -- confirm against PredictionTranscript.
  $pre_trans->{dbID}    = undef;
  $pre_trans->{adaptor} = undef;

  return;
}


373

374
1;