DnaAlignFeatureAdaptor.pm 16.1 KB
Newer Older
1
#
2
# EnsEMBL module for Bio::EnsEMBL::DnaAlignFeatureAdaptor
3
#
4
# Copyright (c) 2003 EnsEMBL
5
6
7
8
9
10
11
12
13
#
# You may distribute this module under the same terms as perl itself

=head1 NAME

Bio::EnsEMBL::DBSQL::DnaAlignFeatureAdaptor - Adaptor for DnaAlignFeatures

=head1 SYNOPSIS

14
  $dafa = $registry->get_adaptor( 'Human', 'Core', 'DnaAlignFeature' );
15

16
  my @features = @{ $dafa->fetch_by_Slice($slice) };
17

18
  $dafa->store(@features);
19
20
21

=head1 DESCRIPTION

22
23
24
25
This is an adaptor responsible for the retrieval and storage of 
DnaDnaAlignFeatures from the database. This adaptor inherits most of its 
functionality from the BaseAlignFeatureAdaptor and BaseFeatureAdaptor 
superclasses.
26

27
=head1 CONTACT
28

29
Post questions to the EnsEMBL development list <ensembl-dev@ebi.ac.uk>
30
31
32
33
34
35
36

=cut


package Bio::EnsEMBL::DBSQL::DnaAlignFeatureAdaptor;
use vars qw(@ISA);
use strict;
37
use Data::Dumper;
38
use Bio::EnsEMBL::DnaDnaAlignFeature;
39
use Bio::EnsEMBL::DBSQL::BaseAlignFeatureAdaptor;
Laura Clarke's avatar
 
Laura Clarke committed
40
use Bio::EnsEMBL::Utils::Exception qw(throw warning);
41

42
@ISA = qw(Bio::EnsEMBL::DBSQL::BaseAlignFeatureAdaptor);
43
44


45
=head2 _tables
Graham McVicker's avatar
Graham McVicker committed
46
47

  Args       : none
48
  Example    : @tabs = $self->_tables
Graham McVicker's avatar
Graham McVicker committed
49
  Description: PROTECTED implementation of the abstract method inherited from
50
51
               BaseFeatureAdaptor.  Returns list of [tablename, alias] pairs
  Returntype : list of listrefs of strings
Graham McVicker's avatar
Graham McVicker committed
52
53
  Exceptions : none
  Caller     : Bio::EnsEMBL::DBSQL::BaseFeatureAdaptor::generic_fetch
54
  Status     : Stable
Graham McVicker's avatar
Graham McVicker committed
55
56
57

=cut

58
sub _tables {
59
  my $self = shift;
60

61
  return (['dna_align_feature', 'daf'],['external_db','exdb']);
62
63
}

Graham McVicker's avatar
Graham McVicker committed
64

65
66
67
68
sub _left_join{
    return (['external_db',"exdb.external_db_id = daf.external_db_id"]);
}

Graham McVicker's avatar
Graham McVicker committed
69
70
71
72
73
74
75
76
77
=head2 _columns

  Args       : none
  Example    : @columns = $self->_columns
  Description: PROTECTED implementation of abstract superclass method.  
               Returns a list of columns that are needed for object creation.
  Returntype : list of strings
  Exceptions : none
  Caller     : Bio::EnsEMBL::DBSQL::BaseFeatureAdaptor::generic_fetch
78
  Status     : Stable
Graham McVicker's avatar
Graham McVicker committed
79
80
81

=cut

82
83
sub _columns {
  my $self = shift;
84

85
  #warning, implementation of _objs_from_sth method depends on order of list
86
87
88
89
90
91
92
93
94
95
96
97
98
  return qw(daf.dna_align_feature_id
            daf.seq_region_id
            daf.analysis_id
            daf.seq_region_start
            daf.seq_region_end
            daf.seq_region_strand
            daf.hit_start
            daf.hit_end
            daf.hit_name
            daf.hit_strand
            daf.cigar_line
            daf.evalue
            daf.perc_ident
99
100
            daf.score
            daf.external_db_id
Eugene Kulesha's avatar
typo  
Eugene Kulesha committed
101
            daf.hcoverage
102
103
104
	    daf.external_data
	    exdb.db_name
	    exdb.db_display_name);
105
}
106

107

108
=head2 store
109

110
  Arg [1]    : list of Bio::EnsEMBL::DnaAlignFeatures @feats
Graham McVicker's avatar
Graham McVicker committed
111
               the features to store in the database
112
  Example    : $dna_align_feature_adaptor->store(@features);
Graham McVicker's avatar
Graham McVicker committed
113
114
  Description: Stores a list of DnaAlignFeatures in the database
  Returntype : none
115
116
117
118
119
120
121
122
123
  Exceptions : throw if any of the provided features cannot be stored
               which may occur if:
                 * The feature does not have an associate Slice
                 * The feature does not have an associated analysis
                 * The Slice the feature is associated with is on a seq_region
                   unknown to this database
               A warning is given if:
                 * The feature has already been stored in this db
  Caller     : Pipeline
124
  Status     : Stable
125
126
127

=cut

128
sub store {
129
130
131
  my ($self, @feats) = @_;

  throw("Must call store with features") if( scalar(@feats) == 0 );
132
133
134

  my @tabs = $self->_tables;
  my ($tablename) = @{$tabs[0]};
135
136
137
138
139
140
141

  my $db = $self->db();
  my $analysis_adaptor = $db->get_AnalysisAdaptor();

  my $sth = $self->prepare(
     "INSERT INTO $tablename (seq_region_id, seq_region_start, seq_region_end,
                             seq_region_strand, hit_start, hit_end,
142
                             hit_strand, hit_name, cigar_line,
143
144
                             analysis_id, score, evalue, perc_ident, external_db_id, hcoverage)
     VALUES (?,?,?,?,?,?,?,?,?,?,?, ?, ?, ?, ?)");
145

146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
 FEATURE: foreach my $feat ( @feats ) {
    if( !ref $feat || !$feat->isa("Bio::EnsEMBL::DnaDnaAlignFeature") ) {
      throw("feature must be a Bio::EnsEMBL::DnaDnaAlignFeature,"
            . " not a [".ref($feat)."].");
    }

    if($feat->is_stored($db)) {
      warning("DnaDnaAlignFeature [".$feat->dbID."] is already stored" .
              " in this database.");
      next FEATURE;
    }

    my $hstart = $feat->hstart();
    my $hend   = $feat->hend();
    my $hstrand = $feat->hstrand();
    $self->_check_start_end_strand($hstart,$hend, $hstrand);

    my $cigar_string = $feat->cigar_string();
    if(!$cigar_string) {
      $cigar_string = $feat->length() . 'M';
      warning("DnaDnaAlignFeature does not define a cigar_string.\n" .
              "Assuming ungapped block with cigar_line=$cigar_string .");
168
    }
169
170
171
172
173
174
175
176

    my $hseqname = $feat->hseqname();
    if(!$hseqname) {
      throw("DnaDnaAlignFeature must define an hseqname.");
    }

    if(!defined($feat->analysis)) {
      throw("An analysis must be attached to the features to be stored.");
177
178
    }

179
180
181
    #store the analysis if it has not been stored yet
    if(!$feat->analysis->is_stored($db)) {
      $analysis_adaptor->store($feat->analysis());
182
    }
183

184
185
186
    my $original = $feat;
    my $seq_region_id;
    ($feat, $seq_region_id) = $self->_pre_store($feat);
187
188
189
190
191
192
193
194
195
196
    $sth->bind_param(1,$seq_region_id,SQL_INTEGER);
    $sth->bind_param(2,$feat->start,SQL_INTEGER);
    $sth->bind_param(3,$feat->end,SQL_INTEGER);
    $sth->bind_param(4,$feat->strand,SQL_TINYINT);
    $sth->bind_param(5,$hstart,SQL_INTEGER);
    $sth->bind_param(6,$hend,SQL_INTEGER);
    $sth->bind_param(7,$hstrand,SQL_TINYINT);
    $sth->bind_param(8,$hseqname,SQL_VARCHAR);
    $sth->bind_param(9,$cigar_string,SQL_LONGVARCHAR);
    $sth->bind_param(10,$feat->analysis->dbID,SQL_INTEGER);
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
    $sth->bind_param(11,$feat->score,SQL_DOUBLE);
    $sth->bind_param(12,$feat->p_value,SQL_DOUBLE);
    $sth->bind_param(13,$feat->percent_id,SQL_FLOAT);
    $sth->bind_param(14,$feat->external_db_id,SQL_INTEGER);
    $sth->bind_param(15,$feat->hcoverage,SQL_DOUBLE);

    $sth->execute();
    $original->dbID($sth->{'mysql_insertid'});
    $original->adaptor($self);
  }

  $sth->finish();
}


sub save {
  my ($self, $features) = @_;

  my @feats = @$features;
  throw("Must call store with features") if( scalar(@feats) == 0 );

  my @tabs = $self->_tables;
  my ($tablename) = @{$tabs[0]};

  my $db = $self->db();
  my $analysis_adaptor = $db->get_AnalysisAdaptor();

224
  my $sql = qq{INSERT INTO $tablename (seq_region_id, seq_region_start, seq_region_end, seq_region_strand, hit_start, hit_end, hit_strand, hit_name, cigar_line, analysis_id, score, evalue, perc_ident, external_db_id, hcoverage, external_data) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)};
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269

  my $sth = $self->prepare($sql);
     
 FEATURE: foreach my $feat ( @feats ) {
    if( !ref $feat || !$feat->isa("Bio::EnsEMBL::DnaDnaAlignFeature") ) {
      throw("feature must be a Bio::EnsEMBL::DnaDnaAlignFeature,"
            . " not a [".ref($feat)."].");
    }

    if($feat->is_stored($db)) {
      warning("DnaDnaAlignFeature [".$feat->dbID."] is already stored" .
              " in this database.");
      next FEATURE;
    }

    my $hstart = defined $feat->hstart ? $feat->hstart : $feat->start ;
    my $hend   = defined $feat->hend ? $feat->hend : $feat->end;
    my $hstrand = defined $feat->hstrand ? $feat->hstrand : $feat->strand;
     $self->_check_start_end_strand($hstart,$hend, $hstrand);

    my $cigar_string = $feat->cigar_string();
    if(!$cigar_string) {
      $cigar_string = $feat->length() . 'M';
      warning("DnaDnaAlignFeature does not define a cigar_string.\n" .
              "Assuming ungapped block with cigar_line=$cigar_string .");
    }

    my $hseqname = $feat->hseqname();
    if(!$hseqname) {
      throw("DnaDnaAlignFeature must define an hseqname.");
    }

    if(!defined($feat->analysis)) {
      throw("An analysis must be attached to the features to be stored.");
    }

    #store the analysis if it has not been stored yet
    if(!$feat->analysis->is_stored($db)) {
      $analysis_adaptor->store($feat->analysis());
    }

    my $original = $feat;
    my $seq_region_id;
    ($feat, $seq_region_id) = $self->_pre_store_userdata($feat);

270
271
    my $extra_data = $feat->extra_data ? $self->dump_data($feat->extra_data) : '';

272
273
274
275
276
277
278
279
280
281
    $sth->bind_param(1,$seq_region_id,SQL_INTEGER);
    $sth->bind_param(2,$feat->start,SQL_INTEGER);
    $sth->bind_param(3,$feat->end,SQL_INTEGER);
    $sth->bind_param(4,$feat->strand,SQL_TINYINT);
    $sth->bind_param(5,$hstart,SQL_INTEGER);
    $sth->bind_param(6,$hend,SQL_INTEGER);
    $sth->bind_param(7,$hstrand,SQL_TINYINT);
    $sth->bind_param(8,$hseqname,SQL_VARCHAR);
    $sth->bind_param(9,$cigar_string,SQL_LONGVARCHAR);
    $sth->bind_param(10,$feat->analysis->dbID,SQL_INTEGER);
282
283
284
    $sth->bind_param(11,$feat->score,SQL_DOUBLE);
    $sth->bind_param(12,$feat->p_value,SQL_DOUBLE);
    $sth->bind_param(13,$feat->percent_id,SQL_FLOAT);
285
286
    $sth->bind_param(14,$feat->external_db_id,SQL_INTEGER);
    $sth->bind_param(15,$feat->hcoverage,SQL_DOUBLE);
287
    $sth->bind_param(16,$extra_data,SQL_LONGVARCHAR);
288
289

    $sth->execute();
290
291
    $original->dbID($sth->{'mysql_insertid'});
    $original->adaptor($self);
292
  }
293
294

  $sth->finish();
295
296
297
}


298
=head2 _objs_from_sth
Graham McVicker's avatar
Graham McVicker committed
299

300
301
302
303
  Arg [1]    : DBI statement handle $sth
               an exectuted DBI statement handle generated by selecting 
               the columns specified by _columns() from the table specified 
               by _table()
Graham McVicker's avatar
Graham McVicker committed
304
305
306
  Example    : @dna_dna_align_feats = $self->_obj_from_hashref
  Description: PROTECTED implementation of superclass abstract method. 
               Creates DnaDnaAlignFeature objects from a DBI hashref
307
  Returntype : listref of Bio::EnsEMBL::DnaDnaAlignFeatures
Graham McVicker's avatar
Graham McVicker committed
308
309
  Exceptions : none
  Caller     : Bio::EnsEMBL::BaseFeatureAdaptor::generic_fetch
310
  Status     : Stable
Graham McVicker's avatar
Graham McVicker committed
311
312

=cut
313

314
sub _objs_from_sth {
315
  my ($self, $sth, $mapper, $dest_slice) = @_;
316

317
318
319
320
321
  #
  # This code is ugly because an attempt has been made to remove as many
  # function calls as possible for speed purposes.  Thus many caches and
  # a fair bit of gymnastics is used.
  #
322

323
324
  my $sa = $self->db()->get_SliceAdaptor();
  my $aa = $self->db->get_AnalysisAdaptor();
325

326
327
328
329
330
331
332
333
  my @features;
  my %analysis_hash;
  my %slice_hash;
  my %sr_name_hash;
  my %sr_cs_hash;

  my($dna_align_feature_id, $seq_region_id, $analysis_id, $seq_region_start,
     $seq_region_end, $seq_region_strand, $hit_start, $hit_end, $hit_name,
334
     $hit_strand, $cigar_line, $evalue, $perc_ident, $score,
335
     $external_db_id, $hcoverage, $extra_data, $external_db_name, $external_display_db_name );
336
337
338
339

  $sth->bind_columns(
    \$dna_align_feature_id, \$seq_region_id, \$analysis_id, \$seq_region_start,
    \$seq_region_end, \$seq_region_strand, \$hit_start, \$hit_end, \$hit_name,
340
    \$hit_strand, \$cigar_line, \$evalue, \$perc_ident, \$score,
341
    \$external_db_id, \$hcoverage, \$extra_data, \$external_db_name, \$external_display_db_name );
342

343
344
345
346
347
348
349
350
351
352
353
354
355
356
357

  my $asm_cs;
  my $cmp_cs;
  my $asm_cs_vers;
  my $asm_cs_name;
  my $cmp_cs_vers;
  my $cmp_cs_name;
  if($mapper) {
    $asm_cs = $mapper->assembled_CoordSystem();
    $cmp_cs = $mapper->component_CoordSystem();
    $asm_cs_name = $asm_cs->name();
    $asm_cs_vers = $asm_cs->version();
    $cmp_cs_name = $cmp_cs->name();
    $cmp_cs_vers = $cmp_cs->version();
  }
358

359
360
361
362
  my $dest_slice_start;
  my $dest_slice_end;
  my $dest_slice_strand;
  my $dest_slice_length;
363
  my $dest_slice_sr_name;
364
  my $dest_slice_seq_region_id;
365

366
367
368
369
370
  if($dest_slice) {
    $dest_slice_start  = $dest_slice->start();
    $dest_slice_end    = $dest_slice->end();
    $dest_slice_strand = $dest_slice->strand();
    $dest_slice_length = $dest_slice->length();
371
    $dest_slice_sr_name = $dest_slice->seq_region_name();
372
    $dest_slice_seq_region_id = $dest_slice->get_seq_region_id();
373
  }
374

375
376
377
378
  FEATURE: while($sth->fetch()) {
    #get the analysis object
    my $analysis = $analysis_hash{$analysis_id} ||=
      $aa->fetch_by_dbID($analysis_id);
379

380
381
    #get the slice object
    my $slice = $slice_hash{"ID:".$seq_region_id};
382

383
384
385
386
387
388
    if(!$slice) {
      $slice = $sa->fetch_by_seq_region_id($seq_region_id);
      $slice_hash{"ID:".$seq_region_id} = $slice;
      $sr_name_hash{$seq_region_id} = $slice->seq_region_name();
      $sr_cs_hash{$seq_region_id} = $slice->coord_system();
    }
389

390
391
    my $sr_name = $sr_name_hash{$seq_region_id};
    my $sr_cs   = $sr_cs_hash{$seq_region_id};
392
393
394
395
396
397
    #
    # remap the feature coordinates to another coord system
    # if a mapper was provided
    #
    if($mapper) {

398
      ($seq_region_id,$seq_region_start,$seq_region_end,$seq_region_strand) =
399
400
401
402
        $mapper->fastmap($sr_name, $seq_region_start, $seq_region_end,
                          $seq_region_strand, $sr_cs);

      #skip features that map to gaps or coord system boundaries
403
      next FEATURE if(!defined($seq_region_id));
404
405
406

      #get a slice in the coord system we just mapped to
      if($asm_cs == $sr_cs || ($cmp_cs != $sr_cs && $asm_cs->equals($sr_cs))) {
407
408
        $slice = $slice_hash{"ID:".$seq_region_id} ||=
          $sa->fetch_by_seq_region_id($seq_region_id);
409
      } else {
410
411
        $slice = $slice_hash{"ID:".$seq_region_id} ||=
          $sa->fetch_by_seq_region_id($seq_region_id);
412
413
      }
    }
414

415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
    #
    # If a destination slice was provided convert the coords
    # If the dest_slice starts at 1 and is foward strand, nothing needs doing
    #
    if($dest_slice) {
      if($dest_slice_start != 1 || $dest_slice_strand != 1) {
        if($dest_slice_strand == 1) {
          $seq_region_start = $seq_region_start - $dest_slice_start + 1;
          $seq_region_end   = $seq_region_end   - $dest_slice_start + 1;
        } else {
          my $tmp_seq_region_start = $seq_region_start;
          $seq_region_start = $dest_slice_end - $seq_region_end + 1;
          $seq_region_end   = $dest_slice_end - $tmp_seq_region_start + 1;
          $seq_region_strand *= -1;
        }

        #throw away features off the end of the requested slice
432
        if($seq_region_end < 1 || $seq_region_start > $dest_slice_length ||
433
	  ( $dest_slice_seq_region_id ne $seq_region_id ))  {
434
435
436
437
          next FEATURE;
        }
      }
      $slice = $dest_slice;
438
    }
439

440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
    # Finally, create the new DnaAlignFeature.
    push( @features,
          $self->_create_feature_fast(
                                  'Bio::EnsEMBL::DnaDnaAlignFeature', {
                                    'slice'      => $slice,
                                    'start'      => $seq_region_start,
                                    'end'        => $seq_region_end,
                                    'strand'     => $seq_region_strand,
                                    'hseqname'   => $hit_name,
                                    'hstart'     => $hit_start,
                                    'hend'       => $hit_end,
                                    'hstrand'    => $hit_strand,
                                    'score'      => $score,
                                    'p_value'    => $evalue,
                                    'percent_id' => $perc_ident,
                                    'cigar_string' => $cigar_line,
                                    'analysis'     => $analysis,
                                    'adaptor'      => $self,
                                    'dbID' => $dna_align_feature_id,
                                    'external_db_id' => $external_db_id,
460
461
                                    'hcoverage'      => $hcoverage,
				    'extra_data'     => $extra_data ? $self->get_dumped_data($extra_data) : '',
462
463
				    'dbname'         => $external_db_name,
				    'db_display_name' => $external_display_db_name
464
465
                                  } ) );

466
  }
467

468
469
  return \@features;
}
470

Glenn Proctor's avatar
Glenn Proctor committed
471
472
473
474
=head2 list_dbIDs

  Arg [1]    : none
  Example    : @feature_ids = @{$dna_align_feature_adaptor->list_dbIDs()};
475
476
  Description: Gets an array of internal ids for all dna align features in 
               the current db
477
  Arg[1]     : <optional> int. not 0 for the ids to be sorted by the seq_region.
Glenn Proctor's avatar
Glenn Proctor committed
478
479
480
  Returntype : list of ints
  Exceptions : none
  Caller     : ?
481
  Status     : Stable
Glenn Proctor's avatar
Glenn Proctor committed
482
483
484
485

=cut

sub list_dbIDs {
486
   my ($self, $ordered) = @_;
Glenn Proctor's avatar
Glenn Proctor committed
487

488
   return $self->_list_dbIDs("dna_align_feature",undef, $ordered);
Glenn Proctor's avatar
Glenn Proctor committed
489
490
}

491
492
493
1;