DnaAlignFeatureAdaptor.pm 17.8 KB
Newer Older
1
#
2
# EnsEMBL module for Bio::EnsEMBL::DnaAlignFeatureAdaptor
3
#
4
# Copyright (c) 2003 EnsEMBL
5
6
7
8
9
10
11
12
13
#
# You may distribute this module under the same terms as perl itself

=head1 NAME

Bio::EnsEMBL::DBSQL::DnaAlignFeatureAdaptor - Adaptor for DnaAlignFeatures

=head1 SYNOPSIS

14
  $dafa = $registry->get_adaptor( 'Human', 'Core', 'DnaAlignFeature' );
15

16
  my @features = @{ $dafa->fetch_all_by_Slice($slice) };
17

18
  $dafa->store(@features);
19
20
21

=head1 DESCRIPTION

22
23
24
25
This is an adaptor responsible for the retrieval and storage of 
DnaDnaAlignFeatures from the database. This adaptor inherits most of its 
functionality from the BaseAlignFeatureAdaptor and BaseFeatureAdaptor 
superclasses.
26

27
=head1 CONTACT
28

29
Post questions to the EnsEMBL development list <ensembl-dev@ebi.ac.uk>
30
31
32
33
34
35
36

=cut


package Bio::EnsEMBL::DBSQL::DnaAlignFeatureAdaptor;
use vars qw(@ISA);
use strict;
37
use Data::Dumper;
38
use Bio::EnsEMBL::DnaDnaAlignFeature;
39
use Bio::EnsEMBL::DBSQL::BaseAlignFeatureAdaptor;
Laura Clarke's avatar
 
Laura Clarke committed
40
use Bio::EnsEMBL::Utils::Exception qw(throw warning);
41

42
@ISA = qw(Bio::EnsEMBL::DBSQL::BaseAlignFeatureAdaptor);
43
44


45
=head2 _tables
Graham McVicker's avatar
Graham McVicker committed
46
47

  Args       : none
48
  Example    : @tabs = $self->_tables
Graham McVicker's avatar
Graham McVicker committed
49
  Description: PROTECTED implementation of the abstract method inherited from
50
51
               BaseFeatureAdaptor.  Returns list of [tablename, alias] pairs
  Returntype : list of listrefs of strings
Graham McVicker's avatar
Graham McVicker committed
52
53
  Exceptions : none
  Caller     : Bio::EnsEMBL::DBSQL::BaseFeatureAdaptor::generic_fetch
54
  Status     : Stable
Graham McVicker's avatar
Graham McVicker committed
55
56
57

=cut

58
sub _tables {
59
  my $self = shift;
60

61
  return (['dna_align_feature', 'daf'],['external_db','exdb']);
62
63
}

Graham McVicker's avatar
Graham McVicker committed
64

65
66
67
68
sub _left_join{
    return (['external_db',"exdb.external_db_id = daf.external_db_id"]);
}

Graham McVicker's avatar
Graham McVicker committed
69
70
71
72
73
74
75
76
77
=head2 _columns

  Args       : none
  Example    : @columns = $self->_columns
  Description: PROTECTED implementation of abstract superclass method.  
               Returns a list of columns that are needed for object creation.
  Returntype : list of strings
  Exceptions : none
  Caller     : Bio::EnsEMBL::DBSQL::BaseFeatureAdaptor::generic_fetch
78
  Status     : Stable
Graham McVicker's avatar
Graham McVicker committed
79
80
81

=cut

82
83
sub _columns {
  my $self = shift;
84

85
  #warning, implementation of _objs_from_sth method depends on order of list
86
87
88
89
90
91
92
93
94
95
96
97
98
  return qw(daf.dna_align_feature_id
            daf.seq_region_id
            daf.analysis_id
            daf.seq_region_start
            daf.seq_region_end
            daf.seq_region_strand
            daf.hit_start
            daf.hit_end
            daf.hit_name
            daf.hit_strand
            daf.cigar_line
            daf.evalue
            daf.perc_ident
99
100
            daf.score
            daf.external_db_id
Eugene Kulesha's avatar
typo  
Eugene Kulesha committed
101
            daf.hcoverage
102
	    daf.external_data
103
	    daf.pair_dna_align_feature_id
104
105
	    exdb.db_name
	    exdb.db_display_name);
106
}
107

108

109
=head2 store
110

111
  Arg [1]    : list of Bio::EnsEMBL::DnaAlignFeatures @feats
Graham McVicker's avatar
Graham McVicker committed
112
               the features to store in the database
113
  Example    : $dna_align_feature_adaptor->store(@features);
Graham McVicker's avatar
Graham McVicker committed
114
115
  Description: Stores a list of DnaAlignFeatures in the database
  Returntype : none
116
117
118
119
120
121
122
123
124
  Exceptions : throw if any of the provided features cannot be stored
               which may occur if:
                 * The feature does not have an associate Slice
                 * The feature does not have an associated analysis
                 * The Slice the feature is associated with is on a seq_region
                   unknown to this database
               A warning is given if:
                 * The feature has already been stored in this db
  Caller     : Pipeline
125
  Status     : Stable
126
127
128

=cut

129
sub store {
130
131
132
  my ($self, @feats) = @_;

  throw("Must call store with features") if( scalar(@feats) == 0 );
133
134
135

  my @tabs = $self->_tables;
  my ($tablename) = @{$tabs[0]};
136
137
138
139
140
141
142

  my $db = $self->db();
  my $analysis_adaptor = $db->get_AnalysisAdaptor();

  my $sth = $self->prepare(
     "INSERT INTO $tablename (seq_region_id, seq_region_start, seq_region_end,
                             seq_region_strand, hit_start, hit_end,
143
                             hit_strand, hit_name, cigar_line,
144
145
146
                             analysis_id, score, evalue, perc_ident, external_db_id, 
                             hcoverage, pair_dna_align_feature_id)
     VALUES (?,?,?,?,?,?,?,?,?,?,?, ?, ?, ?, ?, ?)");
147

148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
 FEATURE: foreach my $feat ( @feats ) {
    if( !ref $feat || !$feat->isa("Bio::EnsEMBL::DnaDnaAlignFeature") ) {
      throw("feature must be a Bio::EnsEMBL::DnaDnaAlignFeature,"
            . " not a [".ref($feat)."].");
    }

    if($feat->is_stored($db)) {
      warning("DnaDnaAlignFeature [".$feat->dbID."] is already stored" .
              " in this database.");
      next FEATURE;
    }

    my $hstart = $feat->hstart();
    my $hend   = $feat->hend();
    my $hstrand = $feat->hstrand();
    $self->_check_start_end_strand($hstart,$hend, $hstrand);

    my $cigar_string = $feat->cigar_string();
    if(!$cigar_string) {
      $cigar_string = $feat->length() . 'M';
      warning("DnaDnaAlignFeature does not define a cigar_string.\n" .
              "Assuming ungapped block with cigar_line=$cigar_string .");
170
    }
171
172
173
174
175
176
177
178

    my $hseqname = $feat->hseqname();
    if(!$hseqname) {
      throw("DnaDnaAlignFeature must define an hseqname.");
    }

    if(!defined($feat->analysis)) {
      throw("An analysis must be attached to the features to be stored.");
179
180
    }

181
182
183
    #store the analysis if it has not been stored yet
    if(!$feat->analysis->is_stored($db)) {
      $analysis_adaptor->store($feat->analysis());
184
    }
185

186
187
188
    my $original = $feat;
    my $seq_region_id;
    ($feat, $seq_region_id) = $self->_pre_store($feat);
189
190
191
192
193
194
195
196
197
198
    $sth->bind_param(1,$seq_region_id,SQL_INTEGER);
    $sth->bind_param(2,$feat->start,SQL_INTEGER);
    $sth->bind_param(3,$feat->end,SQL_INTEGER);
    $sth->bind_param(4,$feat->strand,SQL_TINYINT);
    $sth->bind_param(5,$hstart,SQL_INTEGER);
    $sth->bind_param(6,$hend,SQL_INTEGER);
    $sth->bind_param(7,$hstrand,SQL_TINYINT);
    $sth->bind_param(8,$hseqname,SQL_VARCHAR);
    $sth->bind_param(9,$cigar_string,SQL_LONGVARCHAR);
    $sth->bind_param(10,$feat->analysis->dbID,SQL_INTEGER);
199
200
201
202
203
    $sth->bind_param(11,$feat->score,SQL_DOUBLE);
    $sth->bind_param(12,$feat->p_value,SQL_DOUBLE);
    $sth->bind_param(13,$feat->percent_id,SQL_FLOAT);
    $sth->bind_param(14,$feat->external_db_id,SQL_INTEGER);
    $sth->bind_param(15,$feat->hcoverage,SQL_DOUBLE);
204
    $sth->bind_param(16,$feat->pair_dna_align_feature_id, SQL_INTEGER);
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226

    $sth->execute();
    $original->dbID($sth->{'mysql_insertid'});
    $original->adaptor($self);
  }

  $sth->finish();
}


sub save {
  my ($self, $features) = @_;

  my @feats = @$features;
  throw("Must call store with features") if( scalar(@feats) == 0 );

  my @tabs = $self->_tables;
  my ($tablename) = @{$tabs[0]};

  my $db = $self->db();
  my $analysis_adaptor = $db->get_AnalysisAdaptor();

227
  my $sql = qq{INSERT INTO $tablename (seq_region_id, seq_region_start, seq_region_end, seq_region_strand, hit_start, hit_end, hit_strand, hit_name, cigar_line, analysis_id, score, evalue, perc_ident, external_db_id, hcoverage, pair_dna_align_feature_id, external_data) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)};
228

229
230
  my %analyses = ();

231
232
233
234
235
236
237
238
239
240
241
242
243
244
  my $sth = $self->prepare($sql);
     
 FEATURE: foreach my $feat ( @feats ) {
    if( !ref $feat || !$feat->isa("Bio::EnsEMBL::DnaDnaAlignFeature") ) {
      throw("feature must be a Bio::EnsEMBL::DnaDnaAlignFeature,"
            . " not a [".ref($feat)."].");
    }

    if($feat->is_stored($db)) {
      warning("DnaDnaAlignFeature [".$feat->dbID."] is already stored" .
              " in this database.");
      next FEATURE;
    }

245
246
247
248
249
250
251
252
    my $hstart  = $feat->hstart || 0; # defined $feat->hstart  ? $feat->hstart : $feat->start ;
    my $hend    = $feat->hend   || 0; # defined $feat->hend    ? $feat->hend : $feat->end;
    my $hstrand = $feat->hstrand|| 0; # defined $feat->hstrand ? $feat->hstrand : $feat->strand;
    if( $hstart && $hend ) {
      if($hend < $hstart) {
        throw("Invalid Feature start/end [$hstart/$hend]. Start must be less than or equal to end.");
      }
    }
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
    my $cigar_string = $feat->cigar_string();
    if(!$cigar_string) {
      $cigar_string = $feat->length() . 'M';
      warning("DnaDnaAlignFeature does not define a cigar_string.\n" .
              "Assuming ungapped block with cigar_line=$cigar_string .");
    }

    my $hseqname = $feat->hseqname();
    if(!$hseqname) {
      throw("DnaDnaAlignFeature must define an hseqname.");
    }

    if(!defined($feat->analysis)) {
      throw("An analysis must be attached to the features to be stored.");
    }

    #store the analysis if it has not been stored yet
    if(!$feat->analysis->is_stored($db)) {
      $analysis_adaptor->store($feat->analysis());
    }

274
275
    $analyses{ $feat->analysis->dbID }++;

276
277
278
279
    my $original = $feat;
    my $seq_region_id;
    ($feat, $seq_region_id) = $self->_pre_store_userdata($feat);

280
281
    my $extra_data = $feat->extra_data ? $self->dump_data($feat->extra_data) : '';

282
283
284
285
286
287
288
289
290
291
    $sth->bind_param(1,$seq_region_id,SQL_INTEGER);
    $sth->bind_param(2,$feat->start,SQL_INTEGER);
    $sth->bind_param(3,$feat->end,SQL_INTEGER);
    $sth->bind_param(4,$feat->strand,SQL_TINYINT);
    $sth->bind_param(5,$hstart,SQL_INTEGER);
    $sth->bind_param(6,$hend,SQL_INTEGER);
    $sth->bind_param(7,$hstrand,SQL_TINYINT);
    $sth->bind_param(8,$hseqname,SQL_VARCHAR);
    $sth->bind_param(9,$cigar_string,SQL_LONGVARCHAR);
    $sth->bind_param(10,$feat->analysis->dbID,SQL_INTEGER);
292
    $sth->bind_param(11,$feat->score,SQL_DOUBLE);
293
#    $sth->bind_param(11,$feat->score); # if the above statement does not work it means you need to upgrade DBD::mysql, meantime you can replace it with this line
294
295
    $sth->bind_param(12,$feat->p_value,SQL_DOUBLE);
    $sth->bind_param(13,$feat->percent_id,SQL_FLOAT);
296
297
    $sth->bind_param(14,$feat->external_db_id,SQL_INTEGER);
    $sth->bind_param(15,$feat->hcoverage,SQL_DOUBLE);
298
299
300
    $sth->bind_param(16,$feat->pair_dna_align_feature_id,SQL_INTEGER);
    $sth->bind_param(17,$extra_data,SQL_LONGVARCHAR);

301
302

    $sth->execute();
303
304
    $original->dbID($sth->{'mysql_insertid'});
    $original->adaptor($self);
305
  }
306
307

  $sth->finish();
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323

## js5 hack to update meta_coord table... 
  if( keys %analyses ) {
    my $sth = $self->prepare( 'select sr.coord_system_id, max(daf.seq_region_end-daf.seq_region_start) from seq_region as sr, dna_align_feature as daf where daf.seq_region_id=sr.seq_region_id and analysis_id in ('.join(',',keys %analyses).') group by coord_system_id' );
    $sth->execute;
    foreach( @{ $sth->fetchall_arrayref } ) {
      my $sth2 = $self->prepare( qq(insert ignore into meta_coord values("dna_align_feature",$_->[0],$_->[1])) );
      $sth2->execute;
      $sth2->finish;
      my $sth2 = $self->prepare( qq(update meta_coord set max_length = $_->[1] where coord_system_id = $_->[0] and table_name="dna_align_feature" and max_length < $_->[1]) );
      $sth2->execute;
      $sth2->finish;
    }
    $sth->finish;
  }

324
325
326
}


327
=head2 _objs_from_sth
Graham McVicker's avatar
Graham McVicker committed
328

329
330
331
332
  Arg [1]    : DBI statement handle $sth
               an exectuted DBI statement handle generated by selecting 
               the columns specified by _columns() from the table specified 
               by _table()
Graham McVicker's avatar
Graham McVicker committed
333
334
335
  Example    : @dna_dna_align_feats = $self->_obj_from_hashref
  Description: PROTECTED implementation of superclass abstract method. 
               Creates DnaDnaAlignFeature objects from a DBI hashref
336
  Returntype : listref of Bio::EnsEMBL::DnaDnaAlignFeatures
Graham McVicker's avatar
Graham McVicker committed
337
338
  Exceptions : none
  Caller     : Bio::EnsEMBL::BaseFeatureAdaptor::generic_fetch
339
  Status     : Stable
Graham McVicker's avatar
Graham McVicker committed
340
341

=cut
342

343
sub _objs_from_sth {
344
  my ($self, $sth, $mapper, $dest_slice) = @_;
345

346
347
348
349
350
  #
  # This code is ugly because an attempt has been made to remove as many
  # function calls as possible for speed purposes.  Thus many caches and
  # a fair bit of gymnastics is used.
  #
351

352
353
354
# in case of userdata we need the features on the dest_slice
# in case of get_all_supporting_features dest_slice is not provided .. 
  my $sa = $dest_slice ? $dest_slice->adaptor() : $self->db()->get_SliceAdaptor();
355
  my $aa = $self->db->get_AnalysisAdaptor();
356

357
358
359
360
361
362
363
364
  my @features;
  my %analysis_hash;
  my %slice_hash;
  my %sr_name_hash;
  my %sr_cs_hash;

  my($dna_align_feature_id, $seq_region_id, $analysis_id, $seq_region_start,
     $seq_region_end, $seq_region_strand, $hit_start, $hit_end, $hit_name,
365
     $hit_strand, $cigar_line, $evalue, $perc_ident, $score,
366
367
     $external_db_id, $hcoverage, $extra_data, $pair_dna_align_feature_id,
     $external_db_name, $external_display_db_name);
368
369
370
371

  $sth->bind_columns(
    \$dna_align_feature_id, \$seq_region_id, \$analysis_id, \$seq_region_start,
    \$seq_region_end, \$seq_region_strand, \$hit_start, \$hit_end, \$hit_name,
372
    \$hit_strand, \$cigar_line, \$evalue, \$perc_ident, \$score,
373
374
    \$external_db_id, \$hcoverage, \$extra_data, \$pair_dna_align_feature_id,
     \$external_db_name, \$external_display_db_name);
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389

  my $asm_cs;
  my $cmp_cs;
  my $asm_cs_vers;
  my $asm_cs_name;
  my $cmp_cs_vers;
  my $cmp_cs_name;
  if($mapper) {
    $asm_cs = $mapper->assembled_CoordSystem();
    $cmp_cs = $mapper->component_CoordSystem();
    $asm_cs_name = $asm_cs->name();
    $asm_cs_vers = $asm_cs->version();
    $cmp_cs_name = $cmp_cs->name();
    $cmp_cs_vers = $cmp_cs->version();
  }
390

391
392
393
394
  my $dest_slice_start;
  my $dest_slice_end;
  my $dest_slice_strand;
  my $dest_slice_length;
395
  my $dest_slice_sr_name;
396
  my $dest_slice_seq_region_id;
397

398
399
400
401
402
  if($dest_slice) {
    $dest_slice_start  = $dest_slice->start();
    $dest_slice_end    = $dest_slice->end();
    $dest_slice_strand = $dest_slice->strand();
    $dest_slice_length = $dest_slice->length();
403
    $dest_slice_sr_name = $dest_slice->seq_region_name();
404
    $dest_slice_seq_region_id = $dest_slice->get_seq_region_id();
405
  }
406

407
408
409
410
  FEATURE: while($sth->fetch()) {
    #get the analysis object
    my $analysis = $analysis_hash{$analysis_id} ||=
      $aa->fetch_by_dbID($analysis_id);
411

412
413
    #get the slice object
    my $slice = $slice_hash{"ID:".$seq_region_id};
414

415
416
417
418
419
420
    if(!$slice) {
      $slice = $sa->fetch_by_seq_region_id($seq_region_id);
      $slice_hash{"ID:".$seq_region_id} = $slice;
      $sr_name_hash{$seq_region_id} = $slice->seq_region_name();
      $sr_cs_hash{$seq_region_id} = $slice->coord_system();
    }
421

422
423
    my $sr_name = $sr_name_hash{$seq_region_id};
    my $sr_cs   = $sr_cs_hash{$seq_region_id};
424
425
426
427
428
429
    #
    # remap the feature coordinates to another coord system
    # if a mapper was provided
    #
    if($mapper) {

430
      ($seq_region_id,$seq_region_start,$seq_region_end,$seq_region_strand) =
431
432
433
434
        $mapper->fastmap($sr_name, $seq_region_start, $seq_region_end,
                          $seq_region_strand, $sr_cs);

      #skip features that map to gaps or coord system boundaries
435
      next FEATURE if(!defined($seq_region_id));
436
437
438

      #get a slice in the coord system we just mapped to
      if($asm_cs == $sr_cs || ($cmp_cs != $sr_cs && $asm_cs->equals($sr_cs))) {
439
440
        $slice = $slice_hash{"ID:".$seq_region_id} ||=
          $sa->fetch_by_seq_region_id($seq_region_id);
441
      } else {
442
443
        $slice = $slice_hash{"ID:".$seq_region_id} ||=
          $sa->fetch_by_seq_region_id($seq_region_id);
444
445
      }
    }
446

447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
    #
    # If a destination slice was provided convert the coords
    # If the dest_slice starts at 1 and is foward strand, nothing needs doing
    #
    if($dest_slice) {
      if($dest_slice_start != 1 || $dest_slice_strand != 1) {
        if($dest_slice_strand == 1) {
          $seq_region_start = $seq_region_start - $dest_slice_start + 1;
          $seq_region_end   = $seq_region_end   - $dest_slice_start + 1;
        } else {
          my $tmp_seq_region_start = $seq_region_start;
          $seq_region_start = $dest_slice_end - $seq_region_end + 1;
          $seq_region_end   = $dest_slice_end - $tmp_seq_region_start + 1;
          $seq_region_strand *= -1;
        }

        #throw away features off the end of the requested slice
464
        if($seq_region_end < 1 || $seq_region_start > $dest_slice_length ||
465
	  ( $dest_slice_seq_region_id ne $seq_region_id ))  {
466
467
468
469
          next FEATURE;
        }
      }
      $slice = $dest_slice;
470
    }
471

472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
    # Finally, create the new DnaAlignFeature.
    push( @features,
          $self->_create_feature_fast(
                                  'Bio::EnsEMBL::DnaDnaAlignFeature', {
                                    'slice'      => $slice,
                                    'start'      => $seq_region_start,
                                    'end'        => $seq_region_end,
                                    'strand'     => $seq_region_strand,
                                    'hseqname'   => $hit_name,
                                    'hstart'     => $hit_start,
                                    'hend'       => $hit_end,
                                    'hstrand'    => $hit_strand,
                                    'score'      => $score,
                                    'p_value'    => $evalue,
                                    'percent_id' => $perc_ident,
                                    'cigar_string' => $cigar_line,
                                    'analysis'     => $analysis,
                                    'adaptor'      => $self,
                                    'dbID' => $dna_align_feature_id,
                                    'external_db_id' => $external_db_id,
492
493
                                    'hcoverage'      => $hcoverage,
				    'extra_data'     => $extra_data ? $self->get_dumped_data($extra_data) : '',
494
				    'dbname'         => $external_db_name,
495
496
				    'db_display_name' => $external_display_db_name,
				    'pair_dna_align_feature_id' => $pair_dna_align_feature_id
497
498
                                  } ) );

499
  }
500

501
502
  return \@features;
}
503

Glenn Proctor's avatar
Glenn Proctor committed
504
505
506
507
=head2 list_dbIDs

  Arg [1]    : none
  Example    : @feature_ids = @{$dna_align_feature_adaptor->list_dbIDs()};
508
509
  Description: Gets an array of internal ids for all dna align features in 
               the current db
510
  Arg[1]     : <optional> int. not 0 for the ids to be sorted by the seq_region.
Glenn Proctor's avatar
Glenn Proctor committed
511
512
513
  Returntype : list of ints
  Exceptions : none
  Caller     : ?
514
  Status     : Stable
Glenn Proctor's avatar
Glenn Proctor committed
515
516
517
518

=cut

sub list_dbIDs {
519
   my ($self, $ordered) = @_;
Glenn Proctor's avatar
Glenn Proctor committed
520

521
   return $self->_list_dbIDs("dna_align_feature",undef, $ordered);
Glenn Proctor's avatar
Glenn Proctor committed
522
523
}

524
525
526
1;