DnaAlignFeatureAdaptor.pm 18.5 KB
Newer Older
1
2
=head1 LICENSE

3
  Copyright (c) 1999-2010 The European Bioinformatics Institute and
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
  Genome Research Limited.  All rights reserved.

  This software is distributed under a modified Apache license.
  For license details, please see

    http://www.ensembl.org/info/about/code_licence.html

=head1 CONTACT

  Please email comments or questions to the public Ensembl
  developers list at <ensembl-dev@ebi.ac.uk>.

  Questions may also be sent to the Ensembl help desk at
  <helpdesk@ensembl.org>.

=cut
20
21
22
23
24
25
26

=head1 NAME

Bio::EnsEMBL::DBSQL::DnaAlignFeatureAdaptor - Adaptor for DnaAlignFeatures

=head1 SYNOPSIS

27
  $dafa = $registry->get_adaptor( 'Human', 'Core', 'DnaAlignFeature' );
28

29
  my @features = @{ $dafa->fetch_all_by_Slice($slice) };
30

31
  $dafa->store(@features);
32
33
34

=head1 DESCRIPTION

35
36
37
This is an adaptor responsible for the retrieval and storage of
DnaDnaAlignFeatures from the database. This adaptor inherits most of its
functionality from the BaseAlignFeatureAdaptor and BaseFeatureAdaptor
38
superclasses.
39

40
=head1 METHODS
41
42
43
44
45
46
47

=cut


package Bio::EnsEMBL::DBSQL::DnaAlignFeatureAdaptor;
use vars qw(@ISA);
use strict;
48
use Data::Dumper;
49
use Bio::EnsEMBL::DnaDnaAlignFeature;
50
use Bio::EnsEMBL::DBSQL::BaseAlignFeatureAdaptor;
Laura Clarke's avatar
   
Laura Clarke committed
51
use Bio::EnsEMBL::Utils::Exception qw(throw warning);
52

53
@ISA = qw(Bio::EnsEMBL::DBSQL::BaseAlignFeatureAdaptor);
54
55


56
=head2 _tables
Graham McVicker's avatar
Graham McVicker committed
57
58

  Args       : none
59
  Example    : @tabs = $self->_tables
Graham McVicker's avatar
Graham McVicker committed
60
  Description: PROTECTED implementation of the abstract method inherited from
61
62
               BaseFeatureAdaptor.  Returns list of [tablename, alias] pairs
  Returntype : list of listrefs of strings
Graham McVicker's avatar
Graham McVicker committed
63
64
  Exceptions : none
  Caller     : Bio::EnsEMBL::DBSQL::BaseFeatureAdaptor::generic_fetch
65
  Status     : Stable
Graham McVicker's avatar
Graham McVicker committed
66
67
68

=cut

69
sub _tables {
70
  my $self = shift;
71

72
  return (['dna_align_feature', 'daf'],['external_db','exdb']);
73
74
}

Graham McVicker's avatar
Graham McVicker committed
75

76
77
78
79
sub _left_join{
    return (['external_db',"exdb.external_db_id = daf.external_db_id"]);
}

Graham McVicker's avatar
Graham McVicker committed
80
81
82
83
84
85
86
87
88
=head2 _columns

  Args       : none
  Example    : @columns = $self->_columns
  Description: PROTECTED implementation of abstract superclass method.  
               Returns a list of columns that are needed for object creation.
  Returntype : list of strings
  Exceptions : none
  Caller     : Bio::EnsEMBL::DBSQL::BaseFeatureAdaptor::generic_fetch
89
  Status     : Stable
Graham McVicker's avatar
Graham McVicker committed
90
91
92

=cut

93
94
sub _columns {
  my $self = shift;
95

96
  #warning, implementation of _objs_from_sth method depends on order of list
97
98
99
100
101
102
103
104
105
106
107
108
109
  return qw(daf.dna_align_feature_id
            daf.seq_region_id
            daf.analysis_id
            daf.seq_region_start
            daf.seq_region_end
            daf.seq_region_strand
            daf.hit_start
            daf.hit_end
            daf.hit_name
            daf.hit_strand
            daf.cigar_line
            daf.evalue
            daf.perc_ident
110
111
            daf.score
            daf.external_db_id
Eugene Kulesha's avatar
typo    
Eugene Kulesha committed
112
            daf.hcoverage
113
	    daf.external_data
114
	    daf.pair_dna_align_feature_id
115
116
	    exdb.db_name
	    exdb.db_display_name);
117
}
118

119

120
=head2 store
121

122
  Arg [1]    : list of Bio::EnsEMBL::DnaAlignFeatures @feats
Graham McVicker's avatar
Graham McVicker committed
123
               the features to store in the database
124
  Example    : $dna_align_feature_adaptor->store(@features);
Graham McVicker's avatar
Graham McVicker committed
125
126
  Description: Stores a list of DnaAlignFeatures in the database
  Returntype : none
127
128
129
130
131
132
133
134
135
  Exceptions : throw if any of the provided features cannot be stored
               which may occur if:
                 * The feature does not have an associate Slice
                 * The feature does not have an associated analysis
                 * The Slice the feature is associated with is on a seq_region
                   unknown to this database
               A warning is given if:
                 * The feature has already been stored in this db
  Caller     : Pipeline
136
  Status     : Stable
137
138
139

=cut

140
sub store {
141
  my ( $self, @feats ) = @_;
142

143
  throw("Must call store with features") if ( scalar(@feats) == 0 );
144
145

  my @tabs = $self->_tables;
146
  my ($tablename) = @{ $tabs[0] };
147

148
  my $db               = $self->db();
149
150
151
  my $analysis_adaptor = $db->get_AnalysisAdaptor();

  my $sth = $self->prepare(
152
153
154
155
156
157
158
159
160
161
162
163
164
    "INSERT INTO $tablename (seq_region_id, seq_region_start,
                             seq_region_end, seq_region_strand,
                             hit_start, hit_end, hit_strand, hit_name,
                             cigar_line, analysis_id, score, evalue,
                             perc_ident, external_db_id, hcoverage,
                             pair_dna_align_feature_id)
     VALUES (?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?)"    # 16 arguments
  );

FEATURE:
  foreach my $feat (@feats) {
    if ( !ref $feat || !$feat->isa("Bio::EnsEMBL::DnaDnaAlignFeature") )
    {
165
      throw("feature must be a Bio::EnsEMBL::DnaDnaAlignFeature,"
166
167
168
          . " not a ["
          . ref($feat)
          . "]." );
169
170
    }

171
172
173
174
    if ( $feat->is_stored($db) ) {
      warning( "DnaDnaAlignFeature ["
          . $feat->dbID()
          . "] is already stored in this database." );
175
176
177
      next FEATURE;
    }

178
179
    my $hstart  = $feat->hstart();
    my $hend    = $feat->hend();
180
    my $hstrand = $feat->hstrand();
181
    $self->_check_start_end_strand( $hstart, $hend, $hstrand );
182
183

    my $cigar_string = $feat->cigar_string();
184
    if ( !$cigar_string ) {
185
      $cigar_string = $feat->length() . 'M';
186
187
      warning( "DnaDnaAlignFeature does not define a cigar_string.\n"
          . "Assuming ungapped block with cigar_line=$cigar_string ." );
188
    }
189
190

    my $hseqname = $feat->hseqname();
191
    if ( !$hseqname ) {
192
193
194
      throw("DnaDnaAlignFeature must define an hseqname.");
    }

195
196
197
    if ( !defined( $feat->analysis ) ) {
      throw(
        "An analysis must be attached to the features to be stored.");
198
199
    }

200
    #store the analysis if it has not been stored yet
201
202
    if ( !$feat->analysis->is_stored($db) ) {
      $analysis_adaptor->store( $feat->analysis() );
203
    }
204

205
206
    my $original = $feat;
    my $seq_region_id;
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
    ( $feat, $seq_region_id ) = $self->_pre_store($feat);

    $sth->bind_param( 1,  $seq_region_id,        SQL_INTEGER );
    $sth->bind_param( 2,  $feat->start,          SQL_INTEGER );
    $sth->bind_param( 3,  $feat->end,            SQL_INTEGER );
    $sth->bind_param( 4,  $feat->strand,         SQL_TINYINT );
    $sth->bind_param( 5,  $hstart,               SQL_INTEGER );
    $sth->bind_param( 6,  $hend,                 SQL_INTEGER );
    $sth->bind_param( 7,  $hstrand,              SQL_TINYINT );
    $sth->bind_param( 8,  $hseqname,             SQL_VARCHAR );
    $sth->bind_param( 9,  $cigar_string,         SQL_LONGVARCHAR );
    $sth->bind_param( 10, $feat->analysis->dbID, SQL_INTEGER );
    $sth->bind_param( 11, $feat->score,          SQL_DOUBLE );
    $sth->bind_param( 12, $feat->p_value,        SQL_DOUBLE );
    $sth->bind_param( 13, $feat->percent_id,     SQL_FLOAT );
    $sth->bind_param( 14, $feat->external_db_id, SQL_INTEGER );
    $sth->bind_param( 15, $feat->hcoverage,      SQL_DOUBLE );
    $sth->bind_param( 16, $feat->pair_dna_align_feature_id,
      SQL_INTEGER );
226
227

    $sth->execute();
228
229

    $original->dbID( $sth->{'mysql_insertid'} );
230
    $original->adaptor($self);
231
  } ## end foreach my $feat (@feats)
232
233

  $sth->finish();
234
} ## end sub store
235
236
237
238
239
240
241
242
243
244
245
246
247
248


sub save {
  my ($self, $features) = @_;

  my @feats = @$features;
  throw("Must call store with features") if( scalar(@feats) == 0 );

  my @tabs = $self->_tables;
  my ($tablename) = @{$tabs[0]};

  my $db = $self->db();
  my $analysis_adaptor = $db->get_AnalysisAdaptor();

249
  my $sql = qq{INSERT INTO $tablename (seq_region_id, seq_region_start, seq_region_end, seq_region_strand, hit_start, hit_end, hit_strand, hit_name, cigar_line, analysis_id, score, evalue, perc_ident, external_db_id, hcoverage, pair_dna_align_feature_id, external_data) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)};
250

251
252
  my %analyses = ();

253
254
255
256
257
258
259
260
261
262
263
264
265
266
  my $sth = $self->prepare($sql);
     
 FEATURE: foreach my $feat ( @feats ) {
    if( !ref $feat || !$feat->isa("Bio::EnsEMBL::DnaDnaAlignFeature") ) {
      throw("feature must be a Bio::EnsEMBL::DnaDnaAlignFeature,"
            . " not a [".ref($feat)."].");
    }

    if($feat->is_stored($db)) {
      warning("DnaDnaAlignFeature [".$feat->dbID."] is already stored" .
              " in this database.");
      next FEATURE;
    }

267
268
269
270
271
272
273
274
    my $hstart  = $feat->hstart || 0; # defined $feat->hstart  ? $feat->hstart : $feat->start ;
    my $hend    = $feat->hend   || 0; # defined $feat->hend    ? $feat->hend : $feat->end;
    my $hstrand = $feat->hstrand|| 0; # defined $feat->hstrand ? $feat->hstrand : $feat->strand;
    if( $hstart && $hend ) {
      if($hend < $hstart) {
        throw("Invalid Feature start/end [$hstart/$hend]. Start must be less than or equal to end.");
      }
    }
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
    my $cigar_string = $feat->cigar_string();
    if(!$cigar_string) {
      $cigar_string = $feat->length() . 'M';
      warning("DnaDnaAlignFeature does not define a cigar_string.\n" .
              "Assuming ungapped block with cigar_line=$cigar_string .");
    }

    my $hseqname = $feat->hseqname();
    if(!$hseqname) {
      throw("DnaDnaAlignFeature must define an hseqname.");
    }

    if(!defined($feat->analysis)) {
      throw("An analysis must be attached to the features to be stored.");
    }

    #store the analysis if it has not been stored yet
    if(!$feat->analysis->is_stored($db)) {
      $analysis_adaptor->store($feat->analysis());
    }

296
297
    $analyses{ $feat->analysis->dbID }++;

298
299
300
301
    my $original = $feat;
    my $seq_region_id;
    ($feat, $seq_region_id) = $self->_pre_store_userdata($feat);

302
303
    my $extra_data = $feat->extra_data ? $self->dump_data($feat->extra_data) : '';

304
305
306
307
308
309
310
311
312
313
    $sth->bind_param(1,$seq_region_id,SQL_INTEGER);
    $sth->bind_param(2,$feat->start,SQL_INTEGER);
    $sth->bind_param(3,$feat->end,SQL_INTEGER);
    $sth->bind_param(4,$feat->strand,SQL_TINYINT);
    $sth->bind_param(5,$hstart,SQL_INTEGER);
    $sth->bind_param(6,$hend,SQL_INTEGER);
    $sth->bind_param(7,$hstrand,SQL_TINYINT);
    $sth->bind_param(8,$hseqname,SQL_VARCHAR);
    $sth->bind_param(9,$cigar_string,SQL_LONGVARCHAR);
    $sth->bind_param(10,$feat->analysis->dbID,SQL_INTEGER);
314
    $sth->bind_param(11,$feat->score,SQL_DOUBLE);
315
#    $sth->bind_param(11,$feat->score); # if the above statement does not work it means you need to upgrade DBD::mysql, meantime you can replace it with this line
316
317
    $sth->bind_param(12,$feat->p_value,SQL_DOUBLE);
    $sth->bind_param(13,$feat->percent_id,SQL_FLOAT);
318
319
    $sth->bind_param(14,$feat->external_db_id,SQL_INTEGER);
    $sth->bind_param(15,$feat->hcoverage,SQL_DOUBLE);
320
321
322
    $sth->bind_param(16,$feat->pair_dna_align_feature_id,SQL_INTEGER);
    $sth->bind_param(17,$extra_data,SQL_LONGVARCHAR);

323
324

    $sth->execute();
325
326
    $original->dbID($sth->{'mysql_insertid'});
    $original->adaptor($self);
327
  }
328
329

  $sth->finish();
330
331
332

## js5 hack to update meta_coord table... 
  if( keys %analyses ) {
333

334
335
    my $sth = $self->prepare( 'select sr.coord_system_id, max(daf.seq_region_end-daf.seq_region_start) from seq_region as sr, dna_align_feature as daf where daf.seq_region_id=sr.seq_region_id and analysis_id in ('.join(',',keys %analyses).') group by coord_system_id' );
    $sth->execute;
336

337
338
339
340
    foreach( @{ $sth->fetchall_arrayref } ) {
      my $sth2 = $self->prepare( qq(insert ignore into meta_coord values("dna_align_feature",$_->[0],$_->[1])) );
      $sth2->execute;
      $sth2->finish;
341
342

      $sth2 = $self->prepare( qq(update meta_coord set max_length = $_->[1] where coord_system_id = $_->[0] and table_name="dna_align_feature" and max_length < $_->[1]) );
343
344
345
      $sth2->execute;
      $sth2->finish;
    }
346

347
348
349
    $sth->finish;
  }

350
351
352
}


353
=head2 _objs_from_sth
Graham McVicker's avatar
Graham McVicker committed
354

355
356
357
358
  Arg [1]    : DBI statement handle $sth
               an exectuted DBI statement handle generated by selecting 
               the columns specified by _columns() from the table specified 
               by _table()
Graham McVicker's avatar
Graham McVicker committed
359
360
361
  Example    : @dna_dna_align_feats = $self->_obj_from_hashref
  Description: PROTECTED implementation of superclass abstract method. 
               Creates DnaDnaAlignFeature objects from a DBI hashref
362
  Returntype : listref of Bio::EnsEMBL::DnaDnaAlignFeatures
Graham McVicker's avatar
Graham McVicker committed
363
364
  Exceptions : none
  Caller     : Bio::EnsEMBL::BaseFeatureAdaptor::generic_fetch
365
  Status     : Stable
Graham McVicker's avatar
Graham McVicker committed
366
367

=cut
368

369
sub _objs_from_sth {
370
  my ($self, $sth, $mapper, $dest_slice) = @_;
371

372
373
374
375
376
  #
  # This code is ugly because an attempt has been made to remove as many
  # function calls as possible for speed purposes.  Thus many caches and
  # a fair bit of gymnastics is used.
  #
377

378
379
380
# in case of userdata we need the features on the dest_slice
# in case of get_all_supporting_features dest_slice is not provided .. 
  my $sa = $dest_slice ? $dest_slice->adaptor() : $self->db()->get_SliceAdaptor();
381
  my $aa = $self->db->get_AnalysisAdaptor();
382

383
384
385
386
387
388
389
390
  my @features;
  my %analysis_hash;
  my %slice_hash;
  my %sr_name_hash;
  my %sr_cs_hash;

  my($dna_align_feature_id, $seq_region_id, $analysis_id, $seq_region_start,
     $seq_region_end, $seq_region_strand, $hit_start, $hit_end, $hit_name,
391
     $hit_strand, $cigar_line, $evalue, $perc_ident, $score,
392
393
     $external_db_id, $hcoverage, $extra_data, $pair_dna_align_feature_id,
     $external_db_name, $external_display_db_name);
394
395
396
397

  $sth->bind_columns(
    \$dna_align_feature_id, \$seq_region_id, \$analysis_id, \$seq_region_start,
    \$seq_region_end, \$seq_region_strand, \$hit_start, \$hit_end, \$hit_name,
398
    \$hit_strand, \$cigar_line, \$evalue, \$perc_ident, \$score,
399
400
    \$external_db_id, \$hcoverage, \$extra_data, \$pair_dna_align_feature_id,
     \$external_db_name, \$external_display_db_name);
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415

  my $asm_cs;
  my $cmp_cs;
  my $asm_cs_vers;
  my $asm_cs_name;
  my $cmp_cs_vers;
  my $cmp_cs_name;
  if($mapper) {
    $asm_cs = $mapper->assembled_CoordSystem();
    $cmp_cs = $mapper->component_CoordSystem();
    $asm_cs_name = $asm_cs->name();
    $asm_cs_vers = $asm_cs->version();
    $cmp_cs_name = $cmp_cs->name();
    $cmp_cs_vers = $cmp_cs->version();
  }
416

417
418
419
420
  my $dest_slice_start;
  my $dest_slice_end;
  my $dest_slice_strand;
  my $dest_slice_length;
421
  my $dest_slice_sr_name;
422
  my $dest_slice_seq_region_id;
423

424
425
426
427
428
  if($dest_slice) {
    $dest_slice_start  = $dest_slice->start();
    $dest_slice_end    = $dest_slice->end();
    $dest_slice_strand = $dest_slice->strand();
    $dest_slice_length = $dest_slice->length();
429
    $dest_slice_sr_name = $dest_slice->seq_region_name();
430
    $dest_slice_seq_region_id = $dest_slice->get_seq_region_id();
431
  }
432

433
434
435
436
  FEATURE: while($sth->fetch()) {
    #get the analysis object
    my $analysis = $analysis_hash{$analysis_id} ||=
      $aa->fetch_by_dbID($analysis_id);
437

438
439
    #get the slice object
    my $slice = $slice_hash{"ID:".$seq_region_id};
440

441
442
443
444
445
446
    if(!$slice) {
      $slice = $sa->fetch_by_seq_region_id($seq_region_id);
      $slice_hash{"ID:".$seq_region_id} = $slice;
      $sr_name_hash{$seq_region_id} = $slice->seq_region_name();
      $sr_cs_hash{$seq_region_id} = $slice->coord_system();
    }
447

448
449
    my $sr_name = $sr_name_hash{$seq_region_id};
    my $sr_cs   = $sr_cs_hash{$seq_region_id};
450
451
452
453
454
455
    #
    # remap the feature coordinates to another coord system
    # if a mapper was provided
    #
    if($mapper) {

456
      ($seq_region_id,$seq_region_start,$seq_region_end,$seq_region_strand) =
457
458
459
460
        $mapper->fastmap($sr_name, $seq_region_start, $seq_region_end,
                          $seq_region_strand, $sr_cs);

      #skip features that map to gaps or coord system boundaries
461
      next FEATURE if(!defined($seq_region_id));
462
463
464

      #get a slice in the coord system we just mapped to
      if($asm_cs == $sr_cs || ($cmp_cs != $sr_cs && $asm_cs->equals($sr_cs))) {
465
466
        $slice = $slice_hash{"ID:".$seq_region_id} ||=
          $sa->fetch_by_seq_region_id($seq_region_id);
467
      } else {
468
469
        $slice = $slice_hash{"ID:".$seq_region_id} ||=
          $sa->fetch_by_seq_region_id($seq_region_id);
470
471
      }
    }
472

473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
    #
    # If a destination slice was provided convert the coords
    # If the dest_slice starts at 1 and is foward strand, nothing needs doing
    #
    if($dest_slice) {
      if($dest_slice_start != 1 || $dest_slice_strand != 1) {
        if($dest_slice_strand == 1) {
          $seq_region_start = $seq_region_start - $dest_slice_start + 1;
          $seq_region_end   = $seq_region_end   - $dest_slice_start + 1;
        } else {
          my $tmp_seq_region_start = $seq_region_start;
          $seq_region_start = $dest_slice_end - $seq_region_end + 1;
          $seq_region_end   = $dest_slice_end - $tmp_seq_region_start + 1;
          $seq_region_strand *= -1;
        }

        #throw away features off the end of the requested slice
490
        if($seq_region_end < 1 || $seq_region_start > $dest_slice_length ||
491
	  ( $dest_slice_seq_region_id ne $seq_region_id ))  {
492
493
494
495
          next FEATURE;
        }
      }
      $slice = $dest_slice;
496
    }
497

498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
    # Finally, create the new DnaAlignFeature.
    push( @features,
          $self->_create_feature_fast(
                                  'Bio::EnsEMBL::DnaDnaAlignFeature', {
                                    'slice'      => $slice,
                                    'start'      => $seq_region_start,
                                    'end'        => $seq_region_end,
                                    'strand'     => $seq_region_strand,
                                    'hseqname'   => $hit_name,
                                    'hstart'     => $hit_start,
                                    'hend'       => $hit_end,
                                    'hstrand'    => $hit_strand,
                                    'score'      => $score,
                                    'p_value'    => $evalue,
                                    'percent_id' => $perc_ident,
                                    'cigar_string' => $cigar_line,
                                    'analysis'     => $analysis,
                                    'adaptor'      => $self,
                                    'dbID' => $dna_align_feature_id,
                                    'external_db_id' => $external_db_id,
518
519
                                    'hcoverage'      => $hcoverage,
				    'extra_data'     => $extra_data ? $self->get_dumped_data($extra_data) : '',
520
				    'dbname'         => $external_db_name,
521
522
				    'db_display_name' => $external_display_db_name,
				    'pair_dna_align_feature_id' => $pair_dna_align_feature_id
523
524
                                  } ) );

525
  }
526

527
528
  return \@features;
}
529

Glenn Proctor's avatar
Glenn Proctor committed
530
531
532
533
=head2 list_dbIDs

  Arg [1]    : none
  Example    : @feature_ids = @{$dna_align_feature_adaptor->list_dbIDs()};
534
535
  Description: Gets an array of internal ids for all dna align features in 
               the current db
536
  Arg[1]     : <optional> int. not 0 for the ids to be sorted by the seq_region.
Glenn Proctor's avatar
Glenn Proctor committed
537
538
539
  Returntype : list of ints
  Exceptions : none
  Caller     : ?
540
  Status     : Stable
Glenn Proctor's avatar
Glenn Proctor committed
541
542
543
544

=cut

sub list_dbIDs {
545
   my ($self, $ordered) = @_;
Glenn Proctor's avatar
Glenn Proctor committed
546

547
   return $self->_list_dbIDs("dna_align_feature",undef, $ordered);
Glenn Proctor's avatar
Glenn Proctor committed
548
549
}

550
551
552
1;