DnaAlignFeatureAdaptor.pm 11.7 KB
Newer Older
1
#
2
# EnsEMBL module for Bio::EnsEMBL::DnaAlignFeatureAdaptor
3
#
4
# Copyright (c) 2003 EnsEMBL
5
6
7
8
9
10
11
12
13
#
# You may distribute this module under the same terms as perl itself

=head1 NAME

Bio::EnsEMBL::DBSQL::DnaAlignFeatureAdaptor - Adaptor for DnaAlignFeatures

=head1 SYNOPSIS

Graham McVicker's avatar
Graham McVicker committed
14
    $dafa = $dbadaptor->get_DnaAlignFeatureAdaptor();
15

16
    @features = @{$dafa->fetch_by_Slice($slice)};
17

Graham McVicker's avatar
Graham McVicker committed
18
    $dafa->store(@features);
19
20
21

=head1 DESCRIPTION

22
23
24
25
This is an adaptor responsible for the retrieval and storage of 
DnaDnaAlignFeatures from the database. This adaptor inherits most of its 
functionality from the BaseAlignFeatureAdaptor and BaseFeatureAdaptor 
superclasses.
26

27
=head1 CONTACT
28

29
Post questions to the EnsEMBL development list <ensembl-dev@ebi.ac.uk>
30
31
32
33
34
35
36
37

=cut


package Bio::EnsEMBL::DBSQL::DnaAlignFeatureAdaptor;
use vars qw(@ISA);
use strict;

38
use Bio::EnsEMBL::DnaDnaAlignFeature;
39
use Bio::EnsEMBL::DBSQL::BaseAlignFeatureAdaptor;
Laura Clarke's avatar
 
Laura Clarke committed
40
use Bio::EnsEMBL::Utils::Exception qw(throw warning);
41

42
@ISA = qw(Bio::EnsEMBL::DBSQL::BaseAlignFeatureAdaptor);
43
44


45
=head2 _tables
Graham McVicker's avatar
Graham McVicker committed
46
47

  Args       : none
48
  Example    : @tabs = $self->_tables
Graham McVicker's avatar
Graham McVicker committed
49
  Description: PROTECTED implementation of the abstract method inherited from
50
51
               BaseFeatureAdaptor.  Returns list of [tablename, alias] pairs
  Returntype : list of listrefs of strings
Graham McVicker's avatar
Graham McVicker committed
52
53
  Exceptions : none
  Caller     : Bio::EnsEMBL::DBSQL::BaseFeatureAdaptor::generic_fetch
54
  Status     : Stable
Graham McVicker's avatar
Graham McVicker committed
55
56
57

=cut

58
sub _tables {
59
  my $self = shift;
60

61
  return ['dna_align_feature', 'daf'];
62
63
}

Graham McVicker's avatar
Graham McVicker committed
64
65
66
67
68
69
70
71
72
73

=head2 _columns

  Args       : none
  Example    : @columns = $self->_columns
  Description: PROTECTED implementation of abstract superclass method.  
               Returns a list of columns that are needed for object creation.
  Returntype : list of strings
  Exceptions : none
  Caller     : Bio::EnsEMBL::DBSQL::BaseFeatureAdaptor::generic_fetch
74
  Status     : Stable
Graham McVicker's avatar
Graham McVicker committed
75
76
77

=cut

78
79
sub _columns {
  my $self = shift;
80

81
  #warning, implementation of _objs_from_sth method depends on order of list
82
83
84
85
86
87
88
89
90
91
92
93
94
95
  return qw(daf.dna_align_feature_id
            daf.seq_region_id
            daf.analysis_id
            daf.seq_region_start
            daf.seq_region_end
            daf.seq_region_strand
            daf.hit_start
            daf.hit_end
            daf.hit_name
            daf.hit_strand
            daf.cigar_line
            daf.evalue
            daf.perc_ident
            daf.score);
96
}
97

98

99
=head2 store
100

101
  Arg [1]    : list of Bio::EnsEMBL::DnaAlignFeatures @feats
Graham McVicker's avatar
Graham McVicker committed
102
               the features to store in the database
103
  Example    : $dna_align_feature_adaptor->store(@features);
Graham McVicker's avatar
Graham McVicker committed
104
105
  Description: Stores a list of DnaAlignFeatures in the database
  Returntype : none
106
107
108
109
110
111
112
113
114
  Exceptions : throw if any of the provided features cannot be stored
               which may occur if:
                 * The feature does not have an associate Slice
                 * The feature does not have an associated analysis
                 * The Slice the feature is associated with is on a seq_region
                   unknown to this database
               A warning is given if:
                 * The feature has already been stored in this db
  Caller     : Pipeline
115
  Status     : Stable
116
117
118

=cut

119
sub store {
120
121
122
  my ($self, @feats) = @_;

  throw("Must call store with features") if( scalar(@feats) == 0 );
123
124
125

  my @tabs = $self->_tables;
  my ($tablename) = @{$tabs[0]};
126
127
128
129
130
131
132

  my $db = $self->db();
  my $analysis_adaptor = $db->get_AnalysisAdaptor();

  my $sth = $self->prepare(
     "INSERT INTO $tablename (seq_region_id, seq_region_start, seq_region_end,
                             seq_region_strand, hit_start, hit_end,
133
                             hit_strand, hit_name, cigar_line,
134
                             analysis_id, score, evalue, perc_ident)
135
     VALUES (?,?,?,?,?,?,?,?,?,?,?, ?, ?)");
136

137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
 FEATURE: foreach my $feat ( @feats ) {
    if( !ref $feat || !$feat->isa("Bio::EnsEMBL::DnaDnaAlignFeature") ) {
      throw("feature must be a Bio::EnsEMBL::DnaDnaAlignFeature,"
            . " not a [".ref($feat)."].");
    }

    if($feat->is_stored($db)) {
      warning("DnaDnaAlignFeature [".$feat->dbID."] is already stored" .
              " in this database.");
      next FEATURE;
    }

    my $hstart = $feat->hstart();
    my $hend   = $feat->hend();
    my $hstrand = $feat->hstrand();
    $self->_check_start_end_strand($hstart,$hend, $hstrand);

    my $cigar_string = $feat->cigar_string();
    if(!$cigar_string) {
      $cigar_string = $feat->length() . 'M';
      warning("DnaDnaAlignFeature does not define a cigar_string.\n" .
              "Assuming ungapped block with cigar_line=$cigar_string .");
159
    }
160
161
162
163
164
165
166
167

    my $hseqname = $feat->hseqname();
    if(!$hseqname) {
      throw("DnaDnaAlignFeature must define an hseqname.");
    }

    if(!defined($feat->analysis)) {
      throw("An analysis must be attached to the features to be stored.");
168
169
    }

170
171
172
    #store the analysis if it has not been stored yet
    if(!$feat->analysis->is_stored($db)) {
      $analysis_adaptor->store($feat->analysis());
173
    }
174

175
176
177
    my $original = $feat;
    my $seq_region_id;
    ($feat, $seq_region_id) = $self->_pre_store($feat);
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
    $sth->bind_param(1,$seq_region_id,SQL_INTEGER);
    $sth->bind_param(2,$feat->start,SQL_INTEGER);
    $sth->bind_param(3,$feat->end,SQL_INTEGER);
    $sth->bind_param(4,$feat->strand,SQL_TINYINT);
    $sth->bind_param(5,$hstart,SQL_INTEGER);
    $sth->bind_param(6,$hend,SQL_INTEGER);
    $sth->bind_param(7,$hstrand,SQL_TINYINT);
    $sth->bind_param(8,$hseqname,SQL_VARCHAR);
    $sth->bind_param(9,$cigar_string,SQL_LONGVARCHAR);
    $sth->bind_param(10,$feat->analysis->dbID,SQL_INTEGER);
    $sth->bind_param(11,$feat->score,SQL_DOUBLE);
    $sth->bind_param(12,$feat->p_value,SQL_DOUBLE);
    $sth->bind_param(13,$feat->percent_id,SQL_FLOAT);

    $sth->execute();
193
194
    $original->dbID($sth->{'mysql_insertid'});
    $original->adaptor($self);
195
  }
196
197

  $sth->finish();
198
199
200
}


201
=head2 _objs_from_sth
Graham McVicker's avatar
Graham McVicker committed
202

203
204
205
206
  Arg [1]    : DBI statement handle $sth
               an exectuted DBI statement handle generated by selecting 
               the columns specified by _columns() from the table specified 
               by _table()
Graham McVicker's avatar
Graham McVicker committed
207
208
209
  Example    : @dna_dna_align_feats = $self->_obj_from_hashref
  Description: PROTECTED implementation of superclass abstract method. 
               Creates DnaDnaAlignFeature objects from a DBI hashref
210
  Returntype : listref of Bio::EnsEMBL::DnaDnaAlignFeatures
Graham McVicker's avatar
Graham McVicker committed
211
212
  Exceptions : none
  Caller     : Bio::EnsEMBL::BaseFeatureAdaptor::generic_fetch
213
  Status     : Stable
Graham McVicker's avatar
Graham McVicker committed
214
215

=cut
216

217
sub _objs_from_sth {
218
  my ($self, $sth, $mapper, $dest_slice) = @_;
219

220
221
222
223
224
  #
  # This code is ugly because an attempt has been made to remove as many
  # function calls as possible for speed purposes.  Thus many caches and
  # a fair bit of gymnastics is used.
  #
225

226
227
  my $sa = $self->db()->get_SliceAdaptor();
  my $aa = $self->db->get_AnalysisAdaptor();
228

229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
  my @features;
  my %analysis_hash;
  my %slice_hash;
  my %sr_name_hash;
  my %sr_cs_hash;

  my($dna_align_feature_id, $seq_region_id, $analysis_id, $seq_region_start,
     $seq_region_end, $seq_region_strand, $hit_start, $hit_end, $hit_name,
     $hit_strand, $cigar_line, $evalue, $perc_ident, $score);

  $sth->bind_columns(
    \$dna_align_feature_id, \$seq_region_id, \$analysis_id, \$seq_region_start,
    \$seq_region_end, \$seq_region_strand, \$hit_start, \$hit_end, \$hit_name,
    \$hit_strand, \$cigar_line, \$evalue, \$perc_ident, \$score);

  my $asm_cs;
  my $cmp_cs;
  my $asm_cs_vers;
  my $asm_cs_name;
  my $cmp_cs_vers;
  my $cmp_cs_name;
  if($mapper) {
    $asm_cs = $mapper->assembled_CoordSystem();
    $cmp_cs = $mapper->component_CoordSystem();
    $asm_cs_name = $asm_cs->name();
    $asm_cs_vers = $asm_cs->version();
    $cmp_cs_name = $cmp_cs->name();
    $cmp_cs_vers = $cmp_cs->version();
  }
258

259
260
261
262
  my $dest_slice_start;
  my $dest_slice_end;
  my $dest_slice_strand;
  my $dest_slice_length;
263
  my $dest_slice_sr_name;
264
  my $dest_slice_seq_region_id;
265

266
267
268
269
270
  if($dest_slice) {
    $dest_slice_start  = $dest_slice->start();
    $dest_slice_end    = $dest_slice->end();
    $dest_slice_strand = $dest_slice->strand();
    $dest_slice_length = $dest_slice->length();
271
    $dest_slice_sr_name = $dest_slice->seq_region_name();
272
    $dest_slice_seq_region_id = $dest_slice->get_seq_region_id();
273
  }
274

275
276
277
278
  FEATURE: while($sth->fetch()) {
    #get the analysis object
    my $analysis = $analysis_hash{$analysis_id} ||=
      $aa->fetch_by_dbID($analysis_id);
279

280
281
    #get the slice object
    my $slice = $slice_hash{"ID:".$seq_region_id};
282

283
284
285
286
287
288
    if(!$slice) {
      $slice = $sa->fetch_by_seq_region_id($seq_region_id);
      $slice_hash{"ID:".$seq_region_id} = $slice;
      $sr_name_hash{$seq_region_id} = $slice->seq_region_name();
      $sr_cs_hash{$seq_region_id} = $slice->coord_system();
    }
289

290
291
    my $sr_name = $sr_name_hash{$seq_region_id};
    my $sr_cs   = $sr_cs_hash{$seq_region_id};
292
293
294
295
296
297
    #
    # remap the feature coordinates to another coord system
    # if a mapper was provided
    #
    if($mapper) {

298
      ($seq_region_id,$seq_region_start,$seq_region_end,$seq_region_strand) =
299
300
301
302
        $mapper->fastmap($sr_name, $seq_region_start, $seq_region_end,
                          $seq_region_strand, $sr_cs);

      #skip features that map to gaps or coord system boundaries
303
      next FEATURE if(!defined($seq_region_id));
304
305
306

      #get a slice in the coord system we just mapped to
      if($asm_cs == $sr_cs || ($cmp_cs != $sr_cs && $asm_cs->equals($sr_cs))) {
307
308
        $slice = $slice_hash{"ID:".$seq_region_id} ||=
          $sa->fetch_by_seq_region_id($seq_region_id);
309
      } else {
310
311
        $slice = $slice_hash{"ID:".$seq_region_id} ||=
          $sa->fetch_by_seq_region_id($seq_region_id);
312
313
      }
    }
314

315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
    #
    # If a destination slice was provided convert the coords
    # If the dest_slice starts at 1 and is foward strand, nothing needs doing
    #
    if($dest_slice) {
      if($dest_slice_start != 1 || $dest_slice_strand != 1) {
        if($dest_slice_strand == 1) {
          $seq_region_start = $seq_region_start - $dest_slice_start + 1;
          $seq_region_end   = $seq_region_end   - $dest_slice_start + 1;
        } else {
          my $tmp_seq_region_start = $seq_region_start;
          $seq_region_start = $dest_slice_end - $seq_region_end + 1;
          $seq_region_end   = $dest_slice_end - $tmp_seq_region_start + 1;
          $seq_region_strand *= -1;
        }

        #throw away features off the end of the requested slice
332
        if($seq_region_end < 1 || $seq_region_start > $dest_slice_length ||
333
	  ( $dest_slice_seq_region_id ne $seq_region_id ))  {
334
335
336
337
          next FEATURE;
        }
      }
      $slice = $dest_slice;
338
    }
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356

    #finally, create the new dna align feature
    push @features, Bio::EnsEMBL::DnaDnaAlignFeature->new_fast
      ( { 'slice'         =>  $slice,
          'start'         =>  $seq_region_start,
          'end'           =>  $seq_region_end,
          'strand'        =>  $seq_region_strand,
          'hseqname'      =>  $hit_name,
          'hstart'        =>  $hit_start,
          'hend'          =>  $hit_end,
          'hstrand'       =>  $hit_strand,
          'score'         =>  $score,
          'p_value'       =>  $evalue,
          'percent_id'    =>  $perc_ident,
          'cigar_string'  =>  $cigar_line,
          'analysis'      =>  $analysis,
          'adaptor'       =>  $self,
          'dbID'          =>  $dna_align_feature_id } );
357
  }
358

359
360
  return \@features;
}
361

Glenn Proctor's avatar
Glenn Proctor committed
362
363
364
365
=head2 list_dbIDs

  Arg [1]    : none
  Example    : @feature_ids = @{$dna_align_feature_adaptor->list_dbIDs()};
366
367
  Description: Gets an array of internal ids for all dna align features in 
               the current db
368
  Arg[1]     : <optional> int. not 0 for the ids to be sorted by the seq_region.
Glenn Proctor's avatar
Glenn Proctor committed
369
370
371
  Returntype : list of ints
  Exceptions : none
  Caller     : ?
372
  Status     : Stable
Glenn Proctor's avatar
Glenn Proctor committed
373
374
375
376

=cut

sub list_dbIDs {
377
   my ($self, $ordered) = @_;
Glenn Proctor's avatar
Glenn Proctor committed
378

379
   return $self->_list_dbIDs("dna_align_feature",undef, $ordered);
Glenn Proctor's avatar
Glenn Proctor committed
380
381
}

382
383
384
1;