DnaAlignFeatureAdaptor.pm 11 KB
Newer Older
1
#
2
# EnsEMBL module for Bio::EnsEMBL::DnaAlignFeatureAdaptor
3
#
4
# Copyright (c) 2003 EnsEMBL
5
6
7
8
9
10
11
12
13
#
# You may distribute this module under the same terms as perl itself

=head1 NAME

Bio::EnsEMBL::DBSQL::DnaAlignFeatureAdaptor - Adaptor for DnaAlignFeatures

=head1 SYNOPSIS

Graham McVicker's avatar
Graham McVicker committed
14
    $dafa = $dbadaptor->get_DnaAlignFeatureAdaptor();
15

16
    @features = @{$dafa->fetch_by_Slice($slice)};
17

Graham McVicker's avatar
Graham McVicker committed
18
    $dafa->store(@features);
19
20
21

=head1 DESCRIPTION

22
23
24
25
This is an adaptor responsible for the retrieval and storage of 
DnaDnaAlignFeatures from the database. This adaptor inherits most of its 
functionality from the BaseAlignFeatureAdaptor and BaseFeatureAdaptor 
superclasses.
26

27
=head1 CONTACT
28

29
Post questions to the EnsEMBL development list <ensembl-dev@ebi.ac.uk>
30
31
32
33
34
35
36
37

=cut


package Bio::EnsEMBL::DBSQL::DnaAlignFeatureAdaptor;
use vars qw(@ISA);
use strict;

38
use Bio::EnsEMBL::DnaDnaAlignFeature;
39
use Bio::EnsEMBL::DBSQL::BaseAlignFeatureAdaptor;
40
use Bio::EnsEMBL::Utils::Exception qw(throw);
41

42
@ISA = qw(Bio::EnsEMBL::DBSQL::BaseAlignFeatureAdaptor);
43
44


45
=head2 _tables
Graham McVicker's avatar
Graham McVicker committed
46
47

  Args       : none
48
  Example    : @tabs = $self->_tables
Graham McVicker's avatar
Graham McVicker committed
49
  Description: PROTECTED implementation of the abstract method inherited from
50
51
               BaseFeatureAdaptor.  Returns list of [tablename, alias] pairs
  Returntype : list of listrefs of strings
Graham McVicker's avatar
Graham McVicker committed
52
53
54
55
56
  Exceptions : none
  Caller     : Bio::EnsEMBL::DBSQL::BaseFeatureAdaptor::generic_fetch

=cut

57
sub _tables {
58
  my $self = shift;
59

60
  return ['dna_align_feature', 'daf'];
61
62
}

Graham McVicker's avatar
Graham McVicker committed
63
64
65
66
67
68
69
70
71
72
73
74
75

=head2 _columns

  Args       : none
  Example    : @columns = $self->_columns
  Description: PROTECTED implementation of abstract superclass method.  
               Returns a list of columns that are needed for object creation.
  Returntype : list of strings
  Exceptions : none
  Caller     : Bio::EnsEMBL::DBSQL::BaseFeatureAdaptor::generic_fetch

=cut

76
77
sub _columns {
  my $self = shift;
78

79
  #warning, implementation of _objs_from_sth method depends on order of list
80
81
82
83
84
85
86
87
88
89
90
91
92
93
  return qw(daf.dna_align_feature_id
            daf.seq_region_id
            daf.analysis_id
            daf.seq_region_start
            daf.seq_region_end
            daf.seq_region_strand
            daf.hit_start
            daf.hit_end
            daf.hit_name
            daf.hit_strand
            daf.cigar_line
            daf.evalue
            daf.perc_ident
            daf.score);
94
}
95

96

97
=head2 store
98

99
  Arg [1]    : list of Bio::EnsEMBL::DnaAlignFeatures @feats
Graham McVicker's avatar
Graham McVicker committed
100
               the features to store in the database
101
  Example    : $dna_align_feature_adaptor->store(@features);
Graham McVicker's avatar
Graham McVicker committed
102
103
  Description: Stores a list of DnaAlignFeatures in the database
  Returntype : none
104
105
106
107
108
109
110
111
112
  Exceptions : throw if any of the provided features cannot be stored
               which may occur if:
                 * The feature does not have an associate Slice
                 * The feature does not have an associated analysis
                 * The Slice the feature is associated with is on a seq_region
                   unknown to this database
               A warning is given if:
                 * The feature has already been stored in this db
  Caller     : Pipeline
113
114
115

=cut

116
sub store {
117
118
119
  my ($self, @feats) = @_;

  throw("Must call store with features") if( scalar(@feats) == 0 );
120
121
122

  my @tabs = $self->_tables;
  my ($tablename) = @{$tabs[0]};
123
124
125
126
127
128
129

  my $db = $self->db();
  my $analysis_adaptor = $db->get_AnalysisAdaptor();

  my $sth = $self->prepare(
     "INSERT INTO $tablename (seq_region_id, seq_region_start, seq_region_end,
                             seq_region_strand, hit_start, hit_end,
130
                             hit_strand, hit_name, cigar_line,
131
                             analysis_id, score, evalue, perc_ident)
132
     VALUES (?,?,?,?,?,?,?,?,?,?,?, ?, ?)");
133

134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
 FEATURE: foreach my $feat ( @feats ) {
    if( !ref $feat || !$feat->isa("Bio::EnsEMBL::DnaDnaAlignFeature") ) {
      throw("feature must be a Bio::EnsEMBL::DnaDnaAlignFeature,"
            . " not a [".ref($feat)."].");
    }

    if($feat->is_stored($db)) {
      warning("DnaDnaAlignFeature [".$feat->dbID."] is already stored" .
              " in this database.");
      next FEATURE;
    }

    my $hstart = $feat->hstart();
    my $hend   = $feat->hend();
    my $hstrand = $feat->hstrand();
    $self->_check_start_end_strand($hstart,$hend, $hstrand);

    my $cigar_string = $feat->cigar_string();
    if(!$cigar_string) {
      $cigar_string = $feat->length() . 'M';
      warning("DnaDnaAlignFeature does not define a cigar_string.\n" .
              "Assuming ungapped block with cigar_line=$cigar_string .");
156
    }
157
158
159
160
161
162
163
164

    my $hseqname = $feat->hseqname();
    if(!$hseqname) {
      throw("DnaDnaAlignFeature must define an hseqname.");
    }

    if(!defined($feat->analysis)) {
      throw("An analysis must be attached to the features to be stored.");
165
166
    }

167
168
169
    #store the analysis if it has not been stored yet
    if(!$feat->analysis->is_stored($db)) {
      $analysis_adaptor->store($feat->analysis());
170
    }
171

172
173
174
175
176
177
178
179
    my $original = $feat;
    my $seq_region_id;
    ($feat, $seq_region_id) = $self->_pre_store($feat);

    $sth->execute( $seq_region_id, $feat->start, $feat->end, $feat->strand,
		   $hstart, $hend, $hstrand, $hseqname,
		   $cigar_string, $feat->analysis->dbID, $feat->score,
		   $feat->p_value, $feat->percent_id);
180

181
182
    $original->dbID($sth->{'mysql_insertid'});
    $original->adaptor($self);
183
  }
184
185

  $sth->finish();
186
187
188
}


189
=head2 _objs_from_sth
Graham McVicker's avatar
Graham McVicker committed
190

191
192
193
194
  Arg [1]    : DBI statement handle $sth
               an exectuted DBI statement handle generated by selecting 
               the columns specified by _columns() from the table specified 
               by _table()
Graham McVicker's avatar
Graham McVicker committed
195
196
197
  Example    : @dna_dna_align_feats = $self->_obj_from_hashref
  Description: PROTECTED implementation of superclass abstract method. 
               Creates DnaDnaAlignFeature objects from a DBI hashref
198
  Returntype : listref of Bio::EnsEMBL::DnaDnaAlignFeatures
Graham McVicker's avatar
Graham McVicker committed
199
200
201
202
  Exceptions : none
  Caller     : Bio::EnsEMBL::BaseFeatureAdaptor::generic_fetch

=cut
203

204
sub _objs_from_sth {
205
  my ($self, $sth, $mapper, $dest_slice) = @_;
206

207
208
209
210
211
  #
  # This code is ugly because an attempt has been made to remove as many
  # function calls as possible for speed purposes.  Thus many caches and
  # a fair bit of gymnastics is used.
  #
212

213
214
  my $sa = $self->db()->get_SliceAdaptor();
  my $aa = $self->db->get_AnalysisAdaptor();
215

216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
  my @features;
  my %analysis_hash;
  my %slice_hash;
  my %sr_name_hash;
  my %sr_cs_hash;

  my($dna_align_feature_id, $seq_region_id, $analysis_id, $seq_region_start,
     $seq_region_end, $seq_region_strand, $hit_start, $hit_end, $hit_name,
     $hit_strand, $cigar_line, $evalue, $perc_ident, $score);

  $sth->bind_columns(
    \$dna_align_feature_id, \$seq_region_id, \$analysis_id, \$seq_region_start,
    \$seq_region_end, \$seq_region_strand, \$hit_start, \$hit_end, \$hit_name,
    \$hit_strand, \$cigar_line, \$evalue, \$perc_ident, \$score);

  my $asm_cs;
  my $cmp_cs;
  my $asm_cs_vers;
  my $asm_cs_name;
  my $cmp_cs_vers;
  my $cmp_cs_name;
  if($mapper) {
    $asm_cs = $mapper->assembled_CoordSystem();
    $cmp_cs = $mapper->component_CoordSystem();
    $asm_cs_name = $asm_cs->name();
    $asm_cs_vers = $asm_cs->version();
    $cmp_cs_name = $cmp_cs->name();
    $cmp_cs_vers = $cmp_cs->version();
  }
245

246
247
248
249
250
251
252
253
254
255
  my $dest_slice_start;
  my $dest_slice_end;
  my $dest_slice_strand;
  my $dest_slice_length;
  if($dest_slice) {
    $dest_slice_start  = $dest_slice->start();
    $dest_slice_end    = $dest_slice->end();
    $dest_slice_strand = $dest_slice->strand();
    $dest_slice_length = $dest_slice->length();
  }
256

257
258
259
260
  FEATURE: while($sth->fetch()) {
    #get the analysis object
    my $analysis = $analysis_hash{$analysis_id} ||=
      $aa->fetch_by_dbID($analysis_id);
261

262
263
    #get the slice object
    my $slice = $slice_hash{"ID:".$seq_region_id};
264

265
266
267
268
269
270
    if(!$slice) {
      $slice = $sa->fetch_by_seq_region_id($seq_region_id);
      $slice_hash{"ID:".$seq_region_id} = $slice;
      $sr_name_hash{$seq_region_id} = $slice->seq_region_name();
      $sr_cs_hash{$seq_region_id} = $slice->coord_system();
    }
271

272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
    #
    # remap the feature coordinates to another coord system
    # if a mapper was provided
    #
    if($mapper) {
      my $sr_name = $sr_name_hash{$seq_region_id};
      my $sr_cs   = $sr_cs_hash{$seq_region_id};

      ($sr_name,$seq_region_start,$seq_region_end,$seq_region_strand) =
        $mapper->fastmap($sr_name, $seq_region_start, $seq_region_end,
                          $seq_region_strand, $sr_cs);

      #skip features that map to gaps or coord system boundaries
      next FEATURE if(!defined($sr_name));

      #get a slice in the coord system we just mapped to
      if($asm_cs == $sr_cs || ($cmp_cs != $sr_cs && $asm_cs->equals($sr_cs))) {
        $slice = $slice_hash{"NAME:$sr_name:$cmp_cs_name:$cmp_cs_vers"} ||=
          $sa->fetch_by_region($cmp_cs_name, $sr_name,undef, undef, undef,
                               $cmp_cs_vers);
292
      } else {
293
294
295
        $slice = $slice_hash{"NAME:$sr_name:$asm_cs_name:$asm_cs_vers"} ||=
          $sa->fetch_by_region($asm_cs_name, $sr_name, undef, undef, undef,
                               $asm_cs_vers);
296
297
      }
    }
298

299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
    #
    # If a destination slice was provided convert the coords
    # If the dest_slice starts at 1 and is foward strand, nothing needs doing
    #
    if($dest_slice) {
      if($dest_slice_start != 1 || $dest_slice_strand != 1) {
        if($dest_slice_strand == 1) {
          $seq_region_start = $seq_region_start - $dest_slice_start + 1;
          $seq_region_end   = $seq_region_end   - $dest_slice_start + 1;
        } else {
          my $tmp_seq_region_start = $seq_region_start;
          $seq_region_start = $dest_slice_end - $seq_region_end + 1;
          $seq_region_end   = $dest_slice_end - $tmp_seq_region_start + 1;
          $seq_region_strand *= -1;
        }

        #throw away features off the end of the requested slice
        if($seq_region_end < 1 || $seq_region_start > $dest_slice_length) {
          next FEATURE;
        }
      }
      $slice = $dest_slice;
321
    }
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339

    #finally, create the new dna align feature
    push @features, Bio::EnsEMBL::DnaDnaAlignFeature->new_fast
      ( { 'slice'         =>  $slice,
          'start'         =>  $seq_region_start,
          'end'           =>  $seq_region_end,
          'strand'        =>  $seq_region_strand,
          'hseqname'      =>  $hit_name,
          'hstart'        =>  $hit_start,
          'hend'          =>  $hit_end,
          'hstrand'       =>  $hit_strand,
          'score'         =>  $score,
          'p_value'       =>  $evalue,
          'percent_id'    =>  $perc_ident,
          'cigar_string'  =>  $cigar_line,
          'analysis'      =>  $analysis,
          'adaptor'       =>  $self,
          'dbID'          =>  $dna_align_feature_id } );
340
  }
341

342
343
  return \@features;
}
344

Glenn Proctor's avatar
Glenn Proctor committed
345
346
347
348
=head2 list_dbIDs

  Arg [1]    : none
  Example    : @feature_ids = @{$dna_align_feature_adaptor->list_dbIDs()};
349
350
  Description: Gets an array of internal ids for all dna align features in 
               the current db
Glenn Proctor's avatar
Glenn Proctor committed
351
352
353
354
355
356
357
358
359
360
361
362
  Returntype : list of ints
  Exceptions : none
  Caller     : ?

=cut

sub list_dbIDs {
   my ($self) = @_;

   return $self->_list_dbIDs("dna_align_feature");
}

363
364
365
1;