ProteinAlignFeatureAdaptor.pm 12 KB
Newer Older
1
=head1 LICENSE
2

3
  Copyright (c) 1999-2010 The European Bioinformatics Institute and
4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19
  Genome Research Limited.  All rights reserved.

  This software is distributed under a modified Apache license.
  For license details, please see

    http://www.ensembl.org/info/about/code_licence.html

=head1 CONTACT

  Please email comments or questions to the public Ensembl
  developers list at <ensembl-dev@ebi.ac.uk>.

  Questions may also be sent to the Ensembl help desk at
  <helpdesk@ensembl.org>.

=cut
20 21 22

=head1 NAME

23 24
Bio::EnsEMBL::DBSQL::ProteinAlignFeatureAdaptor - 
Adaptor for ProteinAlignFeatures
25 26 27

=head1 SYNOPSIS

28 29
  $pafa =
    $registry->get_adaptor( 'Human', 'Core', 'ProteinAlignFeature' );
30

31
  my @features = @{ $pafa->fetch_all_by_Slice($slice) };
32

33
  $pafa->store(@features);
34

35
=head1 METHODS
36 37 38 39 40 41 42 43

=cut


package Bio::EnsEMBL::DBSQL::ProteinAlignFeatureAdaptor;
use vars qw(@ISA);
use strict;

44
use Bio::EnsEMBL::DBSQL::BaseAlignFeatureAdaptor;
45
use Bio::EnsEMBL::DnaPepAlignFeature;
Laura Clarke's avatar
 
Laura Clarke committed
46
use Bio::EnsEMBL::Utils::Exception qw(throw warning);
47
@ISA = qw(Bio::EnsEMBL::DBSQL::BaseAlignFeatureAdaptor);
48 49


50 51
=head2 store

52 53 54
  Arg [1]    : list of Bio::EnsEMBL::DnaPepAlignFeature @feats
  Example    : $protein_align_feature_adaptor->store(@feats);
  Description: stores a list of ProteinAlignFeatures in the database
55
  Returntype : none
56 57 58 59 60 61 62 63 64
  Exceptions : throw if any of the provided features cannot be stored
               which may occur if:
                 * The feature does not have an associated Slice
                 * The feature does not have an associated analysis
                 * The Slice the feature is associated with is on a seq_region
                   unknown to this database
              A warning is given if:
                 * The feature has already been stored in this db
  Caller     : Pipeline
65
  Status     : Stable
66 67 68

=cut

69

70
sub store{
71 72 73 74 75 76 77 78 79 80 81 82 83 84 85
 my ($self, @feats) = @_;

  throw("Must call store with features") if( scalar(@feats) == 0 );

  my @tabs = $self->_tables;
  my ($tablename) = @{$tabs[0]};

  my $db = $self->db();
  my $slice_adaptor = $db->get_SliceAdaptor();
  my $analysis_adaptor = $db->get_AnalysisAdaptor();

  my $sth = $self->prepare(
     "INSERT INTO $tablename (seq_region_id, seq_region_start, seq_region_end,
                             seq_region_strand, hit_start, hit_end,
                             hit_name, cigar_line,
86 87
                             analysis_id, score, evalue, perc_ident, external_db_id, hcoverage)
     VALUES (?,?,?,?,?,?,?,?,?,?, ?, ?, ?, ?)");
88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111

 FEATURE: foreach my $feat ( @feats ) {
   if( !ref $feat || !$feat->isa("Bio::EnsEMBL::DnaPepAlignFeature") ) {
     throw("feature must be a Bio::EnsEMBL::DnaPepAlignFeature,"
           . " not a [".ref($feat)."].");
   }

   if($feat->is_stored($db)) {
     warning("PepDnaAlignFeature [".$feat->dbID."] is already stored" .
             " in this database.");
     next FEATURE;
   }

   #sanity check the hstart and hend
   my $hstart  = $feat->hstart();
   my $hend    = $feat->hend();
   $self->_check_start_end_strand($hstart,$hend,1);

   my $cigar_string = $feat->cigar_string();
   if(!$cigar_string) {
     $cigar_string = $feat->length() . 'M';
     warning("DnaPepAlignFeature does not define a cigar_string.\n" .
             "Assuming ungapped block with cigar_string=$cigar_string\n");
   }
112

113 114 115
   my $hseqname = $feat->hseqname();
   if(!$hseqname) {
     throw("DnaPepAlignFeature must define an hseqname.");
116 117
   }

118 119
   if(!defined($feat->analysis)) {
     throw("An analysis must be attached to the features to be stored.");
120
   }
121 122 123 124 125 126 127

   #store the analysis if it has not been stored yet
   if(!$feat->analysis->is_stored($db)) {
     $analysis_adaptor->store($feat->analysis());
   }

   my $slice = $feat->slice();
Ian Longden's avatar
Ian Longden committed
128
   if(!defined($slice) || !($slice->isa("Bio::EnsEMBL::Slice") or $slice->isa('Bio::EnsEMBL::LRGSlice')) ) {
129 130 131 132 133 134 135
     throw("A slice must be attached to the features to be stored.");
   }

   my $original = $feat;
   my $seq_region_id;
   ($feat, $seq_region_id) = $self->_pre_store($feat);

136 137 138 139 140 141 142 143 144 145 146 147
   $sth->bind_param(1,$seq_region_id,SQL_INTEGER);
   $sth->bind_param(2,$feat->start,SQL_INTEGER);
   $sth->bind_param(3,$feat->end,SQL_INTEGER);
   $sth->bind_param(4,$feat->strand,SQL_TINYINT);
   $sth->bind_param(5,$feat->hstart,SQL_INTEGER);
   $sth->bind_param(6,$feat->hend,SQL_INTEGER);
   $sth->bind_param(7,$feat->hseqname,SQL_VARCHAR);
   $sth->bind_param(8,$feat->cigar_string,SQL_LONGVARCHAR);
   $sth->bind_param(9,$feat->analysis->dbID,SQL_INTEGER);
   $sth->bind_param(10,$feat->score,SQL_DOUBLE);
   $sth->bind_param(11,$feat->p_value,SQL_DOUBLE);
   $sth->bind_param(12,$feat->percent_id,SQL_REAL);
148 149
   $sth->bind_param(13,$feat->external_db_id,SQL_INTEGER);
   $sth->bind_param(14,$feat->hcoverage,SQL_DOUBLE);
150 151

   $sth->execute();
152 153
   $original->dbID($sth->{'mysql_insertid'});
   $original->adaptor($self);
154
 }
155

156 157 158
 $sth->finish();
}

159

160
=head2 _objs_from_sth
161

162 163 164 165 166 167 168 169 170 171
  Arg [1]    : DBI statement handle $sth
               an exectuted DBI statement handle generated by selecting 
               the columns specified by _columns() from the table specified 
               by _table()
  Example    : @dna_dna_align_feats = $self->_obj_from_hashref
  Description: PROTECTED implementation of superclass abstract method. 
               Creates DnaDnaAlignFeature objects from a DBI hashref
  Returntype : listref of Bio::EnsEMBL::ProteinAlignFeatures
  Exceptions : none
  Caller     : Bio::EnsEMBL::BaseFeatureAdaptor::generic_fetch
172
  Status     : Stable
173

174
=cut
175

176
sub _objs_from_sth {
177
  my ($self, $sth, $mapper, $dest_slice) = @_;
178

179 180 181 182 183
  #
  # This code is ugly because an attempt has been made to remove as many
  # function calls as possible for speed purposes.  Thus many caches and
  # a fair bit of gymnastics is used.
  #
184

185 186
  my $sa = $self->db()->get_SliceAdaptor();
  my $aa = $self->db->get_AnalysisAdaptor();
187

188 189 190 191 192 193 194 195
  my @features;
  my %analysis_hash;
  my %slice_hash;
  my %sr_name_hash;
  my %sr_cs_hash;

  my ($protein_align_feature_id, $seq_region_id, $seq_region_start,
      $seq_region_end, $analysis_id, $seq_region_strand, $hit_start,
196
      $hit_end, $hit_name, $cigar_line, $evalue, $perc_ident, $score,
197
      $external_db_id, $hcoverage, $external_db_name, $external_display_db_name );
198 199 200 201

  $sth->bind_columns(\$protein_align_feature_id, \$seq_region_id,
           \$seq_region_start,\$seq_region_end, \$analysis_id,
           \$seq_region_strand, \$hit_start,\$hit_end, \$hit_name,
202
           \$cigar_line, \$evalue, \$perc_ident, \$score,
203
           \$external_db_id, \$hcoverage, \$external_db_name, \$external_display_db_name );
204 205 206 207 208 209 210 211 212 213 214 215 216 217 218

  my $asm_cs;
  my $cmp_cs;
  my $asm_cs_vers;
  my $asm_cs_name;
  my $cmp_cs_vers;
  my $cmp_cs_name;
  if($mapper) {
    $asm_cs = $mapper->assembled_CoordSystem();
    $cmp_cs = $mapper->component_CoordSystem();
    $asm_cs_name = $asm_cs->name();
    $asm_cs_vers = $asm_cs->version();
    $cmp_cs_name = $cmp_cs->name();
    $cmp_cs_vers = $cmp_cs->version();
  }
219

220 221 222 223
  my $dest_slice_start;
  my $dest_slice_end;
  my $dest_slice_strand;
  my $dest_slice_length;
224
  my $dest_slice_sr_name;
225
  my $dest_slice_sr_id;
226 227 228 229 230
  if($dest_slice) {
    $dest_slice_start  = $dest_slice->start();
    $dest_slice_end    = $dest_slice->end();
    $dest_slice_strand = $dest_slice->strand();
    $dest_slice_length = $dest_slice->length();
231
    $dest_slice_sr_name = $dest_slice->seq_region_name();
232
    $dest_slice_sr_id  = $dest_slice->get_seq_region_id();
233
  }
234

235 236 237 238
  FEATURE: while($sth->fetch()) {
    #get the analysis object
    my $analysis = $analysis_hash{$analysis_id} ||=
      $aa->fetch_by_dbID($analysis_id);
239

240 241
    #get the slice object
    my $slice = $slice_hash{"ID:".$seq_region_id};
242

243 244 245 246 247 248
    if(!$slice) {
      $slice = $sa->fetch_by_seq_region_id($seq_region_id);
      $slice_hash{"ID:".$seq_region_id} = $slice;
      $sr_name_hash{$seq_region_id} = $slice->seq_region_name();
      $sr_cs_hash{$seq_region_id} = $slice->coord_system();
    }
249

250 251
    my $sr_name = $sr_name_hash{$seq_region_id};
    my $sr_cs   = $sr_cs_hash{$seq_region_id};
252 253 254 255 256 257
    #
    # remap the feature coordinates to another coord system
    # if a mapper was provided
    #
    if($mapper) {

258
      ($seq_region_id,$seq_region_start,$seq_region_end,$seq_region_strand) =
259 260 261 262
        $mapper->fastmap($sr_name, $seq_region_start, $seq_region_end,
                          $seq_region_strand, $sr_cs);

      #skip features that map to gaps or coord system boundaries
263 264 265 266 267 268 269 270 271 272 273
      next FEATURE if(!defined($seq_region_id));

 #     #get a slice in the coord system we just mapped to
 #     if($asm_cs == $sr_cs || ($cmp_cs != $sr_cs && $asm_cs->equals($sr_cs))) {
        $slice = $slice_hash{"ID:".$seq_region_id} ||=
          $sa->fetch_by_seq_region_id($seq_region_id);
#      } else {
#        $slice = $slice_hash{"ID:".$seq_region_id} ||=
#          $sa->fetch_by_seq_region_id($asm_cs_name, $sr_name, undef, undef, undef,
#                               $asm_cs_vers);
#      }
274
    }
275

276 277 278 279 280 281 282 283 284 285 286 287 288 289 290
    #
    # If a destination slice was provided convert the coords
    # If the dest_slice starts at 1 and is foward strand, nothing needs doing
    #
    if($dest_slice) {
      if($dest_slice_start != 1 || $dest_slice_strand != 1) {
        if($dest_slice_strand == 1) {
          $seq_region_start = $seq_region_start - $dest_slice_start + 1;
          $seq_region_end   = $seq_region_end   - $dest_slice_start + 1;
        } else {
          my $tmp_seq_region_start = $seq_region_start;
          $seq_region_start = $dest_slice_end - $seq_region_end + 1;
          $seq_region_end   = $dest_slice_end - $tmp_seq_region_start + 1;
          $seq_region_strand *= -1;
        }
291 292 293 294
      }
       
      #throw away features off the end of the requested slice
      if($seq_region_end < 1 || $seq_region_start > $dest_slice_length ||
295
	( $dest_slice_sr_id ne $seq_region_id )) {
296
	next FEATURE;
297 298
      }
      $slice = $dest_slice;
299
    }
300

301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322
    # Finally, create the new ProteinAlignFeature.
    push(
      @features,
      $self->_create_feature_fast(
        'Bio::EnsEMBL::DnaPepAlignFeature', {
          'slice'        => $slice,
          'start'        => $seq_region_start,
          'end'          => $seq_region_end,
          'strand'       => $seq_region_strand,
          'hseqname'     => $hit_name,
          'hstart'       => $hit_start,
          'hend'         => $hit_end,
          'hstrand'      => 1,                  # dna_pep_align features
                                                # are always hstrand 1
          'score'        => $score,
          'p_value'      => $evalue,
          'percent_id'   => $perc_ident,
          'cigar_string' => $cigar_line,
          'analysis'     => $analysis,
          'adaptor'      => $self,
          'dbID'           => $protein_align_feature_id,
          'external_db_id' => $external_db_id,
323 324 325
          'hcoverage'      => $hcoverage,
	  'dbname'         => $external_db_name,
	  'db_display_name' => $external_display_db_name
326 327
        } ) );

328
  }
329

330
  return \@features;
331
}
332 333


334

335
sub _tables {
336 337
  my $self = shift;

338
  return (['protein_align_feature', 'paf'], ['external_db','exdb']);
339 340
}

341

342 343
sub _columns {
  my $self = shift;
344 345

  #warning _objs_from_hashref method depends on ordering of this list 
346 347 348 349 350 351 352 353 354 355 356 357
  return qw( paf.protein_align_feature_id
             paf.seq_region_id
             paf.seq_region_start
             paf.seq_region_end
             paf.analysis_id
             paf.seq_region_strand
             paf.hit_start
             paf.hit_end
             paf.hit_name
             paf.cigar_line
             paf.evalue
             paf.perc_ident
358 359
             paf.score
             paf.external_db_id
360 361 362 363 364 365 366
             paf.hcoverage
	     exdb.db_name
	     exdb.db_display_name);
}

sub _left_join{
    return (['external_db',"exdb.external_db_id = paf.external_db_id"]);
367 368
}

Glenn Proctor's avatar
Glenn Proctor committed
369 370 371 372
=head2 list_dbIDs

  Arg [1]    : none
  Example    : @feature_ids = @{$protein_align_feature_adaptor->list_dbIDs()};
373 374
  Description: Gets an array of internal ids for all protein align 
               features in the current db
375
  Arg[1]     : <optional> int. not 0 for the ids to be sorted by the seq_region.
376
  Returntype : listref of ints
Glenn Proctor's avatar
Glenn Proctor committed
377 378
  Exceptions : none
  Caller     : ?
379
  Status     : Stable
Glenn Proctor's avatar
Glenn Proctor committed
380 381 382 383

=cut

sub list_dbIDs {
384
   my ($self,$ordered) = @_;
Glenn Proctor's avatar
Glenn Proctor committed
385

386
   return $self->_list_dbIDs("protein_align_feature", undef, $ordered);
Glenn Proctor's avatar
Glenn Proctor committed
387
}
388 389 390 391 392 393






394
1;