#
# EnsEMBL module for Bio::EnsEMBL::DBSQL::BaseFeatureAdaptor
#
# Cared for by Ewan Birney <birney@ebi.ac.uk>
#
# Copyright Ewan Birney
#
# You may distribute this module under the same terms as perl itself

# POD documentation - main docs before the code

=head1 NAME

Bio::EnsEMBL::DBSQL::BaseFeatureAdaptor - Abstract Base class for 
                                          FeatureAdaptors

=head1 SYNOPSIS

Abstract class should not be instantiated.  Implementation of
abstract methods must be performed by subclasses.

=head1 DESCRIPTION

This is a base adaptor for feature adaptors. This base class is simply a way
of eliminating code duplication through the implementation of methods 
common to all feature adaptors.

=head1 AUTHOR - Ewan Birney

Email birney@ebi.ac.uk

Describe contact details here

=head1 APPENDIX

The rest of the documentation details each of the object methods. Internal methods are usually preceded with a _

=cut


# Let the code begin...


package Bio::EnsEMBL::DBSQL::BaseFeatureAdaptor;
use vars qw(@ISA $SLICE_FEATURE_CACHE_SIZE);
use strict;

# Object preamble - inherits from Bio::EnsEMBL::DBSQL::BaseAdaptor

use Bio::EnsEMBL::DBSQL::BaseAdaptor;
use Bio::EnsEMBL::Utils::Cache;

@ISA = qw(Bio::EnsEMBL::DBSQL::BaseAdaptor);

# Size argument handed to the Bio::EnsEMBL::Utils::Cache tie that new() sets
# up for the slice feature cache.  Presumably the maximum number of cached
# query results per adaptor - TODO confirm against Utils::Cache.
$SLICE_FEATURE_CACHE_SIZE = 4;


=head2 new

  Arg [1]    : list of args @args
               Superclass constructor arguments
  Example    : none
  Description: Constructor which calls the superclass constructor and then
               initializes the internal slice feature cache.
  Returntype : Bio::EnsEMBL::DBSQL::BaseFeatureAdaptor
  Exceptions : none
  Caller     : implementing subclass constructors

=cut

sub new {
  my ($class, @args) = @_;

  my $self = $class->SUPER::new(@args);

  # Initialize the caching data structure.  The hash is tied to
  # Bio::EnsEMBL::Utils::Cache, which receives $SLICE_FEATURE_CACHE_SIZE
  # as its size argument.
  $self->{'_slice_feature_cache'} = {};

  tie(%{$self->{'_slice_feature_cache'}},
      'Bio::EnsEMBL::Utils::Cache',
      $SLICE_FEATURE_CACHE_SIZE);

  return $self;
}

=head2 generic_fetch

  Arg [1]    : (optional) string $constraint
               An SQL query constraint (i.e. part of the WHERE clause)
  Arg [2]    : (optional) string $logic_name
               the logic_name of the analysis of the features to obtain
  Arg [3]    : (optional) $mapper
               passed through unchanged to _objs_from_sth;
               fetch_all_by_Slice_constraint supplies its assembly mapper
  Arg [4]    : (optional) $slice
               passed through unchanged to _objs_from_sth;
               fetch_all_by_Slice_constraint supplies the requested slice
  Example    : $fts = $a->generic_fetch('contig_id in (1234, 1235)', 'Swall');
  Description: Builds a SELECT from _columns, _tablename, the constraint,
               _default_where_clause and _final_clause, executes it and
               returns whatever _objs_from_sth creates from the statement
               handle.  When further conditions (analysis_id, default where
               clause) are ANDed onto $constraint it is wrapped in
               parentheses so that a top-level OR inside it cannot bypass
               them.
  Returntype : listref of Bio::EnsEMBL::SeqFeature; an empty listref if
               $logic_name does not match a stored analysis
  Exceptions : none (a warning is issued for an unknown $logic_name)
  Caller     : BaseFeatureAdaptor, ProxyDnaAlignFeatureAdaptor::generic_fetch

=cut

sub generic_fetch {
  my ($self, $constraint, $logic_name, $mapper, $slice) = @_;

  my $tablename = $self->_tablename();
  my $columns = join(', ', $self->_columns());

  if($logic_name) {
    #determine the analysis id via the logic_name
    my $analysis =
      $self->db->get_AnalysisAdaptor()->fetch_by_logic_name($logic_name);
    unless(defined $analysis && $analysis->dbID() ) {
      $self->warn("No analysis for logic name $logic_name exists\n");
      return [];
    }

    my $analysis_id = $analysis->dbID();

    if($constraint) {
      #parenthesise so that an OR in the caller's constraint cannot
      #escape the analysis restriction
      $constraint = "($constraint) AND analysis_id = $analysis_id";
    } else {
      $constraint = " analysis_id = $analysis_id";
    }
  }

  my $sql = "SELECT $columns FROM $tablename ";

  my $default_where = $self->_default_where_clause;
  my $final_clause = $self->_final_clause;

  #append a where clause if it was defined
  if($constraint) {
    if($default_where) {
      #the default clause typically carries a table join, so it must apply
      #to every row selected by the constraint
      $sql .= " where ($constraint) and $default_where ";
    } else {
      $sql .= " where $constraint ";
    }
  } elsif($default_where) {
    $sql .= " where $default_where ";
  }

  #append additional clauses which may have been defined
  $sql .= " $final_clause";

  my $sth = $self->prepare($sql);

  $sth->execute;

  return $self->_objs_from_sth($sth, $mapper, $slice);
}


=head2 fetch_by_dbID

  Arg [1]    : int $id
               the unique database identifier for the feature to be obtained
  Example    : $feat = $adaptor->fetch_by_dbID(1234);
  Description: Returns the feature created from the database row identified
               by $id.  The primary key column is assumed to be named
               _tablename() . '_id'.
  Returntype : Bio::EnsEMBL::SeqFeature (undef if no row matches)
  Exceptions : thrown if $id is not defined
  Caller     : general

=cut

sub fetch_by_dbID{
  my ($self,$id) = @_;

  unless(defined $id) {
    $self->throw("fetch_by_dbID must have an id");
  }

  my $tablename = $self->_tablename();
  my $constraint = "${tablename}_id = $id";

  #return first element of generic_fetch list
  my ($feat) = @{$self->generic_fetch($constraint)};
  return $feat;
}


=head2 fetch_all_by_RawContig_constraint

  Arg [1]    : Bio::EnsEMBL::RawContig $contig
               The contig object from which features are to be obtained
  Arg [2]    : (optional) string $constraint
               An SQL query constraint (i.e. part of the WHERE clause)
  Arg [3]    : (optional) string $logic_name
               the logic name of the type of features to obtain
  Example    : $fs = $a->fetch_all_by_RawContig_constraint($ctg,
                                                           'perc_ident>5.0');
  Description: Returns a listref of features created from the database which
               are on the contig $contig and fulfill the SQL constraint
               defined by $constraint. If logic name is defined, only features
               with an analysis of type $logic_name will be returned.
               $constraint is wrapped in parentheses before the contig
               restriction is appended, so a top-level OR inside it cannot
               select features from other contigs.
  Returntype : listref of Bio::EnsEMBL::SeqFeature in contig coordinates
  Exceptions : thrown if $contig is not defined or is not a
               Bio::EnsEMBL::RawContig
  Caller     : general

=cut

sub fetch_all_by_RawContig_constraint {
  my ($self, $contig, $constraint, $logic_name) = @_;

  unless( defined $contig ) {
    $self->throw("fetch_all_by_RawContig_constraint must have a contig");
  }

  unless( ref $contig && $contig->isa('Bio::EnsEMBL::RawContig')) {
    $self->throw("contig argument is not a Bio::EnsEMBL::RawContig object\n");
  }

  my $cid = $contig->dbID();

  if($constraint) {
    $constraint = "($constraint) AND contig_id = $cid";
  } else {
    $constraint = "contig_id = $cid";
  }

  return $self->generic_fetch($constraint, $logic_name);
}

=head2 fetch_all_by_RawContig

  Arg [1]    : Bio::EnsEMBL::RawContig $contig
               the contig from which features should be obtained
  Arg [2]    : (optional) string $logic_name
               the logic name of the type of features to obtain
  Example    : $fts = $a->fetch_all_by_RawContig($contig, 'wall');
  Description: Returns a listref of features created from the database which
               are on the contig $contig. If logic name is defined, only
               features with an analysis of type $logic_name will be
               returned.  Delegates to fetch_all_by_RawContig_constraint
               with an empty constraint.
  Returntype : listref of Bio::EnsEMBL::*Feature in contig coordinates
  Exceptions : none
  Caller     : general

=cut

sub fetch_all_by_RawContig {
  my ( $self, $contig, $logic_name ) = @_;

  return $self->fetch_all_by_RawContig_constraint($contig, '',$logic_name);
}


=head2 fetch_all_by_RawContig_and_score

  Arg [1]    : Bio::EnsEMBL::RawContig $contig
               the contig from which features should be obtained
  Arg [2]    : (optional) float $score
               the lower bound of the score of the features to obtain
  Arg [3]    : (optional) string $logic_name
               the logic name of the type of features to obtain
  Example    : $fts = $a->fetch_all_by_RawContig_and_score($contig, 50.0,
                                                           'Swall');
  Description: Returns a listref of features created from the database which
               are on the contig $contig and which have a score greater
               than $score.  If $score is not defined no score restriction
               is applied.  If logic name is defined, only features with an
               analysis of type $logic_name will be returned.
  Returntype : listref of Bio::EnsEMBL::*Feature in contig coordinates
  Exceptions : none
  Caller     : general

=cut

sub fetch_all_by_RawContig_and_score{
  my($self, $contig, $score, $logic_name) = @_;

  my $constraint;

  if(defined $score){
    $constraint = "score > $score";
  }

  return $self->fetch_all_by_RawContig_constraint($contig, $constraint,
                                                  $logic_name);
}


=head2 fetch_all_by_Slice

  Arg [1]    : Bio::EnsEMBL::Slice $slice
               the slice from which to obtain features
  Arg [2]    : (optional) string $logic_name
               the logic name of the type of features to obtain
  Example    : $fts = $a->fetch_all_by_Slice($slice, 'Swall');
  Description: Returns a listref of features created from the database
               which are on the Slice defined by $slice. If $logic_name is
               defined only features with an analysis of type $logic_name
               will be returned.  Delegates to fetch_all_by_Slice_constraint
               with an empty constraint.
  Returntype : listref of Bio::EnsEMBL::SeqFeatures in Slice coordinates
  Exceptions : none
  Caller     : Bio::EnsEMBL::Slice

=cut

sub fetch_all_by_Slice {
  my ($self, $slice, $logic_name) = @_;

  #fetch by constraint with empty constraint
  return $self->fetch_all_by_Slice_constraint($slice, '', $logic_name);
}


=head2 fetch_all_by_Slice_and_score

  Arg [1]    : Bio::EnsEMBL::Slice $slice
               the slice from which to obtain features
  Arg [2]    : (optional) float $score
               lower bound of the score of the features retrieved
  Arg [3]    : (optional) string $logic_name
               the logic name of the type of features to obtain
  Example    : $fts = $a->fetch_all_by_Slice_and_score($slice, 50.0, 'Swall');
  Description: Returns a listref of features created from the database which
               are on the Slice defined by $slice and which have a score
               greater than $score.  If $score is not defined no score
               restriction is applied.  If $logic_name is defined, only
               features with an analysis of type $logic_name will be
               returned.
  Returntype : listref of Bio::EnsEMBL::SeqFeatures in Slice coordinates
  Exceptions : none
  Caller     : Bio::EnsEMBL::Slice

=cut

sub fetch_all_by_Slice_and_score {
  my ($self, $slice, $score, $logic_name) = @_;
  my $constraint;

  if(defined $score) {
    $constraint = "score > $score";
  }

  return $self->fetch_all_by_Slice_constraint($slice, $constraint,
                                              $logic_name);
}


=head2 fetch_all_by_Slice_constraint

  Arg [1]    : Bio::EnsEMBL::Slice $slice
               the slice from which to obtain features
  Arg [2]    : (optional) string $constraint
               An SQL query constraint (i.e. part of the WHERE clause)
  Arg [3]    : (optional) string $logic_name
               the logic name of the type of features to obtain
  Example    : $fs = $a->fetch_all_by_Slice_constraint($slc, 'perc_ident > 5');
  Description: Returns a listref of features created from the database which
               are on the Slice defined by $slice and fulfill the SQL
               constraint defined by $constraint. If logic name is defined,
               only features with an analysis of type $logic_name will be
               returned.
               Results are cached per adaptor under a key built from the
               slice name, the constraint and the logic name; a repeated
               query returns the cached listref itself.
               $constraint is wrapped in parentheses before the contig
               restriction is appended, so a top-level OR inside it cannot
               select features from contigs outside the slice.
  Returntype : listref of Bio::EnsEMBL::SeqFeatures in Slice coordinates
  Exceptions : thrown if $slice is not a Bio::EnsEMBL::Slice
  Caller     : Bio::EnsEMBL::Slice

=cut

sub fetch_all_by_Slice_constraint {
  my($self, $slice, $constraint, $logic_name) = @_;

  unless(defined $slice && ref $slice && $slice->isa("Bio::EnsEMBL::Slice")) {
    $self->throw("Slice arg must be a Bio::EnsEMBL::Slice not a [$slice]\n");
  }

  $logic_name = '' unless $logic_name;
  $constraint = '' unless $constraint;

  #check the cache and return if we have already done this query.
  #The slice name is one component of the key; join() needs its own
  #separator as first argument.
  my $key = join(':', $slice->name, $constraint, $logic_name);
  return $self->{'_slice_feature_cache'}{$key}
    if $self->{'_slice_feature_cache'}{$key};

  my $slice_start  = $slice->chr_start();
  my $slice_end    = $slice->chr_end();
  my $slice_strand = $slice->strand();

  my $mapper =
    $self->db->get_AssemblyMapperAdaptor->fetch_by_type($slice->assembly_type);

  #get the list of contigs this slice is on
  my @cids =
    $mapper->list_contig_ids( $slice->chr_name, $slice_start ,$slice_end );

  return [] unless scalar(@cids);

  my $cid_list = join(',',@cids);

  #construct the SQL constraint for the contig ids
  if($constraint) {
    $constraint = "($constraint) AND contig_id IN ($cid_list)";
  } else {
    $constraint = "contig_id IN ($cid_list)";
  }

  #for speed the remapping to slice may be done at the time of object creation
  my $features =
    $self->generic_fetch($constraint, $logic_name, $mapper, $slice);

  if(@$features && (!$features->[0]->can('contig') ||
                    $features->[0]->contig == $slice)) {
    #features have been converted to slice coords already, cache and return
    return $self->{'_slice_feature_cache'}{$key} = $features;
  }

  #remapping has not been done, we have to do our own conversion from
  # raw contig coords to slice coords

  my @out = ();

  foreach my $f (@$features) {
    #since feats were obtained in contig coords, attached seq is a contig
    my $contig_id = $f->contig->dbID();

    my ($chr_name, $start, $end, $strand) =
      $mapper->fast_to_assembly($contig_id, $f->start(),
                                $f->end(),$f->strand(),"rawcontig");

    # undefined start means gap
    next unless defined $start;

    # maps to region outside desired area
    next if ($start > $slice_end) || ($end < $slice_start);

    #shift the feature start, end and strand in one call
    if($slice_strand == -1) {
      $f->move( $slice_end - $end + 1, $slice_end - $start + 1, $strand * -1 );
    } else {
      $f->move( $start - $slice_start + 1, $end - $slice_start + 1, $strand );
    }

    $f->contig($slice);

    push @out,$f;
  }

  #update the cache
  return $self->{'_slice_feature_cache'}{$key} = \@out;
}


=head2 store

  Arg [1]    : list of Bio::EnsEMBL::SeqFeature
  Example    : $adaptor->store(@feats);
  Description: ABSTRACT  Subclasses are responsible for implementing this
               method.  It should take a list of features and store them in
               the database.
  Returntype : none
  Exceptions : thrown if method is not implemented by subclass
  Caller     : general

=cut

sub store{
  # list assignment: a scalar assignment from @_ would yield the argument
  # count instead of the invocant
  my ($self) = @_;

  $self->throw("Abstract method store not defined by implementing subclass\n");
}


=head2 remove

  Arg [1]    : A feature $feature
  Example    : $feature_adaptor->remove($feature);
  Description: This removes a feature from the database.  The table the
               feature is removed from is defined by the abstract method
               _tablename, and the primary key of the table is assumed
               to be _tablename() . '_id'.  The feature argument must
               be an object implementing the dbID method, and for the
               feature to be removed from the database a dbID value must
               be returned.  After a successful call the dbID of the
               feature is reset to the empty string.
  Returntype : none
  Exceptions : thrown if $feature arg does not implement dbID().
               If $feature->dbID is not a true value a warning is issued
               and nothing is deleted.
  Caller     : general

=cut

sub remove {
  my ($self, $feature) = @_;

  # guard with ref() and eval so that undef or an unblessed reference leads
  # to the documented throw rather than a native method-call error
  my $has_dbID = ref($feature) && eval { $feature->can('dbID') };
  unless($has_dbID) {
    my $shown = defined $feature ? $feature : 'undef';
    $self->throw("Feature [$shown] does not implement method dbID");
  }

  unless($feature->dbID) {
    $self->warn("BaseFeatureAdaptor::remove - dbID not defined - " .
                "feature could not be removed");
    # nothing to delete: do not touch the database or the feature
    return;
  }

  my $table = $self->_tablename();

  my $sth = $self->prepare("DELETE FROM $table WHERE ${table}_id = ?");
  $sth->execute($feature->dbID());

  #unset the feature dbID
  $feature->dbID('');

  return;
}


=head2 remove_by_RawContig

  Arg [1]    : Bio::EnsEMBL::RawContig $contig
  Example    : $feature_adaptor->remove_by_RawContig($contig);
  Description: This removes features from the database which lie on a removed
               contig.  The tables the features are removed from are defined
               by the abstract method _tablename (a comma separated list,
               each entry optionally followed by an alias), and rows are
               selected by their contig_id column.  Nothing is deleted from
               a repeat_consensus table.
  Returntype : none
  Exceptions : thrown if no contig is supplied
  Caller     : general

=cut

sub remove_by_RawContig {
  my ($self, $contig) = @_;

  unless($contig) {
    $self->throw("BaseFeatureAdaptor::remove_by_RawContig - " .
                 "no contig supplied: Deletion of features failed.");
  }

  my $table = $self->_tablename();

  # RepeatFeatureAdaptor returns 2 table names so need to so do some cleaning
  # up of the returned table name.  This has the form:
  #  table_name1 t1, table_name2 t2

  my @tables = split(/,/,$table);

  foreach my $table_name (@tables) {
    # Not pretty this but there is no need to delete anything from the
    # repeat_consensus table
    next if ($table_name =~ /repeat_consensus/);

    # lose the table alias, if there is one.  Entries after the first comma
    # start with whitespace, which must be skipped before the name.
    my ($actual_table) = $table_name =~ /^\s*(\w+)/;
    next unless defined $actual_table;

    my $sth = $self->prepare("DELETE FROM $actual_table WHERE contig_id = ?");
    $sth->execute($contig->dbID);
  }

  return;
}



=head2 _tablename

  Args       : none
  Example    : $tablename = $self->_tablename()
  Description: ABSTRACT PROTECTED Subclasses are responsible for implementing
               this method.  It should return the name of the table to be
               used to obtain features.
  Returntype : string
  Exceptions : thrown if not implemented by subclass
  Caller     : BaseFeatureAdaptor::generic_fetch

=cut

sub _tablename {
  my $self = shift;

  $self->throw("abstract method _tablename not defined by implementing" .
               " subclass of BaseFeatureAdaptor");
  return undef;
}

=head2 _columns

  Args       : none
  Example    : @columns = $self->_columns()
  Description: ABSTRACT PROTECTED Subclasses are responsible for implementing
               this method.  It should return a list of columns to be used
               for feature creation
  Returntype : list of strings
  Exceptions : thrown if not implemented by subclass
  Caller     : BaseFeatureAdaptor::generic_fetch

=cut

sub _columns {
  my $self = shift;

  $self->throw("abstract method _columns not defined by implementing" .
               " subclass of BaseFeatureAdaptor");
}


=head2 _default_where_clause

  Arg [1]    : none
  Example    : none
  Description: Hook for subclasses: an extra where constraint for the SQL
               query that generic_fetch builds to fetch feature records.
               generic_fetch always places it at the end of the generated
               where clause, which makes it suitable for expressing a join
               between tables.  This base implementation contributes
               nothing.
  Returntype : string
  Exceptions : none
  Caller     : generic_fetch

=cut

sub _default_where_clause {
  my ($self) = @_;

  # no additional constraint unless a subclass overrides this method
  my $no_clause = '';

  return $no_clause;
}


=head2 _final_clause

  Arg [1]    : none
  Example    : none
  Description: Hook for subclasses: a clause that generic_fetch appends to
               the very end of the SQL query used to fetch feature records,
               for example a required ORDER BY.  This base implementation
               contributes nothing.
  Returntype : string
  Exceptions : none
  Caller     : generic_fetch

=cut

sub _final_clause {
  my ($self) = @_;

  # nothing is appended unless a subclass overrides this method
  my $no_clause = '';

  return $no_clause;
}

=head2 _objs_from_sth

  Arg [1]    : statement handle $sth
               the handle generic_fetch has prepared and executed; it
               selects the columns specified by the _columns method
  Arg [2]    : (optional) $mapper
               the value generic_fetch received as its $mapper argument
  Arg [3]    : (optional) $slice
               the value generic_fetch received as its $slice argument
  Example    : my $feats = $self->_objs_from_sth($sth);
  Description: ABSTRACT PROTECTED The subclass is responsible for implementing
               this method.  It should read the rows from the statement
               handle and return a reference to the list of created
               features; generic_fetch returns that reference unchanged and
               its callers dereference it as an array.
  Returntype : listref of Bio::EnsEMBL::*Features
  Exceptions : thrown if not implemented by subclass
  Caller     : BaseFeatureAdaptor::generic_fetch

=cut

sub _objs_from_sth {
  my $self = shift;

  $self->throw("abstract method _objs_from_sth not defined by implementing"
             . " subclass of BaseFeatureAdaptor");
}


=head2 deleteObj

  Arg [1]    : none
  Example    : none
  Description: Empties the adaptor's slice feature cache so that the
               cached feature lists are released and can be garbage
               collected.
  Returntype : none
  Exceptions : none
  Caller     : Bio::EnsEMBL::DBConnection::deleteObj

=cut

sub deleteObj {
  my ($self) = @_;

  # drop every cached feature list held by this adaptor
  %{ $self->{'_slice_feature_cache'} } = ();
}

1;