AssemblyExceptionFeatureAdaptor.pm 18.5 KB
Newer Older
1 2
=head1 LICENSE

3
  Copyright (c) 1999-2010 The European Bioinformatics Institute and
4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19
  Genome Research Limited.  All rights reserved.

  This software is distributed under a modified Apache license.
  For license details, please see

    http://www.ensembl.org/info/about/code_licence.html

=head1 CONTACT

  Please email comments or questions to the public Ensembl
  developers list at <ensembl-dev@ebi.ac.uk>.

  Questions may also be sent to the Ensembl help desk at
  <helpdesk@ensembl.org>.

=cut
20 21 22

=head1 NAME

23
Bio::EnsEMBL::DBSQL::AssemblyExceptionFeatureAdaptor
24 25 26

=head1 SYNOPSIS

27
  my $assembly_exception_feature_adaptor =
28
    $database_adaptor->get_AssemblyExceptionFeatureAdaptor();
29

30 31
  @assembly_exception_features =
    $assembly_exception_feature_adaptor->fetch_all_by_Slice($slice);
32 33 34

=head1 DESCRIPTION

35 36
Assembly Exception Feature Adaptor - database access for assembly
exception features.
37

38
=head1 METHODS
39 40 41

=cut

42 43
package Bio::EnsEMBL::DBSQL::AssemblyExceptionFeatureAdaptor;

44
use strict;
45 46
use warnings;
no warnings qw(uninitialized);
47

48
use Bio::EnsEMBL::DBSQL::BaseAdaptor;
49 50 51
use Bio::EnsEMBL::DBSQL::BaseFeatureAdaptor;
use Bio::EnsEMBL::AssemblyExceptionFeature;
use Bio::EnsEMBL::Utils::Exception qw(throw warning);
52 53 54 55 56 57
use Bio::EnsEMBL::Utils::Cache;

our @ISA = qw(Bio::EnsEMBL::DBSQL::BaseAdaptor);

# set the number of slices you'd like to cache
our $ASSEMBLY_EXCEPTION_FEATURE_CACHE_SIZE = 100;
58

59
=head2 new
60

61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76
  Arg [1]    : list of args @args
               Superclass constructor arguments
  Example    : none
  Description: Constructor which just initializes internal cache structures
  Returntype : Bio::EnsEMBL::DBSQL::AssemblyExceptionFeatureAdaptor
  Exceptions : none
  Caller     : implementing subclass constructors
  Status     : Stable

=cut

sub new {
  my $caller = shift;
  my $class = ref($caller) || $caller;

  my $self = $class->SUPER::new(@_);
77

78 79 80 81 82 83 84 85 86
  # initialize an LRU cache for slices
  my %cache;
  tie(%cache, 'Bio::EnsEMBL::Utils::Cache',
    $ASSEMBLY_EXCEPTION_FEATURE_CACHE_SIZE);

  $self->{'_aexc_slice_cache'} = \%cache;

  return $self;
}
87 88 89 90 91 92 93 94

=head2 fetch_all

  Arg [1]    : none
  Example    : my @axfs = @{$axfa->fetch_all()};
  Description: Retrieves all assembly exception features which are in the
               database and builds internal caches of the features.
  Returntype : reference to list of Bio::EnsEMBL::AssemblyExceptionFeatures
95
  Exceptions : none
96
  Caller     : fetch_by_dbID, fetch_by_Slice
97
  Status     : Stable
98 99 100

=cut

101 102
sub fetch_all {
  my $self = shift;
103

104 105 106 107 108
  # this is the "global" cache for all assembly exception features in the db
  if(defined($self->{'_aexc_cache'})) {
    return $self->{'_aexc_cache'};
  }

109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128
  my $statement = qq(
  SELECT    ae.assembly_exception_id,
            ae.seq_region_id,
            ae.seq_region_start,
            ae.seq_region_end,
            ae.exc_type,
            ae.exc_seq_region_id,
            ae.exc_seq_region_start,
            ae.exc_seq_region_end,
            ae.ori
  FROM      assembly_exception ae,
            coord_system cs,
            seq_region sr
  WHERE     cs.species_id = ?
    AND     sr.coord_system_id = cs.coord_system_id
    AND     sr.seq_region_id = ae.seq_region_id);

  my $sth = $self->prepare($statement);

  $sth->bind_param( 1, $self->species_id(), SQL_INTEGER );
129

130
  $sth->execute();
131

132 133
  my ($ax_id, $sr_id, $sr_start, $sr_end,
      $x_type, $x_sr_id, $x_sr_start, $x_sr_end, $ori);
134

135 136
  $sth->bind_columns(\$ax_id, \$sr_id, \$sr_start, \$sr_end,
                     \$x_type, \$x_sr_id, \$x_sr_start, \$x_sr_end, \$ori);
137

138 139
  my @features;
  my $sa = $self->db()->get_SliceAdaptor();
140

141
  $self->{'_aexc_dbID_cache'} = {};
142

143 144 145
  while($sth->fetch()) {
    my $slice   = $sa->fetch_by_seq_region_id($sr_id);
    my $x_slice = $sa->fetch_by_seq_region_id($x_sr_id);
146

147 148
    # each row creates TWO features, each of which has alternate_slice
    # pointing to the "other" one
149

150
   
151 152 153 154 155 156 157 158 159
    my $a = Bio::EnsEMBL::AssemblyExceptionFeature->new
          ('-dbID'            => $ax_id,
           '-start'           => $sr_start,
           '-end'             => $sr_end,
           '-strand'          => 1,
           '-adaptor'         => $self,
           '-slice'           => $slice,
           '-alternate_slice' => $x_slice->sub_Slice($x_sr_start, $x_sr_end),
           '-type'            => $x_type);
160
   
161
    push @features, $a;
162
    $self->{'_aexc_dbID_cache'}->{$ax_id} = $a;
163 164 165 166 167 168 169 170 171

    push @features, Bio::EnsEMBL::AssemblyExceptionFeature->new
          ('-dbID'            => $ax_id,
           '-start'           => $x_sr_start,
           '-end'             => $x_sr_end,
           '-strand'          => 1,
           '-adaptor'         => $self,
           '-slice'           => $x_slice,
           '-alternate_slice' => $slice->sub_Slice($sr_start, $sr_end),
172
           '-type'            => "$x_type REF" );
173 174 175
  }

  $sth->finish();
176 177

  $self->{'_aexc_cache'} = \@features;
178
  
179 180 181
  return \@features;
}

182

183 184 185 186 187 188 189 190 191
=head2 fetch_by_dbID

  Arg [1]    : int $dbID
  Example    : my $axf = $axfa->fetch_by_dbID(3);
  Description: Retrieves a single assembly exception feature via its internal
               identifier.  Note that this only retrieves one of the two
               assembly exception features which are represented by a single
               row in the assembly_exception table.
  Returntype : Bio::EnsEMBL::AssemblyExceptionFeature
192
  Exceptions : none
193
  Caller     : general
194
  Status     : Stable
195 196 197

=cut

198
sub fetch_by_dbID {
199
  my $self = shift;
200 201
  my $dbID = shift;

202
  if(!exists($self->{'_aexc_dbID_cache'})) {
203 204 205
    # force loading of cache
    $self->fetch_all();
  }
206

207
  return $self->{'_aexc_dbID_cache'}->{$dbID};
208 209 210
}


211
=head2 fetch_all_by_Slice
212

213 214 215 216 217 218
  Arg [1]    : Bio::EnsEMBL::Slice $slice
  Example    : my @axfs = @{$axfa->fetch_all_by_Slice($slice)};
  Description: Retrieves all assembly exception features which overlap the
               provided slice.  The returned features will be in coordinate
               system of the slice.
  Returntype : reference to list of Bio::EnsEMBL::AssemblyException features
219
  Exceptions : none
220
  Caller     : Feature::get_all_alt_locations, general
221
  Status     : Stable
222 223 224

=cut

225 226 227
sub fetch_all_by_Slice {
  my $self = shift;
  my $slice = shift;
228

229
  my $key= uc($slice->name());
230

231 232 233
  # return features from the slice cache if present
  if(exists($self->{'_aexc_slice_cache'}->{$key})) {
    return $self->{'_aexc_slice_cache'}->{$key};
234
  }
235

236
  my $all_features = $self->fetch_all();
237

238 239
  my $mcc = $self->db()->get_MetaCoordContainer();
  my $css = $mcc->fetch_all_CoordSystems_by_feature_type('assembly_exception');
240

241
  my @features;
242

243
  my $ma = $self->db()->get_AssemblyMapperAdaptor();
244

245 246 247 248 249 250 251
  foreach my $cs (@$css) {
    my $mapper;
    if($cs->equals($slice->coord_system)) {
      $mapper = undef;
    } else {
      $mapper = $ma->fetch_by_CoordSystems($cs,$slice->coord_system());
    }
252

253
    push @features, @{ $self->_remap($all_features, $mapper, $slice) };
254 255
  }

256
  $self->{'_aexc_slice_cache'}->{$key} = \@features;
257

258 259 260 261
  return \@features;
}


262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347
#
# Given a list of features checks if they are in the correct coord system
# by looking at the first features slice.  If they are not then they are
# converted and placed on the slice.
#
# Note that this is a re-implementation of a method with the same name in
# BaseFeatureAdaptor, and in contrast to the latter which maps features in
# place, this method returns a remapped copy of each feature. The reason for
# this is to get around conflicts with caching.
#
sub _remap {
  my ($self, $features, $mapper, $slice) = @_;

  # check if any remapping is actually needed
  if(@$features && (!$features->[0]->isa('Bio::EnsEMBL::Feature') ||
                    $features->[0]->slice == $slice)) {
    return $features;
  }

  # remapping has not been done, we have to do our own conversion from
  # to slice coords

  my @out;

  my $slice_start = $slice->start();
  my $slice_end   = $slice->end();
  my $slice_strand = $slice->strand();
  my $slice_cs    = $slice->coord_system();

  my ($seq_region, $start, $end, $strand);

  my $slice_seq_region = $slice->seq_region_name();

  foreach my $f (@$features) {
    # since feats were obtained in contig coords, attached seq is a contig
    my $fslice = $f->slice();
    if(!$fslice) {
      throw("Feature does not have attached slice.\n");
    }
    my $fseq_region = $fslice->seq_region_name();
    my $fcs = $fslice->coord_system();

    if(!$slice_cs->equals($fcs)) {
      # slice of feature in different coord system, mapping required
      ($seq_region, $start, $end, $strand) =
        $mapper->fastmap($fseq_region,$f->start(),$f->end(),$f->strand(),$fcs);

      # undefined start means gap
      next if(!defined $start);
    } else {
      $start      = $f->start();
      $end        = $f->end();
      $strand     = $f->strand();
      $seq_region = $f->slice->seq_region_name();
    }

    # maps to region outside desired area
    next if ($start > $slice_end) || ($end < $slice_start) || 
      ($slice_seq_region ne $seq_region);

    # create new copies of successfully mapped feaatures with shifted start,
    # end and strand
    my ($new_start, $new_end);
    if($slice_strand == -1) {
      $new_start = $slice_end - $end + 1;
      $new_end = $slice_end - $start + 1;
    } else {
      $new_start = $start - $slice_start + 1;
      $new_end = $end - $slice_start + 1;
    }
    
    push @out, Bio::EnsEMBL::AssemblyExceptionFeature->new(
            '-dbID'            => $f->dbID,
            '-start'           => $new_start,
            '-end'             => $new_end,
            '-strand'          => $strand * $slice_strand,
            '-adaptor'         => $self,
            '-slice'           => $slice,
            '-alternate_slice' => $f->alternate_slice,
            '-type'            => $f->type,
    );
  }

  return \@out;
}

348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368
=head2 store

    Arg[1]       : Bio::EnsEMBL::AssemblyException $asx
    Arg[2]       : Bio::EnsEMBL::AssemblyException $asx2

    Example      : $asx = Bio::EnsEMBL::AssemblyExceptionFeature->new(...)
                   $asx2 = Bio::EnsEMBL::AssemblyExceptionFeature->new(...)
                   $asx_seq_region_id = $asx_adaptor->store($asx);
    Description:  This stores a assembly exception feature in the 
                  assembly_exception table and returns the assembly_exception_id.
                  Needs 2 features: one pointing to the Assembly_exception, and the
                  other pointing to the region in the reference that is being mapped to
                  Will check that start, end and type are defined, and the alternate
                  slice is present as well.
    ReturnType:   int
    Exceptions:   throw if assembly exception not defined (needs start, end,
		  type and alternate_slice) of if $asx not a Bio::EnsEMBL::AssemblyException
    Caller:       general
    Status:       Stable
=cut

Ian Longden's avatar
Ian Longden committed
369
#    use Data::Dumper;
370 371 372 373 374 375

sub store{
    my $self = shift;
    my $asx = shift;
    my $asx2 = shift;

Ian Longden's avatar
Ian Longden committed
376

377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392
    if (! $asx->isa('Bio::EnsEMBL::AssemblyExceptionFeature')){
	throw("$asx is not a Ensembl assemlby exception -- not stored");
    }
    #if already present, return ID in the database
    my $db = $self->db();
    if ($asx->is_stored($db)){
	return $asx->dbID();
    }
    #do some checkings for the object
    #at the moment, the orientation is always 1
    if (! $asx->start || ! $asx->end ){
	throw("Assembly exception does not have coordinates");
    }
    if ($asx->type !~ /PAR|HAP/){
	throw("Only types of assembly exception features valid are PAR and HAP");
    }
Ian Longden's avatar
Ian Longden committed
393
    if ( !($asx->alternate_slice->isa('Bio::EnsEMBL::Slice') or $asx->alternate_slice->isa('Bio::EnsEMBL::LRGSlice')) ){
394 395 396 397 398 399 400 401 402 403 404 405 406
	throw("Alternate slice should be a Bio::EnsEMBL::Slice");
    }
    #now check the other Assembly exception feature, the one pointing to the REF
    # region
    if (!$asx2->isa('Bio::EnsEMBL::AssemblyExceptionFeature')){
	throw("$asx2 is not a Ensembl assemlby exception -- not stored");
    }
     if (! $asx2->start || ! $asx2->end ){
	throw("Assembly exception does not have coordinates");
    }
    if ($asx2->type !~ /HAP REF|PAR REF/){
	throw("$asx2 should have  type of assembly exception features HAP REF or PAR REF");
    }
Ian Longden's avatar
Ian Longden committed
407 408
    if (! ($asx2->alternate_slice->isa('Bio::EnsEMBL::Slice') or $asx2->alternate_slice->isa('Bio::EnsEMBL::LRGSlice'))){
      throw("Alternate slice should be a Bio::EnsEMBL::Slice");
409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490
    }
    #finally check that both features are pointing to each other slice
    if ($asx->slice != $asx2->alternate_slice || $asx->alternate_slice != $asx2->slice){
	throw("Slice and alternate slice in both features are not pointing to each other");
    }
    #prepare the SQL
    my $asx_sql = q{
	INSERT INTO assembly_exception( seq_region_id, seq_region_start,
					seq_region_end, 
					exc_type, exc_seq_region_id,
					exc_seq_region_start, exc_seq_region_end,
					ori)
	    VALUES (?, ?, ?, ?, ?, ?, ?, 1)
	};

    my $asx_st = $self->prepare($asx_sql);
    my $asx_id = undef;
    my $asx_seq_region_id;
    my $asx2_seq_region_id;
    my $original = $asx;
    my $original2 = $asx2;
    #check all feature information
    ($asx, $asx_seq_region_id) = $self->_pre_store($asx);
    ($asx2, $asx2_seq_region_id) = $self->_pre_store($asx2);

    #and store it
    $asx_st->bind_param(1, $asx_seq_region_id, SQL_INTEGER);
    $asx_st->bind_param(2, $asx->start(), SQL_INTEGER);
    $asx_st->bind_param(3, $asx->end(), SQL_INTEGER);
    $asx_st->bind_param(4, $asx->type(), SQL_VARCHAR);
    $asx_st->bind_param(5, $asx2_seq_region_id, SQL_INTEGER);
    $asx_st->bind_param(6, $asx2->start(), SQL_INTEGER);
    $asx_st->bind_param(7, $asx2->end(), SQL_INTEGER);

    $asx_st->execute();
    $asx_id = $asx_st->{'mysql_insertid'};

    #finally, update the dbID and adaptor of the asx and asx2
    $original->adaptor($self);
    $original->dbID($asx_id);
    $original2->adaptor($self);
    $original2->dbID($asx_id);
    #and finally update dbID cache with new assembly exception
    $self->{'_aexc_dbID_cache'}->{$asx_id} = $original;
    #and update the other caches as well
    push @{$self->{'_aexc_slice_cache'}->{uc($asx->slice->name)}},$original, $original2;
    push @{$self->{'_aexc_cache'}}, $original, $original2;
    
    return $asx_id;
}

#
# Helper function containing some common feature storing functionality
#
# Given a Feature this will return a copy (or the same feature if no changes 
# to the feature are needed) of the feature which is relative to the start
# of the seq_region it is on. The seq_region_id of the seq_region it is on
# is also returned.
#
# This method will also ensure that the database knows which coordinate
# systems that this feature is stored in.
# Since this adaptor doesn't inherit from BaseFeatureAdaptor, we need to copy
# the code
#

sub _pre_store {
  my $self    = shift;
  my $feature = shift;

  if(!ref($feature) || !$feature->isa('Bio::EnsEMBL::Feature')) {
    throw('Expected Feature argument.');
  }

  $self->_check_start_end_strand($feature->start(),$feature->end(),
                                 $feature->strand());


  my $db = $self->db();

  my $slice_adaptor = $db->get_SliceAdaptor();
  my $slice = $feature->slice();

Ian Longden's avatar
Ian Longden committed
491
  if(!ref($slice) || !($slice->isa('Bio::EnsEMBL::Slice') or $slice->isa('Bio::EnsEMBL::LRGSlice'))  ) {
492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576
    throw('Feature must be attached to Slice to be stored.');
  }

  # make sure feature coords are relative to start of entire seq_region

  if($slice->start != 1 || $slice->strand != 1) {
    #move feature onto a slice of the entire seq_region
    $slice = $slice_adaptor->fetch_by_region($slice->coord_system->name(),
                                             $slice->seq_region_name(),
                                             undef, #start
                                             undef, #end
                                             undef, #strand
                                             $slice->coord_system->version());

    $feature = $feature->transfer($slice);

    if(!$feature) {
      throw('Could not transfer Feature to slice of ' .
            'entire seq_region prior to storing');
    }
  }

  # Ensure this type of feature is known to be stored in this coord system.
  my $cs = $slice->coord_system;

   my $mcc = $db->get_MetaCoordContainer();

  $mcc->add_feature_type($cs, 'assembly_exception', $feature->length);

  my $seq_region_id = $slice_adaptor->get_seq_region_id($slice);

  if(!$seq_region_id) {
    throw('Feature is associated with seq_region which is not in this DB.');
  }

  return ($feature, $seq_region_id);
}

#
# helper function used to validate start/end/strand and 
# hstart/hend/hstrand etc.
#
sub _check_start_end_strand {
  my $self = shift;
  my $start = shift;
  my $end   = shift;
  my $strand = shift;

  #
  # Make sure that the start, end, strand are valid
  #
  if(int($start) != $start) {
    throw("Invalid Feature start [$start].  Must be integer.");
  }
  if(int($end) != $end) {
    throw("Invalid Feature end [$end]. Must be integer.");
  }
  if(int($strand) != $strand || $strand < -1 || $strand > 1) {
    throw("Invalid Feature strand [$strand]. Must be -1, 0 or 1.");
  }
  if($end < $start) {
    throw("Invalid Feature start/end [$start/$end]. Start must be less " .
          "than or equal to end.");
  }

  return 1;
}

=head2 remove

  Arg [1]    : $asx Bio::EnsEMBL::AssemblyFeatureException 
  Example    : $asx_adaptor->remove($asx);
  Description: This removes a assembly exception feature from the database.  
  Returntype : none
  Exceptions : thrown if $asx arg does not implement dbID(), or if
               $asx->dbID is not a true value
  Caller     : general
  Status     : Stable

=cut

#again, this method is generic in BaseFeatureAdaptor, but since this class
#is not inheriting, need to copy&paste
sub remove {
  my ($self, $feature) = @_;
577

578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616
  if(!$feature || !ref($feature) || !$feature->isa('Bio::EnsEMBL::AssemblyExceptionFeature')) {
    throw('AssemblyExceptionFeature argument is required');
  }

  if(!$feature->is_stored($self->db)) {
    throw("This feature is not stored in this database");
  }

  my $asx_id = $feature->dbID();
  my $key = uc($feature->slice->name);
  my $sth = $self->prepare("DELETE FROM assembly_exception WHERE assembly_exception_id = ?");
  $sth->bind_param(1,$feature->dbID,SQL_INTEGER);
  $sth->execute();
  
  #and clear cache
  #and finally update dbID cache
  delete $self->{'_aexc_dbID_cache'}->{$asx_id};
  #and remove from cache feature
  my @features;
  foreach my $asx (@{$self->{'_aexc_slice_cache'}->{$key}}){
      if ($asx->dbID != $asx_id){
	  push @features, $asx;
      }
  }
  $self->{'_aexc_slice_cache'}->{$key} = \@features;
  @features = ();
  foreach my $asx (@{$self->{'_aexc_cache'}}){
      if ($asx->dbID != $asx_id){
	  push @features, $asx;
      }
  }
  $self->{'_aexc_cache'} = \@features;

#unset the feature dbID ad adaptor
  $feature->dbID(undef);
  $feature->adaptor(undef);

  return;
}
617 618


619
1;