SimpleFeatureAdaptor.pm 12.6 KB
Newer Older
1
=head1 LICENSE
2

3
Copyright [1999-2013] Wellcome Trust Sanger Institute and the EMBL-European Bioinformatics Institute
4

5 6 7 8 9 10 11 12 13 14 15 16 17
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

     http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.

=cut
18 19 20 21 22


=head1 CONTACT

  Please email comments or questions to the public Ensembl
Magali Ruffier's avatar
Magali Ruffier committed
23
  developers list at <http://lists.ensembl.org/mailman/listinfo/dev>.
24 25

  Questions may also be sent to the Ensembl help desk at
Magali Ruffier's avatar
Magali Ruffier committed
26
  <http://www.ensembl.org/Help/Contact>.
27 28

=cut
29 30 31

=head1 NAME

32
Bio::EnsEMBL::DBSQL::SimpleFeatureAdaptor
33 34 35

=head1 SYNOPSIS

36
  my $registry = 'Bio::EnsEMBL::Registry';
37

38
  $registry->
39 40 41
    load_registry_from_db( ...

  my $sfa =
42
    $registry->get_adaptor('homo sapiens', 'core', 'SimpleFeature');
43 44 45 46 47 48 49

  print ref($sfa), "\n";

  my $sf_aref =
    $sfa->fetch_all;

  print scalar @$sf_aref, "\n";
50 51 52

=head1 DESCRIPTION

53
Simple Feature Adaptor - database access for simple features
54

55
=head1 METHODS
56 57 58 59 60 61 62

=cut

package Bio::EnsEMBL::DBSQL::SimpleFeatureAdaptor;
use vars qw(@ISA);
use strict;

63
use Bio::EnsEMBL::DBSQL::BaseFeatureAdaptor;
64
use Bio::EnsEMBL::SimpleFeature;
65
use Bio::EnsEMBL::Utils::Exception qw(throw warning);
66

67
@ISA = qw(Bio::EnsEMBL::DBSQL::BaseFeatureAdaptor);
68 69


70 71
=head2 store

72
  Arg [1]    : list of Bio::EnsEMBL::SimpleFeatures @sf
Graham McVicker's avatar
Graham McVicker committed
73
               the simple features to store in the database
74
  Example    : $simple_feature_adaptor->store(@simple_feats);
Graham McVicker's avatar
Graham McVicker committed
75 76
  Description: Stores a list of simple feature objects in the database
  Returntype : none
77
  Exceptions : thrown if @sf is not defined, if any of the features do not
78
               have an attached slice.
79
               or if any elements of @sf are not Bio::EnsEMBL::SimpleFeatures 
Graham McVicker's avatar
Graham McVicker committed
80
  Caller     : general
81
  Status     : Stable
82 83 84 85

=cut

sub store{
86
  my ($self,@sf) = @_;
87

Graham McVicker's avatar
Graham McVicker committed
88
  if( scalar(@sf) == 0 ) {
89
    throw("Must call store with list of SimpleFeatures");
Graham McVicker's avatar
Graham McVicker committed
90
  }
91 92 93 94 95 96 97 98 99 100 101 102

  my $sth = $self->prepare
    ("INSERT INTO simple_feature (seq_region_id, seq_region_start, " .
                                 "seq_region_end, seq_region_strand, " .
                                 "display_label, analysis_id, score) " .
     "VALUES (?,?,?,?,?,?,?)");

  my $db = $self->db();
  my $analysis_adaptor = $db->get_AnalysisAdaptor();

 FEATURE: foreach my $sf ( @sf ) {

Graham McVicker's avatar
Graham McVicker committed
103
    if( !ref $sf || !$sf->isa("Bio::EnsEMBL::SimpleFeature") ) {
104 105
      throw("SimpleFeature must be an Ensembl SimpleFeature, " .
            "not a [".ref($sf)."]");
Graham McVicker's avatar
Graham McVicker committed
106
    }
107 108 109 110 111

    if($sf->is_stored($db)) {
      warning("SimpleFeature [".$sf->dbID."] is already stored" .
              " in this database.");
      next FEATURE;
Graham McVicker's avatar
Graham McVicker committed
112
    }
113 114 115

    if(!defined($sf->analysis)) {
      throw("An analysis must be attached to the features to be stored.");
Graham McVicker's avatar
Graham McVicker committed
116
    }
117 118 119 120

    #store the analysis if it has not been stored yet
    if(!$sf->analysis->is_stored($db)) {
      $analysis_adaptor->store($sf->analysis());
121 122
    }

123 124 125
    my $original = $sf;
    my $seq_region_id;
    ($sf, $seq_region_id) = $self->_pre_store($sf);
126

127 128 129 130 131 132 133 134 135
    $sth->bind_param(1,$seq_region_id,SQL_INTEGER);
    $sth->bind_param(2,$sf->start,SQL_INTEGER);
    $sth->bind_param(3,$sf->end,SQL_INTEGER);
    $sth->bind_param(4,$sf->strand,SQL_TINYINT);
    $sth->bind_param(5,$sf->display_label,SQL_VARCHAR);
    $sth->bind_param(6,$sf->analysis->dbID,SQL_INTEGER);
    $sth->bind_param(7,$sf->score,SQL_DOUBLE);

    $sth->execute();
136 137 138 139

    $original->dbID($sth->{'mysql_insertid'});
    $original->adaptor($self);
  }
Graham McVicker's avatar
Graham McVicker committed
140
}
141 142


Kieron Taylor's avatar
Kieron Taylor committed
143
=head2 _tables
144

Graham McVicker's avatar
Graham McVicker committed
145 146 147
  Arg [1]    : none
  Example    : none
  Description: PROTECTED implementation of superclass abstract method
148 149
               returns the names, aliases of the tables to use for queries
  Returntype : list of listrefs of strings
Graham McVicker's avatar
Graham McVicker committed
150 151
  Exceptions : none
  Caller     : internal
152
  Status     : Stable
153

Graham McVicker's avatar
Graham McVicker committed
154
=cut
155

156
sub _tables {
157 158
  my $self = shift;
  
159
  return ['simple_feature', 'sf'];
160 161
}

Graham McVicker's avatar
Graham McVicker committed
162 163 164 165 166 167 168 169 170 171

=head2 _columns

  Arg [1]    : none
  Example    : none
  Description: PROTECTED implementation of superclass abstract method
               returns a list of columns to use for queries
  Returntype : list of strings
  Exceptions : none
  Caller     : internal
172
  Status     : Stable
Graham McVicker's avatar
Graham McVicker committed
173 174 175

=cut

176 177 178
sub _columns {
  my $self = shift;

179 180 181
  return qw( sf.simple_feature_id
             sf.seq_region_id sf.seq_region_start sf.seq_region_end
             sf.seq_region_strand sf.display_label sf.analysis_id sf.score );
182 183
}

184

185
=head2 _objs_from_sth
Graham McVicker's avatar
Graham McVicker committed
186 187 188

  Arg [1]    : hash reference $hashref
  Example    : none
189 190
  Description: PROTECTED implementation of superclass abstract method.
               creates SimpleFeatures from an executed DBI statement handle.
Graham McVicker's avatar
Graham McVicker committed
191
  Returntype : list reference to Bio::EnsEMBL::SimpleFeature objects
Graham McVicker's avatar
Graham McVicker committed
192 193
  Exceptions : none
  Caller     : internal
194
  Status     : Stable
Graham McVicker's avatar
Graham McVicker committed
195 196 197

=cut

198
sub _objs_from_sth {
199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214
  my ($self, $sth, $mapper, $dest_slice) = @_;

  #
  # This code is ugly because an attempt has been made to remove as many
  # function calls as possible for speed purposes.  Thus many caches and
  # a fair bit of gymnastics is used.
  #

  my $sa = $self->db()->get_SliceAdaptor();
  my $aa = $self->db->get_AnalysisAdaptor();

  my @features;
  my %analysis_hash;
  my %slice_hash;
  my %sr_name_hash;
  my %sr_cs_hash;
215 216
  
  
217 218 219 220 221 222
  my($simple_feature_id,$seq_region_id, $seq_region_start, $seq_region_end,
     $seq_region_strand, $display_label, $analysis_id, $score);

  $sth->bind_columns(\$simple_feature_id,\$seq_region_id, \$seq_region_start,
                     \$seq_region_end, \$seq_region_strand, \$display_label,
                     \$analysis_id, \$score);
223
  
224 225 226 227 228 229 230 231 232 233 234 235 236 237
  my $asm_cs;
  my $cmp_cs;
  my $asm_cs_vers;
  my $asm_cs_name;
  my $cmp_cs_vers;
  my $cmp_cs_name;
  if($mapper) {
    $asm_cs = $mapper->assembled_CoordSystem();
    $cmp_cs = $mapper->component_CoordSystem();
    $asm_cs_name = $asm_cs->name();
    $asm_cs_vers = $asm_cs->version();
    $cmp_cs_name = $cmp_cs->name();
    $cmp_cs_vers = $cmp_cs->version();
  }
238

239 240 241 242
  my $dest_slice_start;
  my $dest_slice_end;
  my $dest_slice_strand;
  my $dest_slice_length;
243
  my $dest_slice_sr_name;
244
  my $dest_slice_seq_region_id;
245 246 247 248 249
  if($dest_slice) {
    $dest_slice_start  = $dest_slice->start();
    $dest_slice_end    = $dest_slice->end();
    $dest_slice_strand = $dest_slice->strand();
    $dest_slice_length = $dest_slice->length();
250
    $dest_slice_sr_name = $dest_slice->seq_region_name();
251
    $dest_slice_seq_region_id =$dest_slice->get_seq_region_id();
252
  }
253

254
  my $count =0;
255
  FEATURE: while($sth->fetch()) {
256
      $count++;
257 258 259 260
    #get the analysis object
    my $analysis = $analysis_hash{$analysis_id} ||=
      $aa->fetch_by_dbID($analysis_id);

261 262
    #need to get the internal_seq_region, if present
    $seq_region_id = $self->get_seq_region_id_internal($seq_region_id);
263 264 265 266 267 268 269 270 271 272
    #get the slice object
    my $slice = $slice_hash{"ID:".$seq_region_id};

    if(!$slice) {
      $slice = $sa->fetch_by_seq_region_id($seq_region_id);
      $slice_hash{"ID:".$seq_region_id} = $slice;
      $sr_name_hash{$seq_region_id} = $slice->seq_region_name();
      $sr_cs_hash{$seq_region_id} = $slice->coord_system();
    }

273 274
    my $sr_name = $sr_name_hash{$seq_region_id};
    my $sr_cs   = $sr_cs_hash{$seq_region_id};
275 276 277 278 279 280
    #
    # remap the feature coordinates to another coord system
    # if a mapper was provided
    #
    if($mapper) {

281 282 283 284 285 286 287 288 289 290 291 292 293 294 295
      if (defined $dest_slice && $mapper->isa('Bio::EnsEMBL::ChainedAssemblyMapper')  ) {
	    ( $seq_region_id,  $seq_region_start,
	      $seq_region_end, $seq_region_strand )
		=
		$mapper->map( $sr_name, $seq_region_start, $seq_region_end,
                          $seq_region_strand, $sr_cs, 1, $dest_slice);

      } else {

	    ( $seq_region_id,  $seq_region_start,
	      $seq_region_end, $seq_region_strand )
		=
		$mapper->fastmap( $sr_name, $seq_region_start, $seq_region_end,
                          $seq_region_strand, $sr_cs );
      }
296 297

      #skip features that map to gaps or coord system boundaries
298 299
      next FEATURE if(!defined($seq_region_id));
      
300 301
      #get a slice in the coord system we just mapped to
      if($asm_cs == $sr_cs || ($cmp_cs != $sr_cs && $asm_cs->equals($sr_cs))) {
302 303
        $slice = $slice_hash{"ID:".$seq_region_id} ||=
          $sa->fetch_by_seq_region_id($seq_region_id);
304
      } else {
305 306
        $slice = $slice_hash{"ID:".$seq_region_id} ||=
          $sa->fetch_by_seq_region_id($seq_region_id);
307 308 309 310 311 312 313 314
      }
    }

    #
    # If a destination slice was provided convert the coords
    # If the dest_slice starts at 1 and is foward strand, nothing needs doing
    #
    if($dest_slice) {
315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417
      my $seq_region_len = $dest_slice->seq_region_length();

      if ($dest_slice_strand == 1) { # Positive strand
		
	$seq_region_start = $seq_region_start - $dest_slice_start + 1;
	$seq_region_end   = $seq_region_end - $dest_slice_start + 1;

	if ($dest_slice->is_circular()) {
	  # Handle cicular chromosomes.

	  if ($seq_region_start > $seq_region_end) {
	    # Looking at a feature overlapping the chromsome origin.

	    if ($seq_region_end > $dest_slice_start) {

	      # Looking at the region in the beginning of the
	      # chromosome.
	      $seq_region_start -= $seq_region_len;
	    }

	    if ($seq_region_end < 0) {
	      $seq_region_end += $seq_region_len;
	    }

	  } else {

	    if (   $dest_slice_start > $dest_slice_end
		   && $seq_region_end < 0) {
	      # Looking at the region overlapping the chromosome
	      # origin and a feature which is at the beginning of the
	      # chromosome.
	      $seq_region_start += $seq_region_len;
	      $seq_region_end   += $seq_region_len;
	    }
	  }

	}		       ## end if ($dest_slice->is_circular...)

      } else {			# Negative strand

	my $start = $dest_slice_end - $seq_region_end + 1;
	my $end = $dest_slice_end - $seq_region_start + 1;

	if ($dest_slice->is_circular()) {

	  if ($dest_slice_start > $dest_slice_end) { 
	    # slice spans origin or replication

	    if ($seq_region_start >= $dest_slice_start) {
	      $end += $seq_region_len;
	      $start += $seq_region_len 
		if $seq_region_end > $dest_slice_start;

	    } elsif ($seq_region_start <= $dest_slice_end) {
	      # do nothing
	    } elsif ($seq_region_end >= $dest_slice_start) {
	      $start += $seq_region_len;
	      $end += $seq_region_len;

	    } elsif ($seq_region_end <= $dest_slice_end) {

	      $end += $seq_region_len
		if $end < 0;

	    } elsif ($seq_region_start > $seq_region_end) {
		  
	      $end += $seq_region_len;

	    } else {
		  
	    }
      
	  } else {

	    if ($seq_region_start <= $dest_slice_end and $seq_region_end >= $dest_slice_start) {
	      # do nothing
	    } elsif ($seq_region_start > $seq_region_end) {
	      if ($seq_region_start <= $dest_slice_end) {
	  
		$start -= $seq_region_len;

	      } elsif ($seq_region_end >= $dest_slice_start) {
		$end += $seq_region_len;

	      } else {
		    
	      }
	    }
	  }

	}

	$seq_region_start = $start;
	$seq_region_end = $end;
	$seq_region_strand *= -1;

      }	## end else [ if ($dest_slice_strand...)]

      # Throw away features off the end of the requested slice or on
      # different seq_region.
      if (   $seq_region_end < 1
	     || $seq_region_start > $dest_slice_length
	     || ($dest_slice_seq_region_id ne $seq_region_id)) {
418
	next FEATURE;
419
      }
420

421
      $slice = $dest_slice;
422 423
    }

424 425 426 427 428 429 430 431 432 433 434 435 436 437
    push( @features,
          $self->_create_feature_fast(
                                    'Bio::EnsEMBL::SimpleFeature', {
                                      'start'    => $seq_region_start,
                                      'end'      => $seq_region_end,
                                      'strand'   => $seq_region_strand,
                                      'slice'    => $slice,
                                      'analysis' => $analysis,
                                      'adaptor'  => $self,
                                      'dbID'     => $simple_feature_id,
                                      'display_label' => $display_label,
                                      'score'         => $score
                                    } ) );

438
    }
439

440
  return \@features;
441 442
}

Glenn Proctor's avatar
Glenn Proctor committed
443 444 445 446 447 448

=head2 list_dbIDs

  Arg [1]    : none
  Example    : @feature_ids = @{$simple_feature_adaptor->list_dbIDs()};
  Description: Gets an array of internal ids for all simple features in the current db
449
  Arg[1]     : <optional> int. not 0 for the ids to be sorted by the seq_region.
Glenn Proctor's avatar
Glenn Proctor committed
450 451 452
  Returntype : list of ints
  Exceptions : none
  Caller     : ?
453
  Status     : Stable
Glenn Proctor's avatar
Glenn Proctor committed
454 455 456 457

=cut

sub list_dbIDs {
458
   my ($self, $ordered) = @_;
Glenn Proctor's avatar
Glenn Proctor committed
459

460
   return $self->_list_dbIDs("simple_feature", undef, $ordered);
Glenn Proctor's avatar
Glenn Proctor committed
461 462
}

463
1;