BaseAdaptor.pm 33.4 KB
Newer Older
1
=head1 LICENSE
Ewan Birney's avatar
Ewan Birney committed
2

3 4 5 6 7 8 9 10 11 12 13 14 15 16 17
Copyright [1999-2013] Wellcome Trust Sanger Institute and the EMBL-European Bioinformatics Institute

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

     http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.

=cut
Ewan Birney's avatar
Ewan Birney committed
18

19 20 21 22 23


=head1 CONTACT

  Please email comments or questions to the public Ensembl
Magali Ruffier's avatar
Magali Ruffier committed
24
  developers list at <http://lists.ensembl.org/mailman/listinfo/dev>.
25 26

  Questions may also be sent to the Ensembl help desk at
Magali Ruffier's avatar
Magali Ruffier committed
27
  <http://www.ensembl.org/Help/Contact>.
28 29

=cut
Ewan Birney's avatar
Ewan Birney committed
30 31 32 33 34 35 36

=head1 NAME

Bio::EnsEMBL::DBSQL::BaseAdaptor - Base Adaptor for DBSQL adaptors

=head1 SYNOPSIS

37 38 39 40
  # base adaptor provides

  # SQL prepare function
  $adaptor->prepare("sql statement");
Ewan Birney's avatar
Ewan Birney committed
41

42 43
  # get of root DBAdaptor object
  $adaptor->db();
Ewan Birney's avatar
Ewan Birney committed
44

45
  # constructor, ok for inheritence
46
  $adaptor = Bio::EnsEMBL::DBSQL::SubClassOfBaseAdaptor->new($dbobj);
Ewan Birney's avatar
Ewan Birney committed
47 48 49 50

=head1 DESCRIPTION

This is a true base class for Adaptors in the Ensembl DBSQL
51
system.
Ewan Birney's avatar
Ewan Birney committed
52

53 54
Adaptors are expected to have the following functions

55
  $obj = $adaptor->fetch_by_dbID($internal_id);
56 57

which builds the object from the primary key of the object. This
58 59 60 61
function is crucial because it allows adaptors to collaborate relatively
independently of each other - in other words, we can change the schema
under one adaptor without too many knock on changes through the other
adaptors.
62 63 64

Most adaptors will also have

65
  $dbid = $adaptor->store($obj);
66 67 68 69

which stores the object. Currently the storing of an object also causes
the objects to set

70
  $obj->dbID();
71 72 73 74 75

correctly and attach the adaptor.

Other fetch functions go by the convention of

76
  @object_array = @{ $adaptor->fetch_all_by_XXXX($arguments_for_XXXX) };
77

78 79
sometimes it returns an array ref denoted by the 'all' in the name of
the method, sometimes an individual object. For example
80

81
  $gene = $gene_adaptor->fetch_by_stable_id($stable_id);
82 83 84

or

85
  @fp = @{ $simple_feature_adaptor->fetch_all_by_Slice($slice) };
86

87 88
Occassionally adaptors need to provide access to lists of ids. In this
case the convention is to go list_XXXX, such as
89

90
  @gene_ids = @{ $gene_adaptor->list_geneIds() };
Graham McVicker's avatar
Graham McVicker committed
91 92

(note: this method is poorly named)
93

94
=head1 METHODS
Ewan Birney's avatar
Ewan Birney committed
95 96 97 98

=cut

package Bio::EnsEMBL::DBSQL::BaseAdaptor;
99 100
require Exporter;
use vars qw(@ISA @EXPORT);
Ewan Birney's avatar
Ewan Birney committed
101 102
use strict;

103
use Bio::EnsEMBL::Utils::Exception qw(throw);
104
use Bio::EnsEMBL::Utils::Scalar qw(assert_ref assert_integer wrap_array);
105
use DBI qw(:sql_types);
106
use Data::Dumper;
107
use Scalar::Util qw/looks_like_number/;
Ewan Birney's avatar
Ewan Birney committed
108

109 110
@ISA = qw(Exporter);
@EXPORT = (@{$DBI::EXPORT_TAGS{'sql_types'}});
111 112 113 114 115 116 117 118 119 120 121 122

=head2 new

  Arg [1]    : Bio::EnsEMBL::DBSQL::DBConnection $dbobj
  Example    : $adaptor = new AdaptorInheritedFromBaseAdaptor($dbobj);
  Description: Creates a new BaseAdaptor object.  The intent is that this
               constructor would be called by an inherited superclass either
               automatically or through $self->SUPER::new in an overridden 
               new method.
  Returntype : Bio::EnsEMBL::DBSQL::BaseAdaptor
  Exceptions : none
  Caller     : Bio::EnsEMBL::DBSQL::DBConnection
123
  Status     : Stable
124 125 126

=cut

Ewan Birney's avatar
Ewan Birney committed
127
sub new {
128 129 130 131 132
  my ( $class, $dbobj ) = @_;

  my $self = bless {}, $class;

  if ( !defined $dbobj || !ref $dbobj ) {
133 134
    throw("Don't have a db [$dbobj] for new adaptor");
  }
135 136

  if ( $dbobj->isa('Bio::EnsEMBL::DBSQL::DBAdaptor') ) {
137
    $self->db($dbobj);
138 139
    $self->dbc( $dbobj->dbc );
    $self->species_id( $dbobj->species_id() );
140
    $self->is_multispecies( $dbobj->is_multispecies() );
141
  } elsif ( ref($dbobj) =~ /DBAdaptor$/ ) {
Ian Longden's avatar
Ian Longden committed
142
    $self->db($dbobj);
143 144 145 146
    $self->dbc( $dbobj->dbc );
  } elsif ( ref($dbobj) =~ /DBConnection$/ ) {
    $self->dbc($dbobj);
  } else {
Ian Longden's avatar
Ian Longden committed
147 148
    throw("Don't have a DBAdaptor [$dbobj] for new adaptor");
  }
149

150
  return $self;
Ewan Birney's avatar
Ewan Birney committed
151 152 153
}


154
=head2 prepare
Ewan Birney's avatar
Ewan Birney committed
155

156 157 158 159 160 161 162 163 164
  Arg [1]    : string $string
               a SQL query to be prepared by this adaptors database
  Example    : $sth = $adaptor->prepare("select yadda from blabla")
  Description: provides a DBI statement handle from the adaptor. A convenience
               function so you dont have to write $adaptor->db->prepare all the
               time
  Returntype : DBI::StatementHandle
  Exceptions : none
  Caller     : Adaptors inherited from BaseAdaptor
165
  Status     : Stable
Ewan Birney's avatar
Ewan Birney committed
166 167 168

=cut

169 170 171 172 173 174
sub prepare {
  my ( $self, $string ) = @_;

  # Uncomment next line to cancel caching on the SQL side.
  # Needed for timing comparisons etc.
  #$string =~ s/SELECT/SELECT SQL_NO_CACHE/i;
Ewan Birney's avatar
Ewan Birney committed
175

176
  return $self->dbc->prepare($string);
Ewan Birney's avatar
Ewan Birney committed
177 178 179 180 181
}


=head2 db

Ian Longden's avatar
Ian Longden committed
182
  Arg [1]    : (optional) Bio::EnsEMBL::DBSQL::DBAdaptor $obj 
183 184 185 186
               the database this adaptor is using.
  Example    : $db = $adaptor->db();
  Description: Getter/Setter for the DatabaseConnection that this adaptor is 
               using.
Ian Longden's avatar
Ian Longden committed
187
  Returntype : Bio::EnsEMBL::DBSQL::DBAdaptor
188
  Exceptions : none
189
  Caller     : Adaptors inherited from BaseAdaptor
190
  Status     : Stable
Ewan Birney's avatar
Ewan Birney committed
191 192 193

=cut

194 195
sub db {
  my ( $self, $value ) = @_;
Ian Longden's avatar
Ian Longden committed
196

197 198 199
  if ( defined($value) ) {
    $self->{'db'} = $value;
  }
Ewan Birney's avatar
Ewan Birney committed
200

201
  return $self->{'db'};
Ewan Birney's avatar
Ewan Birney committed
202 203
}

Ian Longden's avatar
Ian Longden committed
204 205 206 207 208 209 210 211 212
=head2 dbc

  Arg [1]    : (optional) Bio::EnsEMBL::DBSQL::DBConnection $obj 
               the database this adaptor is using.
  Example    : $db = $adaptor->db();
  Description: Getter/Setter for the DatabaseConnection that this adaptor is 
               using.
  Returntype : Bio::EnsEMBL::DBSQL::DBConnection
  Exceptions : none
213
  Caller     : Adaptors inherited from BaseAdaptor
214
  Status     : Stable
Ian Longden's avatar
Ian Longden committed
215 216 217

=cut

218 219 220 221 222 223
sub dbc {
  my ( $self, $value ) = @_;

  if ( defined($value) ) {
    $self->{'dbc'} = $value;
  }
Ian Longden's avatar
Ian Longden committed
224 225 226

  return $self->{'dbc'};
}
Ewan Birney's avatar
Ewan Birney committed
227

228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250
=head2 is_multispecies

  Arg [1]    : (optional) boolean $arg
  Example    : if ($adaptor->is_multispecies()) { }
  Description: Getter/Setter for the is_multispecies boolean of
               to use for this adaptor.
  Returntype : boolean
  Exceptions : none
  Caller     : general
  Status     : Stable

=cut

sub is_multispecies {
  my ( $self, $arg ) = @_;

  if ( defined($arg) ) {
    $self->{_is_multispecies} = $arg;
  }

  return $self->{_is_multispecies};
}

251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274
=head2 species_id

  Arg [1]    : (optional) int $species_id
               The internal ID of the species in a multi-species database.
  Example    : $db = $adaptor->db();
  Description: Getter/Setter for the internal ID of the species in a
               multi-species database.  The default species ID is 1.
  Returntype : Integer
  Exceptions : none
  Caller     : Adaptors inherited from BaseAdaptor
  Status     : Stable

=cut

sub species_id {
  my ( $self, $value ) = @_;

  if ( defined($value) ) {
    $self->{'species_id'} = $value;
  }

  return $self->{'species_id'} || 1;
}

275

276 277 278 279 280
# list primary keys for a particular table
# args are table name and primary key field
# if primary key field is not supplied, tablename_id is assumed
# returns listref of IDs
sub _list_dbIDs {
281
  my ( $self, $table, $pk, $ordered ) = @_;
282

283
  if ( !defined($pk) ) { $pk = $table . "_id" }
284

285
  my $sql = sprintf( "SELECT `%s` FROM `%s`", $pk, $table );
286

287 288 289 290 291
  my $join_with_cs = 0;
  if (    $self->is_multispecies()
       && $self->isa('Bio::EnsEMBL::DBSQL::BaseFeatureAdaptor')
       && !$self->isa('Bio::EnsEMBL::DBSQL::UnmappedObjectAdaptor') )
  {
Monika Komorowska's avatar
Monika Komorowska committed
292
    
293 294 295 296 297 298 299 300 301
    $sql .= q(
JOIN seq_region USING (seq_region_id)
JOIN coord_system cs USING (coord_system_id)
WHERE cs.species_id = ?
);

    $join_with_cs = 1;
  }

302
  if ( defined($ordered) && $ordered ) {
303
    $sql .= " ORDER BY seq_region_id, seq_region_start";
304 305
  }

306
  my $sth = $self->prepare($sql);
307

308
  if ($join_with_cs) {
309
    $sth->bind_param( 1, $self->species_id(), SQL_INTEGER );
310 311
  }

312 313 314 315
  eval { $sth->execute() };
  if ($@) {
    throw("Detected an error whilst executing SQL '${sql}': $@");
  }
316

317 318
  my $id;
  $sth->bind_col( 1, \$id );
319

320 321 322 323
  my @out;
  while ( $sth->fetch() ) {
    push( @out, $id );
  }
324 325

  return \@out;
326
} ## end sub _list_dbIDs
327

328

329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350
# _straight_join

#   Arg [1]    : (optional) boolean $new_val
#   Example    : $self->_straight_join(1);
#                $self->generic_fetch($constraint);
#                $self->_straight_join(0);
#   Description: PROTECTED Getter/Setter that turns on/off the use of 
#                a straight join in queries.
#   Returntype : boolean
#   Exceptions : none
#   Caller     : general

sub _straight_join {
  my $self = shift;
  if(@_) {
    $self->{'_straight_join'} = shift;
  }

  return $self->{'_straight_join'};
}


351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374
=head2 bind_param_generic_fetch

 Arg [1]   : (optional)  scalar $param
              This is the parameter to bind
 Arg [2]   : (optional) int $sql_type
              Type of the parameter (from DBI (:sql_types))
 Example   :  $adaptor->bind_param_generic_fetch($stable_id,SQL_VARCHAR);
              $adaptor->generic_fetch();
 Description:  When using parameters for the query, will call the bind_param to avoid
               some security issues. If there are no arguments, will return the bind_parameters
 ReturnType : listref
 Exceptions:  if called with one argument

=cut

sub bind_param_generic_fetch{
    my $self = shift;
    my $param = shift;
    my $sql_type = shift;

    if (defined $param && !defined $sql_type){
	throw("Need to specify sql_type for parameter $param\n");
    }
    elsif (defined $param && defined $sql_type){
375 376 377 378
	#check when there is a SQL_INTEGER type that the parameter is really a number
	if ($sql_type eq SQL_INTEGER){
	    throw "Trying to assign a non numerical parameter to an integer value in the database" if ($param !~ /^\d+$/);
	}
379 380 381 382 383 384 385 386 387 388
	#both paramters have been entered, push it to the bind_param array
	push @{$self->{'_bind_param_generic_fetch'}},[$param,$sql_type];
    }
    elsif (!defined $param && !defined $sql_type){
	#when there are no arguments, return the array
	return $self->{'_bind_param_generic_fetch'};
    }
	
}

389 390 391 392 393 394 395
# Used to reset the params without circumventing scope
sub _bind_param_generic_fetch {
  my ($self, $_bind_param_generic_fetch) = @_;
  $self->{'_bind_param_generic_fetch'} = $_bind_param_generic_fetch if $_bind_param_generic_fetch;
  return $self->{_bind_param_generic_fetch};
}

396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413
=head2 generate_in_constraint
  
  Arg [1]     : ArrayRef or Scalar $list
                List or a single value of items to be pushed into an IN statement
  Arg [2]     : Scalar $column
                Column this IN statement is being applied to. Please fully resolve the
                column.
  Arg [3]     : Scalar $param_type
                Data type which should be used when binding. Please use DBI data type symbols
  Arg [4]     : Scalar boolean $inline_variables
                Boolean to control if variables are inlined in the constraint. If
                false values are bound via bind_param_generic_fetch() (the default behaviour).

  Description : Used internally to generate a SQL constraint to restrict a query by an IN statement.
                The code generates the complete IN statement.
  Returntype  : String
  Exceptions  : If no list is supplied, the list of values is empty or no data type was given
  Caller      : general
414

415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445
=cut

sub generate_in_constraint {
  my ($self, $list, $column, $param_type, $inline_variables) = @_;
  throw("A list of values must be given") if ! defined $list;
  $list = wrap_array($list); # homogenise into an array
  throw "We should be given at least one value to insert" if scalar(@{$list}) == 0;
  throw "Please supply the DBI param type" if ! defined $param_type;
  #Figure out if we need to quote our values if we are asked to inline the variables
  my $quote_values = 1;
  if($param_type == SQL_INTEGER || $param_type == SQL_TINYINT || $param_type == SQL_DOUBLE ) {
    $quote_values = 0;
  }

  my $constraint = qq{${column} IN (};
  if($inline_variables) {
    if($quote_values) {
      $constraint .= join(q{,}, map { qq{"${_}"} } @{$list});  
    }
    else {
      $constraint .= join(q{,}, @{$list});  
    }
  }
  else {
    my @subs = ('?') x scalar(@{$list});
    $constraint .= join(q{,}, @subs);
    $self->bind_param_generic_fetch($_, $param_type) for @{$list};
  }
  $constraint .= q{)};
  return $constraint;
}
446

447 448 449 450 451 452 453 454 455
=head2 generic_fetch

  Arg [1]    : (optional) string $constraint
               An SQL query constraint (i.e. part of the WHERE clause)
  Arg [2]    : (optional) Bio::EnsEMBL::AssemblyMapper $mapper
               A mapper object used to remap features
               as they are retrieved from the database
  Arg [3]    : (optional) Bio::EnsEMBL::Slice $slice
               A slice that features should be remapped to
456
  Example    : $fts = $a->generic_fetch('contig_id in (1234, 1235)');
457 458 459
  Description: Performs a database fetch and returns feature objects in
               contig coordinates.
  Returntype : listref of Bio::EnsEMBL::SeqFeature in contig coordinates
460
  Exceptions : Thrown if there is an issue with querying the data
461
  Caller     : BaseFeatureAdaptor, ProxyDnaAlignFeatureAdaptor::generic_fetch
462
  Status     : Stable
463 464 465 466 467

=cut

sub generic_fetch {
  my ($self, $constraint, $mapper, $slice) = @_;
468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488
  my $sql = $self->_generate_sql($constraint);
  my $params = $self->bind_param_generic_fetch();
  $params ||= [];
  $self->{_bind_param_generic_fetch} = undef;
  my $sth = $self->db()->dbc()->prepare($sql);
  my $i = 1;
  foreach my $param (@{$params}){
    $sth->bind_param($i,$param->[0],$param->[1]);
    $i++;
  }
  eval { $sth->execute() };
  if ($@) {
    throw("Detected an error whilst executing SQL '${sql}': $@");
  }

  my $res = $self->_objs_from_sth($sth, $mapper, $slice);
  $sth->finish();
  return $res;
}

=head2 generic_count
489

490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513
  Arg [1]    : (optional) string $constraint
               An SQL query constraint (i.e. part of the WHERE clause)
  Example    : $number_feats = $a->generic_count('contig_id in (1234, 1235)');
  Description: Performs a database fetch and returns a count of those features
               found. This is analagous to C<generic_fetch()>
  Returntype : Integer count of the elements.
  Exceptions : Thrown if there is an issue with querying the data

=cut

sub generic_count {
  my ($self, $constraint) = @_;
  my $sql = $self->_generate_sql($constraint, 'count(*)');
  my $params = $self->bind_param_generic_fetch();
  $params ||= [];
  $self->{_bind_param_generic_fetch} = undef;
  my $h = $self->db()->dbc()->sql_helper();
  my $count = $h->execute_single_result(-SQL => $sql, -PARAMS => $params);
  return $count;
}

sub _generate_sql {
  my ($self, $constraint, @input_columns) = @_;
  
514 515 516 517 518 519
  my @tabs = $self->_tables();

  my $extra_default_where;

  # Hack for feature types that needs to be restricted to species_id (in
  # coord_system).
520 521
  if (    $self->is_multispecies()
       && $self->isa('Bio::EnsEMBL::DBSQL::BaseFeatureAdaptor')
522 523
       && !$self->isa('Bio::EnsEMBL::DBSQL::UnmappedObjectAdaptor') )
  {
524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540
    # We do a check to see if there is already seq_region
    # and coord_system defined to ensure we get the right
    # alias.  We then do the extra query irrespectively of
    # what has already been specified by the user.
    my %thash = map { $_->[0] => $_->[1] } @tabs;

    my $sr_alias =
      ( exists( $thash{seq_region} ) ? $thash{seq_region} : 'sr' );
    my $cs_alias =
      ( exists( $thash{coord_system} ) ? $thash{coord_system} : 'cs' );

    if ( !exists( $thash{seq_region} ) ) {
      push( @tabs, [ 'seq_region', $sr_alias ] );
    }
    if ( !exists( $thash{coord_system} ) ) {
      push( @tabs, [ 'coord_system', $cs_alias ] );
    }
541 542

    $extra_default_where = sprintf(
543 544 545 546 547
                      '%s.seq_region_id = %s.seq_region_id '
                        . 'AND %s.coord_system_id = %s.coord_system_id '
                        . 'AND %s.species_id = ?',
                      $tabs[0]->[1], $sr_alias, $sr_alias,
                      $cs_alias,     $cs_alias );
548 549

    $self->bind_param_generic_fetch( $self->species_id(), SQL_INTEGER );
550
  } ## end if ( $self->is_multispecies...)
551

552 553
  @input_columns = $self->_columns() if ! @input_columns;
  my $columns = join(', ', @input_columns);
554 555 556 557 558 559

  #
  # Construct a left join statement if one was defined, and remove the
  # left-joined table from the table list
  #
  my @left_join_list = $self->_left_join();
560
  my $left_join_prefix = '';
561 562 563 564 565
  my $left_join = '';
  my @tables;
  if(@left_join_list) {
    my %left_join_hash = map { $_->[0] => $_->[1] } @left_join_list;
    while(my $t = shift @tabs) {
566 567
        my $t_alias = $t->[0] . " " . $t->[1];
      if( exists $left_join_hash{ $t->[0] } || exists $left_join_hash{$t_alias}) {
568
        my $condition = $left_join_hash{ $t->[0] };
569
        $condition ||= $left_join_hash{$t_alias};
570
        my $syn = $t->[1];
571 572 573
        $left_join .=
          "\n  LEFT JOIN " . $t->[0] . " $syn ON $condition ) ";
        $left_join_prefix .= '(';
574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590
      } else {
        push @tables, $t;
      }
    }
  } else {
    @tables = @tabs;
  }

  my $straight_join = '';

  if($self->_straight_join()) {
    $straight_join = "STRAIGHT_JOIN";
  }

  #construct a nice table string like 'table1 t1, table2 t2'
  my $tablenames = join(', ', map({ join(' ', @$_) } @tables));

591
  my $sql =
592
      "SELECT $straight_join $columns \n"
593
    . "FROM $left_join_prefix ($tablenames) $left_join";
594

595
  my $default_where = $self->_default_where_clause();
596 597
  my $final_clause = $self->_final_clause;

598 599 600 601 602 603 604 605
  if ($extra_default_where) {
    if ($default_where) {
      $default_where .= "\n AND $extra_default_where";
    } else {
      $default_where = $extra_default_where;
    }
  }

606
  #append a where clause if it was defined
607
  if ($constraint) {
608
    $sql .= "\n WHERE $constraint ";
609
    if ($default_where) {
610 611
      $sql .= " AND\n       $default_where ";
    }
612
  } elsif ($default_where) {
613 614 615 616 617
    $sql .= "\n WHERE $default_where ";
  }

  #append additional clauses which may have been defined
  $sql .= "\n$final_clause";
618
  
619
  # FOR DEBUG:
620 621
  #printf(STDERR "SQL:\n%s\n", $sql);
  
622
  return $sql;
623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642
}


=head2 fetch_by_dbID

  Arg [1]    : int $id
               The unique database identifier for the feature to be obtained
  Example    : $feat = $adaptor->fetch_by_dbID(1234));
               $feat = $feat->transform('contig');
  Description: Returns the feature created from the database defined by the
               the id $id.  The feature will be returned in its native
               coordinate system.  That is, the coordinate system in which it
               is stored in the database.  In order to convert it to a
               particular coordinate system use the transfer() or transform()
               method.  If the feature is not found in the database then
               undef is returned instead
  Returntype : Bio::EnsEMBL::Feature or undef
  Exceptions : thrown if $id arg is not provided
               does not exist
  Caller     : general
643
  Status     : Stable
644 645 646

=cut

647 648 649 650 651 652 653 654 655 656 657 658
sub fetch_by_dbID {
  my ($self, $id) = @_;
  if ($self->_no_id_cache()) {
    return $self->_uncached_fetch_by_dbID($id);
  }
  return $self->_id_cache()->get($id);
}

# The actual implmenetation moved sideways to allow for uncached access
# otherwise we'd constantly loop

sub _uncached_fetch_by_dbID{
659 660 661 662 663 664 665
  my ($self,$id) = @_;

  throw("id argument is required") if(!defined $id);

  #construct a constraint like 't1.table1_id = 123'
  my @tabs = $self->_tables;
  my ($name, $syn) = @{$tabs[0]};
666 667
  $self->bind_param_generic_fetch($id,SQL_INTEGER);
  my $constraint = "${syn}.${name}_id = ?";
668 669 670 671 672 673 674 675 676 677 678 679

  #Should only be one
  my ($feat) = @{$self->generic_fetch($constraint)};

  return undef if(!$feat);

  return $feat;
}


=head2 fetch_all_by_dbID_list

680 681 682
  Arg [1]    : listref of integers $id_list
               The unique database identifiers for the features to
               be obtained.
683
  Arg [2]    : optional - Bio::EnsEMBL::Slice to map features onto.
684
  Example    : @feats = @{$adaptor->fetch_all_by_dbID_list([1234, 2131, 982]))};
685 686 687 688 689 690 691 692 693 694
  Description: Returns the features created from the database
               defined by the the IDs in contained in the provided
               ID list $id_list.  The features will be returned
               in their native coordinate system.  That is, the
               coordinate system in which they are stored in the
               database.  In order to convert the features to a
               particular coordinate system use the transfer() or
               transform() method.  If none of the features are
               found in the database a reference to an empty list is
               returned.
695 696 697 698
  Returntype : listref of Bio::EnsEMBL::Features
  Exceptions : thrown if $id arg is not provided
               does not exist
  Caller     : general
699
  Status     : Stable
700 701 702 703

=cut

sub fetch_all_by_dbID_list {
704 705 706 707 708 709 710 711 712 713
  my ($self, $id_list_ref, $slice) = @_;
  if ($self->_no_id_cache()) {
    return $self->_uncached_fetch_all_by_dbID_list($id_list_ref, $slice);
  }
  return $self->_id_cache()->get_by_list($id_list_ref, $slice);
}

# The actual implmenetation moved sideways to allow for uncached access
# otherwise we'd constantly loop
sub _uncached_fetch_all_by_dbID_list {
714
  my ( $self, $id_list_ref, $slice ) = @_;
715
  return $self->_uncached_fetch_all_by_id_list($id_list_ref, $slice, "dbID", 1);
716 717 718 719 720 721 722 723 724 725 726
} ## end sub fetch_all_by_dbID_list

=head2 _uncached_fetch_all_by_id_list

  Arg [1]    : listref of IDs
  Arg [2]    : (optional) Bio::EnsEMBL::Slice $slice
               A slice that features should be remapped to
  Arg [3]    : String describing the ID type.
               Valid values include dbID and stable_id. dbID is an alias for
               the primary key, while other names map directly to table columns
               of the Feature this adaptor manages.
727 728 729 730 731 732 733 734 735 736
  Arg [4]    : Boolean $numeric
               Indicates if the incoming data is to be processed as a numeric
               or as a String. If arg [3] was set to dbID then we default this to
               be true. If arg [3] was set to stable_id then we default this to
               be false.
               When not using a standard arg[3] the IDs are assumed to be Strings.
  Arg [5]    : Integer $max_size
               Control the maximum number of IDs sent to a database in a single 
               query. Defaults to 2K for Strings and 16K for integers. Only
               provide if you know *exactly* why you need to edit it.
737 738 739
  Example    : $list_of_features = $adaptor->_uncached_fetch_all_by_id_list(
                   [qw(ENSG00000101321 ENSG00000101346 ENSG00000101367)],
                   undef,
740 741 742 743 744 745 746 747 748 749 750 751 752
                   "stable_id", 0); #using strings
               
               # Numeric set to true because we are using numerics
               $list_of_features = $adaptor->_uncached_fetch_all_by_id_list(
                   [1,2,3,4],
                   undef,
                   "dbID", 1);

               # Numeric defaults to true because we are querying using dbID
               $list_of_features = $adaptor->_uncached_fetch_all_by_id_list(
                   [1,2,3,4],
                   undef,
                   "dbID");
753 754 755
  Description: This is a generic method used to fetch lists of features by IDs.
               It avoids caches, meaning it is best suited for block fetching.
               See fetch_all_by_dbID_list() for more info.
756
  Returntype : ArrayRef of Bio::EnsEMBL::Feature
757 758 759 760 761 762
  Exceptions : Thrown if a list of IDs is not supplied.
  Caller     : BaseFeatureAdaptor, BaseAdaptor and derived classes.

=cut

sub _uncached_fetch_all_by_id_list {
763
    my ( $self, $id_list_ref, $slice, $id_type, $numeric, $max_size ) = @_;
764

765
  if ( !defined($id_list_ref) || ref($id_list_ref) ne 'ARRAY' ) {
766 767 768
    throw("id_list list reference argument is required");
  }

769
  if ( !@{$id_list_ref} ) { return [] }
770

771 772 773 774
  # Construct a constraint like 't1.table1_id = 123'
  my @tabs = $self->_tables();
  my ( $name, $syn ) = @{ $tabs[0] };

775 776 777 778
  # prepare column name for query. If the id_type was
  # set to dbID then we assume the column must be
  # tablename_id e.g. gene_id. Otherwise we assume the id_type
  # is the field/column name
779
  my $field_name;
780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818
  if($id_type eq 'dbID') {
    $field_name = $name.'_id';
    # If numeric was not set default it to 1 since this is an int
    $numeric = 1 if ! defined $numeric;
  }
  elsif($id_type eq 'stable_id') {
    # If numeric was not set default it to 0 since this is a string
    $numeric = 0 if ! defined $numeric; 
    $field_name = $id_type;
  }
  else {
    $field_name = $id_type;
  }

  my $sql_data_type;

  # Ensuring we do not exceed MySQL's max_allowed_packet (defaults to 1MB)
  # by splitting large queries into smaller queries of at most 256KB
  # (262,144 8-bit characters)
  # If we had a numeric then really we are talking about working with
  # integers. Normal max ensembl id size is 12 plus 2 characters for
  # commas in our IN statement comes to 14. Even bloating this to 16 gives
  # a max number of 16,384 IDs (262114/16).
  # 
  if($numeric) {
    my $first_id = $id_list_ref->[0];
    if(!looks_like_number($first_id)) {
      throw "You specified that we are looking for numerics but $first_id is not a numeric";
    }
    $max_size = 16384 if ! defined $max_size;
    $sql_data_type = SQL_INTEGER;
  }
  # However when dealing with Strings those can be very large (assuming
  # 128 is the max length of a stable ID). 128 is 8x smaller than our
  # previous max expected integer so we reduce the max ids by 8. This gives
  # 2048 IDs (16384/8)
  else {
    $max_size = 2048 if ! defined $max_size;
    $sql_data_type = SQL_VARCHAR;
819 820 821
  }
  
  # build up unique id list, also validate on the way by
822
  my %id_list;
823
  for (@{$id_list_ref}) {
824
    $id_list{$_}++;
825
  }
826
  my @id_list = keys %id_list;
827

828
  my @out;
829
  my $inline = 1;
830
  while (@id_list) {
831
    my @ids;
832 833 834 835
    my $id_str;

    if ( scalar(@id_list) > $max_size ) {
      @ids = splice( @id_list, 0, $max_size );
836 837
    } 
    else {
838 839
      @ids     = @id_list;
      @id_list = ();
840
    }
841 842
    # Push off to our IN statement constructor for this work
    my $constraint = $self->generate_in_constraint(\@ids, "${syn}.${field_name}", $sql_data_type, $inline);
843
    push @out, @{ $self->generic_fetch($constraint, undef, $slice) };
844 845 846
  }

  return \@out;
847
}
848

Arne Stabenau's avatar
Arne Stabenau committed
849 850
# might not be a good idea, but for convenience
# shouldnt be called on the BIG tables though
851

Arne Stabenau's avatar
Arne Stabenau committed
852
sub fetch_all {
853 854
  my $self = shift;
  return $self->generic_fetch();
Arne Stabenau's avatar
Arne Stabenau committed
855 856
}

857 858 859 860 861 862
=head2 last_insert_id

  Arg [1]     : (optional) $field the name of the field the inserted ID was pushed 
                into
  Arg [2]     : (optional) HashRef used to pass extra attributes through to the 
                DBD driver
863 864
  Arg [3]     : (optional) $table the name of the table to use if the adaptor
                does not implement C<_tables()>
865 866 867 868 869 870 871 872 873 874 875
  Description : Delegating method which uses DBI to extract the last inserted 
                identifier. If using MySQL we just call the DBI method 
                L<DBI::last_insert_id()> since MySQL ignores any extra
                arguments. See L<DBI> for more information about this 
                delegated method. 
  Example     : my $id = $self->last_insert_id('my_id'); my $other_id = $self->last_insert_id();
  Returntype  : Scalar or undef
  
=cut

sub last_insert_id {
876
  my ($self, $field, $attributes, $table) = @_;
877 878
  my $dbc = $self->dbc();
  my $dbh = $dbc->db_handle();
879
  my @args;
880
  if($dbc->driver() eq 'mysql') {
881
    @args = (undef,undef,undef,undef);
882
  }
883 884 885 886 887
  else {
    if(!$table) {
      ($table) = $self->_tables();
    }
    @args = (undef, $dbc->dbname(), $table->[0], $field);
888
  }
889 890
  $attributes ||= {};
  return $dbh->last_insert_id(@args, $attributes);
891 892
}

893 894 895 896 897 898 899
=head2 _id_cache

  Description : Used to return an instance of a support BaseCache module
                which can be used to speed up object access. The method
                also respects the DBAdaptor's no_cache() flag and will
                return undef in those situations
  Example     : my $cache = $self->_id_cache();
900
  Returntype  : Bio::EnsEMBL::DBSQL::Support::BaseCache
901 902 903 904 905
  
=cut

sub _id_cache {
  my ($self) = @_;
906
  return if $self->db()->no_cache() && !$self->ignore_cache_override;
907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927
  if(! exists $self->{_id_cache}) {
    $self->{_id_cache} = $self->_build_id_cache();
  }
  return $self->{_id_cache};
}

=head2 _no_id_cache

  Description : Flags if the ID based caching is active or not. This could be
                due to the adaptor not wanting to cache or because of
                a global no_cache() flag on the DBAdaptor instance
  Returntype  : Boolean
  
=cut

sub _no_id_cache {
  my ($self) = @_;
  return 1 if ! $self->_id_cache();
  return 0;
}

928 929 930 931 932 933 934
=head2 ignore_cache_override

    Description : Method to interfere with no_cache directive from Registry on
                  a per adaptor basis. This method should be called after new()
                  in order to trigger the _build_id_cache at first query.                  
    Example     : $adaptor->ignore_cache_override(1);              
    Returntype  : Boolean
Andreas Kusalananda Kähäri's avatar
Andreas Kusalananda Kähäri committed
935

936 937 938 939 940 941 942 943
=cut

sub ignore_cache_override {
    my $self = shift;
    $self->{'_override'} = shift if(@_);
    unless (defined($self->{'_override'})) {return}
    return $self->{'_override'}; 
}
Arne Stabenau's avatar
Arne Stabenau committed
944

945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961
=head2 schema_version

    Description : Returns the schema version of the currently connected
                  DBAdaptor. The subroutine also caches this value so
                  repeated calls continue to be speedy.                  
    Example     : $adaptor->schema_version();            
    Returntype  : Integer

=cut

sub schema_version {
  my ($self) = @_;
  return $self->{_schema_version} if exists $self->{_schema_version};
  my $mc = $self->db()->get_MetaContainer();
  return $self->{_schema_version} = $mc->get_schema_version();
}

962 963 964 965
#_tables
#
#  Args       : none
#  Example    : $tablename = $self->_table_name()
966 967 968 969 970 971
#  Description: ABSTRACT PROTECTED
#               Subclasses are responsible for implementing this
#               method.  It should list of [tablename, alias] pairs.
#               Additionally the primary table (with the dbID,
#               analysis_id, and score) should be the first table in
#               the list. e.g:
972 973 974 975 976 977 978 979 980
#               ( ['repeat_feature',   'rf'],
#                 ['repeat_consensus', 'rc']);
#               used to obtain features.  
#  Returntype : list of [tablename, alias] pairs
#  Exceptions : thrown if not implemented by subclass
#  Caller     : BaseFeatureAdaptor::generic_fetch
#

sub _tables {
981 982
  throw(   "abstract method _tables not defined "
         . "by implementing subclass of BaseAdaptor" );
983 984 985 986 987 988 989
}


#_columns
#
#  Args       : none
#  Example    : $tablename = $self->_columns()
990 991 992 993
#  Description: ABSTRACT PROTECTED
#               Subclasses are responsible for implementing this
#               method.  It should return a list of columns to be
#               used for feature creation.
994 995 996 997 998 999
#  Returntype : list of strings
#  Exceptions : thrown if not implemented by subclass
#  Caller     : BaseFeatureAdaptor::generic_fetch
#

sub _columns {
1000 1001
  throw(   "abstract method _columns not defined "
         . "by implementing subclass of BaseAdaptor" );
1002 1003 1004 1005 1006 1007 1008
}


# _default_where_clause
#
#  Arg [1]    : none
#  Example    : none
1009 1010 1011 1012
#  Description: May be overridden to provide an additional where
#               constraint to the SQL query which is generated to
#               fetch feature records.  This constraint is always
#               appended to the end of the generated where clause
1013 1014 1015 1016 1017
#  Returntype : string
#  Exceptions : none
#  Caller     : generic_fetch
#

1018
sub _default_where_clause { return '' }
1019 1020 1021 1022 1023 1024


# _left_join

#  Arg [1]    : none
#  Example    : none
1025 1026 1027 1028
#  Description: Can be overridden by a subclass to specify any left
#               joins which should occur.  The table name specigfied
#               in the join must still be present in the return
#               values of.
1029 1030 1031 1032 1033
#  Returntype : a {'tablename' => 'join condition'} pair
#  Exceptions : none
#  Caller     : general
#

1034
sub _left_join { return () }
1035 1036 1037 1038 1039 1040


#_final_clause

#  Arg [1]    : none
#  Example    : none
1041 1042 1043 1044
#  Description: May be overriden to provide an additional clause
#               to the end of the SQL query used to fetch feature
#               records.  This is useful to add a required ORDER BY
#               clause to the query for example.
1045 1046 1047 1048
#  Returntype : string
#  Exceptions : none
#  Caller     : generic_fetch

1049
sub _final_clause { return '' }
1050 1051 1052 1053 1054 1055 1056


#_objs_from_sth

#  Arg [1]    : DBI::row_hashref $hashref containing key-value pairs 
#               for each of the columns specified by the _columns method
#  Example    : my @feats = $self->_obj_from_hashref
1057 1058 1059 1060 1061
#  Description: ABSTRACT PROTECTED
#               The subclass is responsible for implementing this
#               method.  It should take in a DBI row hash reference
#               and return a list of created features in contig
#               coordinates.
1062 1063 1064 1065 1066
#  Returntype : list of Bio::EnsEMBL::*Features in contig coordinates
#  Exceptions : thrown if not implemented by subclass
#  Caller     : BaseFeatureAdaptor::generic_fetch

sub _objs_from_sth {
1067 1068
  throw(   "abstract method _objs_from_sth not defined "
         . "by implementing subclass of BaseAdaptor" );
1069 1070
}

1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084
#_build_id_cache

#  Example    : my $id_cache = $self->_build_id_cache
#  Description: ABSTRACT PROTECTED
#               The subclass is responsible for returning an instance
#               of the Bio::EnsEMBL::DBSQL::Support::BaseCache
#               which can be used to speed up ID based fetch operations
#  Returntype : Instance of Bio::EnsEMBL::DBSQL::Support::BaseCache
#  Exceptions : Could be thrown by the implementing sub-class 
#  Caller     : BaseAdaptor::_id_cache
sub _build_id_cache {
  return;
}

1085
sub dump_data {
Web Admin's avatar
Web Admin committed
1086 1087 1088 1089 1090 1091
  my $self = shift;
  my $data = shift;

  my $dumper = Data::Dumper->new([$data]);
  $dumper->Indent(0);
  $dumper->Terse(1);
1092 1093 1094
   my $dump = $dumper->Dump();
# $dump =~ s/'/\\'/g; 
 # $dump =~ s/^\$VAR1 = //;
Web Admin's avatar
Web Admin committed
1095
  return $dump;
1096 1097 1098 1099 1100 1101 1102
}

sub get_dumped_data {
    my $self = shift;
    my $data = shift;

    $data =~ s/\n|\r|\f|\\//g;
1103
    return eval ($data); ## no critic
1104 1105
}

1106

Graham McVicker's avatar
Graham McVicker committed
1107
1;