GeneAdaptor.pm 70.7 KB
Newer Older
1 2
=head1 LICENSE

3 4 5 6 7 8 9 10 11 12 13 14 15 16 17
Copyright [1999-2013] Wellcome Trust Sanger Institute and the EMBL-European Bioinformatics Institute

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

     http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.

=cut
18 19 20 21 22 23



=head1 CONTACT

  Please email comments or questions to the public Ensembl
Magali Ruffier's avatar
Magali Ruffier committed
24
  developers list at <http://lists.ensembl.org/mailman/listinfo/dev>.
25 26

  Questions may also be sent to the Ensembl help desk at
Magali Ruffier's avatar
Magali Ruffier committed
27
  <http://www.ensembl.org/Help/Contact>.
28 29

=cut
30 31

=head1 NAME
32 33
Bio::EnsEMBL::DBSQL::GeneAdaptor - Database adaptor for the retrieval and
storage of Gene objects
34 35 36

=head1 SYNOPSIS

37
  use Bio::EnsEMBL::Registry;
38

39
  Bio::EnsEMBL::Registry->load_registry_from_db(
40 41
    -host => 'ensembldb.ensembl.org',
    -user => 'anonymous',
42
  );
43

44 45
  $gene_adaptor =
    Bio::EnsEMBL::Registry->get_adaptor( "human", "core", "gene" );
46 47 48 49 50

  $gene = $gene_adaptor->fetch_by_dbID(1234);

  $gene = $gene_adaptor->fetch_by_stable_id('ENSG00000184129');

51
  @genes = @{ $gene_adaptor->fetch_all_by_external_name('BRCA2') };
52

53 54
  $slice_adaptor =
    Bio::EnsEMBL::Registry->get_adaptor( "human", "core", "slice" );
55

56 57
  $slice =
    $slice_adaptor->fetch_by_region( 'chromosome', '1', 1, 1000000 );
58

59
  @genes = @{ $gene_adaptor->fetch_all_by_Slice($slice) };
60

61
=head1 DESCRIPTION
62

63 64
This is a database aware adaptor for the retrieval and storage of gene
objects.
65

66
=head1 METHODS
67

68
=cut
69

70 71
package Bio::EnsEMBL::DBSQL::GeneAdaptor;

72
use strict;
73

74
use Bio::EnsEMBL::Utils::Exception qw( deprecate throw warning );
75
use Bio::EnsEMBL::Utils::Scalar qw( assert_ref );
Graham McVicker's avatar
Graham McVicker committed
76
use Bio::EnsEMBL::DBSQL::SliceAdaptor;
77
use Bio::EnsEMBL::DBSQL::BaseFeatureAdaptor;
78
use Bio::EnsEMBL::DBSQL::DBAdaptor;
79
use Bio::EnsEMBL::Gene;
80

James Gilbert's avatar
 
James Gilbert committed
81
use vars '@ISA';
82
@ISA = qw(Bio::EnsEMBL::DBSQL::BaseFeatureAdaptor);
83

84 85
# _tables
#  Arg [1]    : none
86 87
#  Description: PROTECTED implementation of superclass abstract method.
#               Returns the names, aliases of the tables to use for queries.
88 89 90
#  Returntype : list of listrefs of strings
#  Exceptions : none
#  Caller     : internal
91
#  Status     : Stable
92

93
sub _tables {
  # One [table name, alias] pair per table; the aliases (g, x, exdb) are
  # the ones used by _columns and _left_join.
  return (
    [ 'gene',        'g'    ],
    [ 'xref',        'x'    ],
    [ 'external_db', 'exdb' ],
  );
}
96

97 98 99
# _columns
#  Arg [1]    : none
#  Example    : none
100 101
#  Description: PROTECTED implementation of superclass abstract method.
#               Returns a list of columns to use for queries.
102 103 104
#  Returntype : list of strings
#  Exceptions : none
#  Caller     : internal
105
#  Status     : Stable
106 107

sub _columns {
  my ($self) = @_;

  # The two date columns are wrapped by the connection's
  # from_date_to_seconds() helper; every other column is selected as-is.
  my $dbc           = $self->db()->dbc();
  my $created_date  = $dbc->from_date_to_seconds("g.created_date");
  my $modified_date = $dbc->from_date_to_seconds("g.modified_date");

  return (
    # gene table
    'g.gene_id',
    'g.seq_region_id',
    'g.seq_region_start',
    'g.seq_region_end',
    'g.seq_region_strand',
    'g.analysis_id',
    'g.biotype',
    'g.display_xref_id',
    'g.description',
    'g.status',
    'g.source',
    'g.is_current',
    'g.canonical_transcript_id',
    'g.stable_id',
    'g.version',
    $created_date,
    $modified_date,

    # display xref and its external database
    'x.display_label',
    'x.dbprimary_acc',
    'x.description',
    'x.version',
    'exdb.db_name',
    'exdb.status',
    'exdb.db_release',
    'exdb.db_display_name',
    'x.info_type',
    'x.info_text',
  );
}
115

116
sub _left_join {
  # [table, join condition] pairs: xref hangs off the gene's display xref,
  # external_db hangs off that xref.
  return (
    [ 'xref',        "x.xref_id = g.display_xref_id" ],
    [ 'external_db', "exdb.external_db_id = x.external_db_id" ],
  );
}
119

120
=head2 list_dbIDs
121

122 123
  Example    : @gene_ids = @{$gene_adaptor->list_dbIDs()};
  Description: Gets an array of internal ids for all genes in the current db
124
  Arg[1]     : <optional> int. not 0 for the ids to be sorted by the seq_region.
125
  Returntype : Listref of Ints
Graham McVicker's avatar
Graham McVicker committed
126
  Exceptions : none
127
  Caller     : general
128
  Status     : Stable
129 130 131

=cut

132
sub list_dbIDs {
  my $self    = shift;
  my $ordered = shift;

  # Second argument is deliberately undef (list_stable_ids passes
  # "stable_id" in that position); $ordered is forwarded untouched.
  return $self->_list_dbIDs('gene', undef, $ordered);
}

138
=head2 list_stable_ids
139

140
  Example    : @stable_gene_ids = @{$gene_adaptor->list_stable_ids()};
141 142
  Description: Gets a listref of stable ids for all genes in the current db
  Returntype : reference to a list of strings
143
  Exceptions : none
144
  Caller     : general
145
  Status     : Stable
146

147
=cut
148

149
sub list_stable_ids {
  my $self = shift;

  # Same helper as list_dbIDs, but asking for "stable_id" values.
  my @helper_args = ('gene', 'stable_id');
  return $self->_list_dbIDs(@helper_args);
}

Patrick Meidl's avatar
Patrick Meidl committed
155 156 157 158 159 160
# list_seq_region_ids
#  Arg [1]    : none
#  Description: Delegates to _list_seq_region_ids() for the 'gene' table and
#               returns whatever that helper returns.
#  Caller     : general

sub list_seq_region_ids {
  my ($self) = @_;

  return $self->_list_seq_region_ids('gene');
}

161 162
=head2 fetch_by_display_label

163 164 165
  Arg [1]    : String $label - display label of gene to fetch
  Example    : my $gene = $geneAdaptor->fetch_by_display_label("BRCA2");
  Description: Returns the gene which has the given display label or undef if
166 167 168
               there is none. If there are more than 1, the gene on the 
               reference slice is reported or if none are on the reference,
               the first one is reported.
169 170 171
  Returntype : Bio::EnsEMBL::Gene
  Exceptions : none
  Caller     : general
172
  Status     : Stable
173 174 175 176

=cut

sub fetch_by_display_label {
  my ($self, $label) = @_;

  my $constraint = "x.display_label = ? AND g.is_current = 1";
  $self->bind_param_generic_fetch($label, SQL_VARCHAR);
  my @matches = @{ $self->generic_fetch($constraint) };

  # Zero or one hit: nothing to choose between (undef when there is no hit).
  return $matches[0] if (scalar(@matches) <= 1);

  # Several hits: the first one sitting on a reference slice wins ...
  foreach my $candidate (@matches) {
    return $candidate if ($candidate->slice->is_reference);
  }

  # ... and if none is on the reference, fall back to the first hit.
  return $matches[0];
} ## end sub fetch_by_display_label
201

202
=head2 fetch_all_by_display_label
203

204
  Arg [1]    : String $label - display label of genes to fetch
205
  Example    : my @genes = @{$geneAdaptor->fetch_all_by_display_label("PPP1R2P1")};
206 207 208 209 210 211 212 213 214 215
  Description: Returns all genes which have the given display label or undef if
               there are none. 
  Returntype : listref of Bio::EnsEMBL::Gene objects
  Exceptions : none
  Caller     : general
  Status     : Stable

=cut

sub fetch_all_by_display_label {
  my ($self, $label) = @_;

  # The label fills the single placeholder of the constraint below.
  $self->bind_param_generic_fetch($label, SQL_VARCHAR);

  my $found = $self->generic_fetch("x.display_label = ? AND g.is_current = 1");
  return $found;
}
225

226 227
=head2 fetch_by_stable_id

228 229
  Arg [1]    : String $id 
               The stable ID of the gene to retrieve
Graham McVicker's avatar
Graham McVicker committed
230
  Example    : $gene = $gene_adaptor->fetch_by_stable_id('ENSG00000148944');
231 232
  Description: Retrieves a gene object from the database via its stable id.
               The gene will be retrieved in its native coordinate system (i.e.
233
               in the coordinate system it is stored in the database). It may
234
               be converted to a different coordinate system through a call to
235
               transform() or transfer(). If the gene or exon is not found
236
               undef is returned instead.
237
  Returntype : Bio::EnsEMBL::Gene or undef
238
  Exceptions : if we cant get the gene in given coord system
Graham McVicker's avatar
Graham McVicker committed
239
  Caller     : general
240
  Status     : Stable
241 242 243

=cut

244
sub fetch_by_stable_id {
  my $self      = shift;
  my $stable_id = shift;

  $self->bind_param_generic_fetch($stable_id, SQL_VARCHAR);

  # Only current genes are considered; the first hit (or undef) is returned.
  my @hits = @{ $self->generic_fetch("g.stable_id = ? AND g.is_current = 1") };
  return $hits[0];
}
253

254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302
=head2 fetch_all_by_source

  Arg [1]    : String $source
               listref of $sources
               The source of the gene to retrieve. You can have as an argument a reference
               to a list of sources
  Example    : $genes = $gene_adaptor->fetch_all_by_source('havana'); 
               $genes = $gene_adaptor->fetch_all_by_source(['ensembl', 'vega']);
  Description: Retrieves an array reference of gene objects from the database via its source or sources.
               The gene will be retrieved in its native coordinate system (i.e.
               in the coordinate system it is stored in the database). It may
               be converted to a different coordinate system through a call to
               transform() or transfer(). If the gene or exon is not found
               undef is returned instead.
  Returntype  : listref of Bio::EnsEMBL::Gene
  Exceptions : if we cant get the gene in given coord system
  Caller     : general
  Status     : Stable

=cut

sub fetch_all_by_source {
  my ($self, $source) = @_;

  my $constraint = $self->source_constraint($source);

  # Hand back a fresh listref holding the fetched genes.
  return [ @{ $self->generic_fetch($constraint) } ];
}

=head2 source_constraint 

  Arg [1]    : String $source
               listref of $sources
               The source of the gene to retrieve. You can have as an argument a reference
               to a list of sources
  Description: Used internally to generate a SQL constraint to restrict a gene query by source
  Returntype  : String
  Exceptions : If source is not supplied
  Caller     : general
  Status     : Stable

=cut

sub source_constraint {
  my ($self, $sources, $inline_variables) = @_;

  # IN (...) clause over g.source, built by the shared helper.
  my $in_statement = $self->generate_in_constraint($sources, 'g.source', SQL_VARCHAR, $inline_variables);

  # Always restricted to current genes.
  return join(' and ', 'g.is_current = 1', $in_statement);
}

303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322
=head2 count_all_by_source

  Arg [1]     : String $source
                listref of $source
                The source of the gene to retrieve. You can have as an argument a reference
                to a list of sources
  Example     : $cnt = $gene_adaptor->count_all_by_source('ensembl'); 
                $cnt = $gene_adaptor->count_all_by_source(['havana', 'vega']);
  Description : Retrieves count of gene objects from the database via its source or sources.
  Returntype  : integer
  Caller      : general
  Status      : Stable

=cut

sub count_all_by_source {
  my ($self, $source) = @_;

  # Same constraint as fetch_all_by_source, but counted instead of fetched.
  my $constraint = $self->source_constraint($source);
  return $self->generic_count($constraint);
}

Jan-hinnerk Vogel's avatar
 
Jan-hinnerk Vogel committed
323 324 325
=head2 fetch_all_by_biotype 

  Arg [1]    : String $biotype 
326 327 328 329 330 331
               listref of $biotypes
               The biotype of the gene to retrieve. You can have as an argument a reference
               to a list of biotypes
  Example    : $gene = $gene_adaptor->fetch_all_by_biotype('protein_coding'); 
               $gene = $gene_adaptor->fetch_all_by_biotype(['protein_coding', 'sRNA', 'miRNA']);
  Description: Retrieves an array reference of gene objects from the database via its biotype or biotypes.
Jan-hinnerk Vogel's avatar
 
Jan-hinnerk Vogel committed
332 333 334 335 336 337 338 339
               The genes will be retrieved in its native coordinate system (i.e.
               in the coordinate system it is stored in the database). It may
               be converted to a different coordinate system through a call to
               transform() or transfer(). If the gene or exon is not found
               undef is returned instead.
  Returntype  : listref of Bio::EnsEMBL::Gene
  Exceptions : if we cant get the gene in given coord system
  Caller     : general
340
  Status     : Stable
Jan-hinnerk Vogel's avatar
 
Jan-hinnerk Vogel committed
341 342 343 344 345

=cut

sub fetch_all_by_biotype {
  my ($self, $biotype) = @_;

  my $constraint = $self->biotype_constraint($biotype);

  # Hand back a fresh listref holding the fetched genes.
  return [ @{ $self->generic_fetch($constraint) } ];
}

=head2 biotype_constraint 

352
  Arg [1]    : String $biotypes 
353 354 355 356 357 358 359 360 361 362
               listref of $biotypes
               The biotype of the gene to retrieve. You can have as an argument a reference
               to a list of biotypes
  Description: Used internally to generate a SQL constraint to restrict a gene query by biotype
  Returntype  : String
  Exceptions : If biotype is not supplied
  Caller     : general
  Status     : Stable

=cut
Jan-hinnerk Vogel's avatar
 
Jan-hinnerk Vogel committed
363

364
sub biotype_constraint {
  my ($self, $biotypes, $inline_variables) = @_;

  # IN (...) clause over g.biotype, built by the shared helper.
  my $in_statement = $self->generate_in_constraint($biotypes, 'g.biotype', SQL_VARCHAR, $inline_variables);

  # Always restricted to current genes.
  return join(' and ', 'g.is_current = 1', $in_statement);
}
Jan-hinnerk Vogel's avatar
 
Jan-hinnerk Vogel committed
371

372
=head2 count_all_by_biotype 
373

374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393
  Arg [1]     : String $biotype 
                listref of $biotypes
                The biotype of the gene to retrieve. You can have as an argument a reference
                to a list of biotypes
  Example     : $cnt = $gene_adaptor->count_all_by_biotype('protein_coding'); 
                $cnt = $gene_adaptor->count_all_by_biotype(['protein_coding', 'sRNA', 'miRNA']);
  Description : Retrieves count of gene objects from the database via its biotype or biotypes.
  Returntype  : integer
  Caller      : general
  Status      : Stable

=cut

sub count_all_by_biotype {
  my ($self, $biotype) = @_;

  # Same constraint as fetch_all_by_biotype, but counted instead of fetched.
  my $constraint = $self->biotype_constraint($biotype);
  return $self->generic_count($constraint);
}

# fetch_all
#  Arg [1]    : none
#  Description: Fetches every current gene whose biotype is not "LRG_gene".
#  Returntype : listref (a fresh copy of the list generic_fetch returns)
#  Caller     : general

sub fetch_all {
  my ($self) = @_;

  my $current_non_lrg = 'g.biotype != "LRG_gene" and g.is_current = 1';

  return [ @{ $self->generic_fetch($current_non_lrg) } ];
}

399
=head2 fetch_all_versions_by_stable_id 
400

401 402 403 404 405 406 407 408 409 410
  Arg [1]     : String $stable_id 
                The stable ID of the gene to retrieve
  Example     : $gene = $gene_adaptor->fetch_all_versions_by_stable_id
                  ('ENSG00000148944');
  Description : Similar to fetch_by_stable_id, but retrieves all versions of a
                gene stored in the database.
  Returntype  : listref of Bio::EnsEMBL::Gene
  Exceptions  : if we cant get the gene in given coord system
  Caller      : general
  Status      : At Risk
Graham McVicker's avatar
Graham McVicker committed
411

412 413 414 415 416
=cut

sub fetch_all_versions_by_stable_id {
  my $self      = shift;
  my $stable_id = shift;

  # Unlike fetch_by_stable_id there is no is_current filter, so every
  # stored version with this stable id matches.
  $self->bind_param_generic_fetch($stable_id, SQL_VARCHAR);

  return $self->generic_fetch('g.stable_id = ?');
}
421

Web Admin's avatar
Web Admin committed
422 423
=head2 fetch_by_exon_stable_id

424
  Arg [1]    : String $id
425 426 427 428
               The stable id of an exon of the gene to retrieve
  Example    : $gene = $gene_adptr->fetch_by_exon_stable_id('ENSE00000148944');
  Description: Retrieves a gene object from the database via an exon stable id.
               The gene will be retrieved in its native coordinate system (i.e.
429
               in the coordinate system it is stored in the database). It may
430
               be converted to a different coordinate system through a call to
431
               transform() or transfer(). If the gene or exon is not found
432
               undef is returned instead.
433
  Returntype : Bio::EnsEMBL::Gene or undef
434
  Exceptions : none
Web Admin's avatar
Web Admin committed
435
  Caller     : general
436
  Status     : Stable
Web Admin's avatar
Web Admin committed
437 438 439

=cut

440 441
sub fetch_by_exon_stable_id {
  # $version is accepted for interface compatibility but is not used by
  # the query below.
  my ($self, $stable_id, $version) = @_;

  # Walk exon -> exon_transcript -> transcript to find the owning gene of
  # the current exon carrying this stable id.
  my $sql = qq(
      SELECT t.gene_id
        FROM transcript as t,
             exon_transcript as et,
             exon as e
       WHERE t.transcript_id = et.transcript_id
         AND et.exon_id = e.exon_id
         AND e.stable_id = ?
         AND e.is_current = 1
  );

  my $sth = $self->prepare($sql);
  $sth->bind_param(1, $stable_id, SQL_VARCHAR);
  $sth->execute();

  # Only the first row is read, so further rows may still be pending.
  # Release the handle explicitly, as fetch_by_transcript_id and
  # fetch_by_transcript_stable_id do.
  my ($dbID) = $sth->fetchrow_array();
  $sth->finish();

  # Unknown (or non-current) exon: report "not found" as undef.
  return undef if (!defined($dbID));

  my $gene = $self->fetch_by_dbID($dbID);

  return $gene;
} ## end sub fetch_by_exon_stable_id
466

467
=head2 fetch_all_by_domain
468

469 470
  Arg [1]    : String $domain
               The domain to fetch genes from
471
  Example    : my @genes = @{ $gene_adaptor->fetch_all_by_domain($domain) };
472 473
  Description: Retrieves a listref of genes whose translation contain interpro
               domain $domain. The genes are returned in their native coord
474
               system (i.e. the coord_system they are stored in). If the coord
475 476
               system needs to be changed, then transform or transfer should be
               called on the individual objects returned.
477 478 479
  Returntype : list of Bio::EnsEMBL::Genes
  Exceptions : none
  Caller     : domainview
480
  Status     : Stable
481 482 483

=cut

484
sub fetch_all_by_domain {
  my ($self, $domain) = @_;

  if (!$domain) {
    throw("domain argument is required");
  }

  # Gene ids of current transcripts (for this adaptor's species) whose
  # translation has a protein feature mapped to the given InterPro accession.
  my $sql = qq(
  SELECT    tr.gene_id
  FROM      interpro i,
            protein_feature pf,
            transcript tr,
            translation tl,
            seq_region sr,
            coord_system cs
  WHERE     cs.species_id = ?
    AND     cs.coord_system_id = sr.coord_system_id
    AND     sr.seq_region_id = tr.seq_region_id
    AND     tr.is_current = 1
    AND     tr.transcript_id = tl.transcript_id
    AND     tl.translation_id = pf.translation_id
    AND     pf.hit_name = i.id
    AND     i.interpro_ac = ?
  GROUP BY  tr.gene_id);

  my $sth = $self->prepare($sql);
  $sth->bind_param(1, $self->species_id(), SQL_VARCHAR);
  $sth->bind_param(2, $domain,             SQL_VARCHAR);
  $sth->execute();

  my $rows = $sth->fetchall_arrayref();
  $sth->finish();

  # Each row holds a single column: the gene id.
  my @gene_ids = map { $_->[0] } @{$rows};

  return $self->fetch_all_by_dbID_list(\@gene_ids);
} ## end sub fetch_all_by_domain
520

Andreas Kusalananda Kähäri's avatar
Andreas Kusalananda Kähäri committed
521
=head2 fetch_all_by_Slice_and_external_dbname_link
522 523 524 525 526 527

  Arg [1]    : Bio::EnsEMBL::Slice $slice
               The slice to fetch genes on.
  Arg [2]    : (optional) string $logic_name
               the logic name of the type of features to obtain
  Arg [3]    : (optional) boolean $load_transcripts
Andreas Kusalananda Kähäri's avatar
Andreas Kusalananda Kähäri committed
528 529
               if true, transcripts will be loaded immediately
               rather than lazy loaded later.
530 531
  Arg [4]    : String
               Name of the external database to fetch the Genes by
Andreas Kusalananda Kähäri's avatar
Andreas Kusalananda Kähäri committed
532 533
  Example    : @genes = @{
                 $ga->fetch_all_by_Slice_and_external_dbname_link(
534
                                          $slice, undef, undef, "HGNC" ) };
Andreas Kusalananda Kähäri's avatar
Andreas Kusalananda Kähäri committed
535 536 537 538 539 540
  Description: Overrides superclass method to optionally load
               transcripts immediately rather than lazy-loading them
               later.  This is more efficient when there are a lot
               of genes whose transcripts are going to be used. The
               genes are then filtered to return only those with
               external database links of the type specified
541 542 543
  Returntype : reference to list of genes
  Exceptions : thrown if exon cannot be placed on transcript slice
  Caller     : 
544
  Status     : Stable
545 546 547 548

=cut

sub fetch_all_by_Slice_and_external_dbname_link {
  # NOTE(review): $load_transcripts is accepted but never used in this
  # method; genes are returned without any transcript preloading.
  my ($self, $slice, $logic_name, $load_transcripts, $db_name) = @_;

  # A single DBEntryAdaptor serves both lookups below. It is obtained via
  # $self->db(), the same route used for every other adaptor in this file.
  my $dbentry_adaptor = $self->db()->get_DBEntryAdaptor();

  # Get the external_db_id(s) from the name.
  my $external_db_ids = $dbentry_adaptor->get_external_db_ids($db_name, undef, 'ignore release');

  if (scalar(@{$external_db_ids}) == 0) {
    # Unknown database name: warn with the list of known names and return
    # an empty result rather than failing.
    my $external_db_names = $dbentry_adaptor->get_distinct_external_dbs();
    my $available = join("\n", map { "\t${_}" } @{$external_db_names});
    warning(sprintf("Could not find external database " . "'%s' in the external_db table\n" . "Available are:\n%s", $db_name, $available));
    return [];
  }

  # Get the gene_ids for those with links (used as a lookup set).
  my %linked_genes;
  foreach my $local_external_db_id (@{$external_db_ids}) {
    my @linked_genes = $dbentry_adaptor->list_gene_ids_by_external_db_id($local_external_db_id);
    foreach my $gene_id (@linked_genes) {
      $linked_genes{$gene_id} = 1;
    }
  }

  # Get all the current genes on the slice.
  my $genes = $self->SUPER::fetch_all_by_Slice_constraint($slice, 'g.is_current = 1', $logic_name);

  # Keep only those whose dbID is in the linked set, preserving order.
  my @genes_passed = grep { exists($linked_genes{$_->dbID()}) } @{$genes};

  # Return the list of those that passed.
  return \@genes_passed;
} ## end sub fetch_all_by_Slice_and_external_dbname_link
587

588 589 590 591
=head2 fetch_all_by_Slice

  Arg [1]    : Bio::EnsEMBL::Slice $slice
               The slice to fetch genes on.
592 593
  Arg [2]    : (optional) string $logic_name
               the logic name of the type of features to obtain
594 595 596
  Arg [3]    : (optional) boolean $load_transcripts
               if true, transcripts will be loaded immediately rather than
               lazy loaded later.
597 598 599 600
  Arg [4]    : (optional) string $source
               the source name of the features to obtain.
  Arg [5]    : (optional) string biotype
                the biotype of the features to obtain.
601 602 603 604 605
  Example    : @genes = @{$gene_adaptor->fetch_all_by_Slice()};
  Description: Overrides superclass method to optionally load transcripts
               immediately rather than lazy-loading them later.  This
               is more efficient when there are a lot of genes whose
               transcripts are going to be used.
Jan-hinnerk Vogel's avatar
 
Jan-hinnerk Vogel committed
606
  Returntype : reference to list of genes 
607
  Exceptions : thrown if exon cannot be placed on transcript slice
Jan-hinnerk Vogel's avatar
 
Jan-hinnerk Vogel committed
608
  Caller     : Slice::get_all_Genes
609
  Status     : Stable
610 611 612 613

=cut

sub fetch_all_by_Slice {
  my ($self, $slice, $logic_name, $load_transcripts, $source, $biotype) = @_;

  my $constraint = 'g.is_current = 1';

  # Both filters are inlined into the constraint text. $source now goes
  # through generate_in_constraint, exactly as $biotype already did, instead
  # of being pasted between hand-written quotes (which broke on, or could be
  # abused through, a quote character in the value).
  # NOTE(review): this relies on generate_in_constraint escaping inlined
  # values - confirm against BaseAdaptor.
  my $inline_variables = 1;
  if (defined($source)) {
    $constraint .= " and " . $self->generate_in_constraint($source, 'g.source', SQL_VARCHAR, $inline_variables);
  }
  if (defined($biotype)) {
    $constraint .= " and " . $self->generate_in_constraint($biotype, 'g.biotype', SQL_VARCHAR, $inline_variables);
  }

  my $genes = $self->SUPER::fetch_all_by_Slice_constraint($slice, $constraint, $logic_name);

  # If there are less than two genes, still do lazy-loading.
  if (!$load_transcripts || @$genes < 2) {
    return $genes;
  }

  # Preload all of the transcripts now, instead of lazy loading later,
  # faster than one query per transcript.

  # First check if transcripts are already preloaded.
  # FIXME: Should check all transcripts.
  if (exists($genes->[0]->{'_transcript_array'})) {
    return $genes;
  }

  # Get extent of region spanned by transcripts.
  my ($min_start, $max_end);
  foreach my $g (@$genes) {
    if (!defined($min_start) || $g->seq_region_start() < $min_start) {
      $min_start = $g->seq_region_start();
    }
    if (!defined($max_end) || $g->seq_region_end() > $max_end) {
      $max_end = $g->seq_region_end();
    }
  }

  # Reuse the caller's slice when it already covers every gene; otherwise
  # fetch a wider slice spanning the full extent.
  my $ext_slice;

  if ($min_start >= $slice->start() && $max_end <= $slice->end()) {
    $ext_slice = $slice;
  } else {
    my $sa = $self->db()->get_SliceAdaptor();
    $ext_slice = $sa->fetch_by_region($slice->coord_system->name(), $slice->seq_region_name(), $min_start, $max_end, $slice->strand(), $slice->coord_system->version());
  }

  # Associate transcript identifiers with genes.

  my %g_hash = map { $_->dbID => $_ } @{$genes};

  my $g_id_str = join(',', keys(%g_hash));

  my $sth = $self->prepare("SELECT gene_id, transcript_id " . "FROM   transcript " . "WHERE  gene_id IN ($g_id_str)");

  $sth->execute();

  my ($g_id, $tr_id);
  $sth->bind_columns(\($g_id, $tr_id));

  # transcript dbID => owning gene object
  my %tr_g_hash;

  while ($sth->fetch()) {
    $tr_g_hash{$tr_id} = $g_hash{$g_id};
  }

  # None of the genes has a transcript: there is nothing to preload, and
  # the constraint built below would be the invalid "IN ()".
  if (!%tr_g_hash) {
    return $genes;
  }

  my $ta = $self->db()->get_TranscriptAdaptor();
  my $transcripts = $ta->fetch_all_by_Slice($ext_slice, 1, undef, sprintf("t.transcript_id IN (%s)", join(',', sort { $a <=> $b } keys(%tr_g_hash))));

  # Move transcripts onto gene slice, and add them to genes.
  foreach my $tr (@{$transcripts}) {
    if (!exists($tr_g_hash{$tr->dbID()})) { next }

    my $new_tr;
    if ($slice != $ext_slice) {
      $new_tr = $tr->transfer($slice);
      if (!defined($new_tr)) {
        throw("Unexpected. " . "Transcript could not be transfered onto Gene slice.");
      }
    } else {
      $new_tr = $tr;
    }

    $tr_g_hash{$tr->dbID()}->add_Transcript($new_tr);
  }

  return $genes;
} ## end sub fetch_all_by_Slice
703

704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733
=head2 count_all_by_Slice

  Arg [1]    : Bio::EnsEMBL::Slice $slice
               The slice to count genes on.
  Arg [2]    : (optional) biotype(s) string or arrayref of strings 
                the biotype of the features to count.
  Arg [3]    : (optional) string $source
               the source name of the features to count.
  Example    : $cnt = $gene_adaptor->count_all_by_Slice();
  Description: Method to count genes on a given slice, filtering by biotype and source
  Returntype : integer
  Exceptions : thrown if exon cannot be placed on transcript slice
  Status     : Stable
  Caller     : general
=cut

sub count_all_by_Slice {
  # Argument order is slice, biotype, source (note: biotype before source,
  # the reverse of fetch_all_by_Slice).
  my ($self, $slice, $biotype, $source) = @_;

  my $constraint = 'g.is_current = 1';
  if (defined($source)) {
    # The source is inlined into the constraint text, so it goes through
    # generate_in_constraint (as fetch_all_by_Slice does for biotype)
    # instead of being pasted between hand-written quotes.
    # NOTE(review): this relies on generate_in_constraint escaping inlined
    # values - confirm against BaseAdaptor.
    my $inline_variables = 1;
    $constraint .= " and " . $self->generate_in_constraint($source, 'g.source', SQL_VARCHAR, $inline_variables);
  }
  if (defined($biotype)) {
    # biotype_constraint() repeats "g.is_current = 1"; harmless duplication.
    $constraint .= " and " . $self->biotype_constraint($biotype);
  }

  return $self->count_by_Slice_constraint($slice, $constraint);
}

734
=head2 fetch_by_transcript_id
735

736 737
  Arg [1]    : Int $trans_id
               Unique database identifier for the transcript whose gene should
738 739 740
               be retrieved. The gene is returned in its native coord
               system (i.e. the coord_system it is stored in). If the coord
               system needs to be changed, then transform or transfer should
741
               be called on the returned object. undef is returned if the
742
               gene or transcript is not found in the database.
743
  Example    : $gene = $gene_adaptor->fetch_by_transcript_id(1241);
Graham McVicker's avatar
Graham McVicker committed
744
  Description: Retrieves a gene from the database via the database identifier
745
               of one of its transcripts.
Graham McVicker's avatar
Graham McVicker committed
746 747
  Returntype : Bio::EnsEMBL::Gene
  Exceptions : none
748
  Caller     : general
749
  Status     : Stable
750 751 752

=cut

753
sub fetch_by_transcript_id {
  my ($self, $trans_id) = @_;

  # A cheap single-table lookup of the owning gene id.
  my $sql = qq(
      SELECT tr.gene_id
      FROM transcript tr
      WHERE tr.transcript_id = ?
  );

  my $sth = $self->prepare($sql);
  $sth->bind_param(1, $trans_id, SQL_INTEGER);
  $sth->execute();

  my ($geneid) = $sth->fetchrow_array();
  $sth->finish();

  # Known transcript: load its gene. Otherwise report "not found" as undef.
  if (defined $geneid) {
    my $gene = $self->fetch_by_dbID($geneid);
    return $gene;
  }

  return undef;
}

777 778
=head2 fetch_by_transcript_stable_id

779 780 781 782 783 784
  Arg [1]    : string $trans_stable_id
               transcript stable ID whose gene should be retrieved
  Example    : my $gene = $gene_adaptor->fetch_by_transcript_stable_id
                 ('ENST0000234');
  Description: Retrieves a gene from the database via the stable ID of one of
               its transcripts
785 786
  Returntype : Bio::EnsEMBL::Gene
  Exceptions : none
787
  Caller     : general
788
  Status     : Stable
789 790 791 792

=cut

sub fetch_by_transcript_stable_id {
  my ($self, $trans_stable_id) = @_;

  # Only the current transcript with this stable id is considered.
  my $sql = qq(
        SELECT  gene_id
	FROM	transcript
        WHERE   stable_id = ?
        AND     is_current = 1
    );

  my $sth = $self->prepare($sql);
  $sth->bind_param(1, $trans_stable_id, SQL_VARCHAR);
  $sth->execute();

  my ($geneid) = $sth->fetchrow_array();
  $sth->finish;

  # Known transcript: load its gene. Otherwise report "not found" as undef.
  if (defined $geneid) {
    my $gene = $self->fetch_by_dbID($geneid);
    return $gene;
  }

  return undef;
}
814

815 816
=head2 fetch_by_translation_stable_id

817 818 819 820 821
  Arg [1]    : String $translation_stable_id
               The stable id of a translation of the gene to be obtained
  Example    : my $gene = $gene_adaptor->fetch_by_translation_stable_id
                 ('ENSP00000278194');
  Description: Retrieves a gene via the stable id of one of its translations.
822
  Returntype : Bio::EnsEMBL::Gene
Graham McVicker's avatar
Graham McVicker committed
823
  Exceptions : none
824
  Caller     : general
825
  Status     : Stable
Web Admin's avatar
Web Admin committed
826 827 828

=cut

829
sub fetch_by_translation_stable_id {
830
  my ($self, $translation_stable_id) = @_;
831

832 833
  my $sth = $self->prepare(
	qq(
834 835
        SELECT  tr.gene_id
	FROM    transcript tr,
Monika Komorowska's avatar
Monika Komorowska committed
836 837
                translation tl
	WHERE   tl.stable_id = ?
838 839 840 841
        AND     tr.transcript_id = tl.transcript_id
        AND     tr.is_current = 1
    ));

842 843
  $sth->bind_param(1, $translation_stable_id, SQL_VARCHAR);