=head1 LICENSE

4
  Copyright (c) 1999-2013 The European Bioinformatics Institute and
5 6 7 8 9 10 11 12 13 14
  Genome Research Limited.  All rights reserved.

  This software is distributed under a modified Apache license.
  For license details, please see

    http://www.ensembl.org/info/about/code_licence.html

=head1 CONTACT

  Please email comments or questions to the public Ensembl
15
  developers list at <dev@ensembl.org>.
16 17 18 19 20

  Questions may also be sent to the Ensembl help desk at
  <helpdesk@ensembl.org>.

=cut
21 22

=head1 NAME
23 24
Bio::EnsEMBL::DBSQL::GeneAdaptor - Database adaptor for the retrieval and
storage of Gene objects
25 26 27

=head1 SYNOPSIS

28
  use Bio::EnsEMBL::Registry;
29

30
  Bio::EnsEMBL::Registry->load_registry_from_db(
31 32
    -host => 'ensembldb.ensembl.org',
    -user => 'anonymous',
33
  );
34

35 36
  $gene_adaptor =
    Bio::EnsEMBL::Registry->get_adaptor( "human", "core", "gene" );
37 38 39 40 41

  $gene = $gene_adaptor->fetch_by_dbID(1234);

  $gene = $gene_adaptor->fetch_by_stable_id('ENSG00000184129');

42
  @genes = @{ $gene_adaptor->fetch_all_by_external_name('BRCA2') };
43

44 45
  $slice_adaptor =
    Bio::EnsEMBL::Registry->get_adaptor( "human", "core", "slice" );
46

47 48
  $slice =
    $slice_adaptor->fetch_by_region( 'chromosome', '1', 1, 1000000 );
49

50
  @genes = @{ $gene_adaptor->fetch_all_by_Slice($slice) };
51

52
=head1 DESCRIPTION
53

54 55
This is a database aware adaptor for the retrieval and storage of gene
objects.
56

57
=head1 METHODS
58

59
=cut
60

61 62
package Bio::EnsEMBL::DBSQL::GeneAdaptor;

63
use strict;
64

65
use Bio::EnsEMBL::Utils::Exception qw( deprecate throw warning );
66
use Bio::EnsEMBL::Utils::Scalar qw( assert_ref );
Graham McVicker's avatar
Graham McVicker committed
67
use Bio::EnsEMBL::DBSQL::SliceAdaptor;
68
use Bio::EnsEMBL::DBSQL::BaseFeatureAdaptor;
69
use Bio::EnsEMBL::DBSQL::DBAdaptor;
70
use Bio::EnsEMBL::Gene;
71

James Gilbert's avatar
 
James Gilbert committed
72
use vars '@ISA';
73
@ISA = qw(Bio::EnsEMBL::DBSQL::BaseFeatureAdaptor);
74

75 76
# _tables
#  Arg [1]    : none
77 78
#  Description: PROTECTED implementation of superclass abstract method.
#               Returns the names, aliases of the tables to use for queries.
79 80 81
#  Returntype : list of listrefs of strings
#  Exceptions : none
#  Caller     : internal
82
#  Status     : Stable
83

84
sub _tables {
  # Each entry pairs a table name with the alias used for it in queries:
  # gene => g, xref => x, external_db => exdb.
  return (['gene', 'g'], ['xref', 'x'], ['external_db', 'exdb']);
}
87

88 89 90
# _columns
#  Arg [1]    : none
#  Example    : none
91 92
#  Description: PROTECTED implementation of superclass abstract method.
#               Returns a list of columns to use for queries.
93 94 95
#  Returntype : list of strings
#  Exceptions : none
#  Caller     : internal
96
#  Status     : Stable
97 98

sub _columns {
  my ($self) = @_;

  # The two date columns are not selected raw: the SQL expression for each
  # is produced by the database connection's from_date_to_seconds().
  my $created_date  = $self->db()->dbc()->from_date_to_seconds("g.created_date");
  my $modified_date = $self->db()->dbc()->from_date_to_seconds("g.modified_date");

  # Column order is kept exactly as before (gene columns, the two date
  # expressions, then xref / external_db columns).
  # NOTE(review): presumably the row-to-object code reads these by
  # position - confirm before reordering.
  return (
    'g.gene_id',
    'g.seq_region_id',
    'g.seq_region_start',
    'g.seq_region_end',
    'g.seq_region_strand',
    'g.analysis_id',
    'g.biotype',
    'g.display_xref_id',
    'g.description',
    'g.status',
    'g.source',
    'g.is_current',
    'g.canonical_transcript_id',
    'g.canonical_annotation',
    'g.stable_id',
    'g.version',
    $created_date,
    $modified_date,
    'x.display_label',
    'x.dbprimary_acc',
    'x.description',
    'x.version',
    'exdb.db_name',
    'exdb.status',
    'exdb.db_release',
    'exdb.db_display_name',
    'x.info_type',
    'x.info_text'
  );
}
106

107
sub _left_join {
  # Each entry pairs a table name with its join condition: xref is joined on
  # the gene's display xref id, external_db on the xref's external db id.
  return (
    ['xref',        "x.xref_id = g.display_xref_id"],
    ['external_db', "exdb.external_db_id = x.external_db_id"]
  );
}
110

111
=head2 list_dbIDs
112

113 114
  Example    : @gene_ids = @{$gene_adaptor->list_dbIDs()};
  Description: Gets an array of internal ids for all genes in the current db
115
  Arg[1]     : <optional> int. not 0 for the ids to be sorted by the seq_region.
116
  Returntype : Listref of Ints
Graham McVicker's avatar
Graham McVicker committed
117
  Exceptions : none
118
  Caller     : general
119
  Status     : Stable
120 121 122

=cut

123
sub list_dbIDs {
  my ($self, $ordered) = @_;

  # Delegates to _list_dbIDs() for the gene table. The second argument is
  # deliberately undef; $ordered is passed through unchanged.
  return $self->_list_dbIDs("gene", undef, $ordered);
}

129
=head2 list_stable_ids
130

131
  Example    : @stable_gene_ids = @{$gene_adaptor->list_stable_ids()};
132 133
  Description: Gets a listref of stable ids for all genes in the current db
  Returntype : reference to a list of strings
134
  Exceptions : none
135
  Caller     : general
136
  Status     : Stable
137

138
=cut
139

140
sub list_stable_ids {
  my ($self) = @_;

  # Same helper as list_dbIDs(), but asking for the stable_id column.
  return $self->_list_dbIDs("gene", "stable_id");
}

Patrick Meidl's avatar
Patrick Meidl committed
146 147 148 149 150 151
sub list_seq_region_ids {
  my ($self) = @_;

  # Delegates to _list_seq_region_ids(), naming the gene table.
  return $self->_list_seq_region_ids('gene');
}

152 153
=head2 fetch_by_display_label

154 155 156
  Arg [1]    : String $label - display label of gene to fetch
  Example    : my $gene = $geneAdaptor->fetch_by_display_label("BRCA2");
  Description: Returns the gene which has the given display label or undef if
157 158 159
               there is none. If there are more than 1, the gene on the 
               reference slice is reported or if none are on the reference,
               the first one is reported.
160 161 162
  Returntype : Bio::EnsEMBL::Gene
  Exceptions : none
  Caller     : general
163
  Status     : Stable
164 165 166 167

=cut

sub fetch_by_display_label {
  my ($self, $label) = @_;

  # The label is bound to the placeholder, not interpolated into the SQL.
  my $constraint = "x.display_label = ? AND g.is_current = 1";
  $self->bind_param_generic_fetch($label, SQL_VARCHAR);
  my @genes = @{$self->generic_fetch($constraint)};

  # With several matches, prefer the first gene whose slice is a reference
  # slice. A single match is returned without looking at its slice.
  if (scalar(@genes) > 1) {
    foreach my $candidate (@genes) {
      return $candidate if ($candidate->slice->is_reference);
    }
  }

  # Zero or one match, or no match on a reference slice: return the first
  # hit, which is undef when nothing was found.
  return $genes[0];
} ## end sub fetch_by_display_label
192

193
=head2 fetch_all_by_display_label
194

195
  Arg [1]    : String $label - display label of genes to fetch
196
  Example    : my @genes = @{$geneAdaptor->fetch_all_by_display_label("PPP1R2P1")};
197 198 199 200 201 202 203 204 205 206
  Description: Returns all genes which have the given display label or undef if
               there are none. 
  Returntype : listref of Bio::EnsEMBL::Gene objects
  Exceptions : none
  Caller     : general
  Status     : Stable

=cut

sub fetch_all_by_display_label {
  my ($self, $label) = @_;

  # The label is bound to the placeholder, not interpolated into the SQL.
  my $constraint = "x.display_label = ? AND g.is_current = 1";
  $self->bind_param_generic_fetch($label, SQL_VARCHAR);
  my $genes = $self->generic_fetch($constraint);

  # Whatever generic_fetch() produced is handed back unchanged.
  return $genes;
}
216

217 218
=head2 fetch_by_stable_id

219 220
  Arg [1]    : String $id 
               The stable ID of the gene to retrieve
Graham McVicker's avatar
Graham McVicker committed
221
  Example    : $gene = $gene_adaptor->fetch_by_stable_id('ENSG00000148944');
222 223
  Description: Retrieves a gene object from the database via its stable id.
               The gene will be retrieved in its native coordinate system (i.e.
224
               in the coordinate system it is stored in the database). It may
225
               be converted to a different coordinate system through a call to
226
               transform() or transfer(). If the gene or exon is not found
227
               undef is returned instead.
228
  Returntype : Bio::EnsEMBL::Gene or undef
229
  Exceptions : if we can't get the gene in given coord system
Graham McVicker's avatar
Graham McVicker committed
230
  Caller     : general
231
  Status     : Stable
232 233 234

=cut

235
sub fetch_by_stable_id {
  my ($self, $stable_id) = @_;

  # Only the current version of the gene is considered; the stable id is
  # bound to the placeholder, not interpolated.
  my $constraint = "g.stable_id = ? AND g.is_current = 1";
  $self->bind_param_generic_fetch($stable_id, SQL_VARCHAR);

  # Take the first fetched gene; $gene stays undef when nothing matched.
  my ($gene) = @{$self->generic_fetch($constraint)};

  return $gene;
}
244

Jan-hinnerk Vogel's avatar
 
Jan-hinnerk Vogel committed
245 246 247
=head2 fetch_all_by_biotype 

  Arg [1]    : String $biotype 
248 249 250 251 252 253
               listref of $biotypes
               The biotype of the gene to retrieve. You can have as an argument a reference
               to a list of biotypes
  Example    : $gene = $gene_adaptor->fetch_all_by_biotype('protein_coding'); 
               $gene = $gene_adaptor->fetch_all_by_biotype(['protein_coding', 'sRNA', 'miRNA']);
  Description: Retrieves an array reference of gene objects from the database via its biotype or biotypes.
Jan-hinnerk Vogel's avatar
 
Jan-hinnerk Vogel committed
254 255 256 257 258 259 260 261
               The genes will be retrieved in its native coordinate system (i.e.
               in the coordinate system it is stored in the database). It may
               be converted to a different coordinate system through a call to
               transform() or transfer(). If the gene or exon is not found
               undef is returned instead.
  Returntype  : listref of Bio::EnsEMBL::Gene
  Exceptions : if we cant get the gene in given coord system
  Caller     : general
262
  Status     : Stable
Jan-hinnerk Vogel's avatar
 
Jan-hinnerk Vogel committed
263 264 265 266 267

=cut

sub fetch_all_by_biotype {
  my ($self, $biotype) = @_;

  # biotype_constraint() builds the SQL constraint and binds the biotype
  # value(s); it throws when $biotype is undefined.
  my @genes = @{$self->generic_fetch($self->biotype_constraint($biotype))};

  # Hand back a fresh array reference (a copy of the fetched list).
  return \@genes;
}

=head2 biotype_constraint 

  Arg [1]    : String $biotype 
               listref of $biotypes
               The biotype of the gene to retrieve. You can have as an argument a reference
               to a list of biotypes
  Description: Used internally to generate a SQL constraint to restrict a gene query by biotype
  Returntype  : String
  Exceptions : If biotype is not supplied
  Caller     : general
  Status     : Stable

=cut
Jan-hinnerk Vogel's avatar
 
Jan-hinnerk Vogel committed
285

286 287
sub biotype_constraint {
  my ($self, $biotype) = @_;

  if (!defined $biotype) {
    throw("Biotype or listref of biotypes expected");
  }

  my $constraint;
  if (ref($biotype) eq 'ARRAY') {
    # An empty list used to produce the malformed SQL "g.biotype IN )".
    # Reject it up front with the same message as a missing biotype.
    if (!@{$biotype}) {
      throw("Biotype or listref of biotypes expected");
    }

    # Bind the values in list order, one per placeholder.
    foreach my $single_biotype (@{$biotype}) {
      $self->bind_param_generic_fetch($single_biotype, SQL_VARCHAR);
    }
    my $placeholders = join(',', ('?') x scalar(@{$biotype}));
    $constraint = "g.biotype IN (" . $placeholders . ") and g.is_current = 1";
  } else {
    # Single biotype: one placeholder, one bound value.
    $constraint = "g.biotype = ? and g.is_current = 1";
    $self->bind_param_generic_fetch($biotype, SQL_VARCHAR);
  }

  return $constraint;
}
Jan-hinnerk Vogel's avatar
 
Jan-hinnerk Vogel committed
307

308
=head2 count_all_by_biotype 
309

310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329
  Arg [1]     : String $biotype 
                listref of $biotypes
                The biotype of the gene to retrieve. You can have as an argument a reference
                to a list of biotypes
  Example     : $cnt = $gene_adaptor->count_all_by_biotype('protein_coding'); 
                $cnt = $gene_adaptor->count_all_by_biotype(['protein_coding', 'sRNA', 'miRNA']);
  Description : Retrieves count of gene objects from the database via its biotype or biotypes.
  Returntype  : integer
  Caller      : general
  Status      : Stable

=cut

sub count_all_by_biotype {
  my $self    = shift;
  my $biotype = shift;

  # Build the biotype constraint (which also binds the biotype values),
  # then let generic_count() do the counting.
  my $constraint = $self->biotype_constraint($biotype);

  return $self->generic_count($constraint);
}

sub fetch_all {
  my ($self) = @_;

  # Every current gene except those whose biotype is LRG_gene.
  my $constraint = 'g.biotype != "LRG_gene" and g.is_current = 1';
  my @genes      = @{$self->generic_fetch($constraint)};

  # Hand back a fresh array reference (a copy of the fetched list).
  return \@genes;
}

335
=head2 fetch_all_versions_by_stable_id 
336

337 338 339 340 341 342 343 344 345 346
  Arg [1]     : String $stable_id 
                The stable ID of the gene to retrieve
  Example     : $gene = $gene_adaptor->fetch_all_versions_by_stable_id
                  ('ENSG00000148944');
  Description : Similar to fetch_by_stable_id, but retrieves all versions of a
                gene stored in the database.
  Returntype  : listref of Bio::EnsEMBL::Gene
  Exceptions  : if we cant get the gene in given coord system
  Caller      : general
  Status      : At Risk
Graham McVicker's avatar
Graham McVicker committed
347

348 349 350 351 352
=cut

sub fetch_all_versions_by_stable_id {
  my ($self, $stable_id) = @_;

  # No is_current filter here, so every stored version matches.
  my $constraint = "g.stable_id = ?";
  $self->bind_param_generic_fetch($stable_id, SQL_VARCHAR);

  return $self->generic_fetch($constraint);
}
357

Web Admin's avatar
Web Admin committed
358 359
=head2 fetch_by_exon_stable_id

360
  Arg [1]    : String $id
361 362 363 364
               The stable id of an exon of the gene to retrieve
  Example    : $gene = $gene_adptr->fetch_by_exon_stable_id('ENSE00000148944');
  Description: Retrieves a gene object from the database via an exon stable id.
               The gene will be retrieved in its native coordinate system (i.e.
365
               in the coordinate system it is stored in the database). It may
366
               be converted to a different coordinate system through a call to
367
               transform() or transfer(). If the gene or exon is not found
368
               undef is returned instead.
369
  Returntype : Bio::EnsEMBL::Gene or undef
370
  Exceptions : none
Web Admin's avatar
Web Admin committed
371
  Caller     : general
372
  Status     : Stable
Web Admin's avatar
Web Admin committed
373 374 375

=cut

376 377
sub fetch_by_exon_stable_id {
  # A second positional argument ($version) has historically been accepted;
  # it is ignored, as before.
  my ($self, $stable_id) = @_;

  # Gene id of a transcript that contains the current exon with this
  # stable id.
  my $sql = qq(
      SELECT t.gene_id
        FROM transcript as t,
             exon_transcript as et,
             exon as e
       WHERE t.transcript_id = et.transcript_id 
         AND et.exon_id = e.exon_id
         AND e.stable_id = ?
         AND e.is_current = 1
  );

  my $sth = $self->prepare($sql);
  $sth->bind_param(1, $stable_id, SQL_VARCHAR);
  $sth->execute();

  # Only the first row is read, so the handle is finished explicitly (the
  # other single-row lookups in this file do the same).
  my ($dbID) = $sth->fetchrow_array();
  $sth->finish();

  # Kept as an explicit undef so list-context callers still get one element.
  return undef if (!defined($dbID));

  my $gene = $self->fetch_by_dbID($dbID);

  return $gene;
} ## end sub fetch_by_exon_stable_id
402

403
=head2 fetch_all_by_domain
404

405 406
  Arg [1]    : String $domain
               The domain to fetch genes from
407
  Example    : my @genes = @{ $gene_adaptor->fetch_all_by_domain($domain) };
408 409
  Description: Retrieves a listref of genes whose translation contain interpro
               domain $domain. The genes are returned in their native coord
410
               system (i.e. the coord_system they are stored in). If the coord
411 412
               system needs to be changed, then transform or transfer should be
               called on the individual objects returned.
413 414 415
  Returntype : list of Bio::EnsEMBL::Genes
  Exceptions : none
  Caller     : domainview
416
  Status     : Stable
417 418 419

=cut

420
sub fetch_all_by_domain {
  my ($self, $domain) = @_;

  throw("domain argument is required") unless ($domain);

  # Gene ids of current transcripts, restricted to this adaptor's species,
  # whose translation carries a protein feature that maps to the given
  # InterPro accession. GROUP BY collapses repeated gene ids.
  my $sth = $self->prepare(
    qq(
  SELECT    tr.gene_id
  FROM      interpro i,
            protein_feature pf,
            transcript tr,
            translation tl,
            seq_region sr,
            coord_system cs
  WHERE     cs.species_id = ?
    AND     cs.coord_system_id = sr.coord_system_id
    AND     sr.seq_region_id = tr.seq_region_id
    AND     tr.is_current = 1
    AND     tr.transcript_id = tl.transcript_id
    AND     tl.translation_id = pf.translation_id
    AND     pf.hit_name = i.id
    AND     i.interpro_ac = ?
  GROUP BY  tr.gene_id));

  $sth->bind_param(1, $self->species_id(), SQL_VARCHAR);
  $sth->bind_param(2, $domain,             SQL_VARCHAR);

  $sth->execute();

  my @rows = @{$sth->fetchall_arrayref()};
  $sth->finish();

  # Each row holds a single column, the gene id.
  my @gene_ids = map { $_->[0] } @rows;

  return $self->fetch_all_by_dbID_list(\@gene_ids);
} ## end sub fetch_all_by_domain
456

Andreas Kusalananda Kähäri's avatar
Andreas Kusalananda Kähäri committed
457
=head2 fetch_all_by_Slice_and_external_dbname_link
458 459 460 461 462 463

  Arg [1]    : Bio::EnsEMBL::Slice $slice
               The slice to fetch genes on.
  Arg [2]    : (optional) string $logic_name
               the logic name of the type of features to obtain
  Arg [3]    : (optional) boolean $load_transcripts
Andreas Kusalananda Kähäri's avatar
Andreas Kusalananda Kähäri committed
464 465
               if true, transcripts will be loaded immediately
               rather than lazy loaded later.
466
  Arg [4]    : Name of the external database
Andreas Kusalananda Kähäri's avatar
Andreas Kusalananda Kähäri committed
467 468 469 470 471 472 473 474 475
  Example    : @genes = @{
                 $ga->fetch_all_by_Slice_and_external_dbname_link(
                                          $slice, undef, undef, "HUGO" ) };
  Description: Overrides superclass method to optionally load
               transcripts immediately rather than lazy-loading them
               later.  This is more efficient when there are a lot
               of genes whose transcripts are going to be used. The
               genes are then filtered to return only those with
               external database links of the type specified
476 477 478
  Returntype : reference to list of genes
  Exceptions : thrown if exon cannot be placed on transcript slice
  Caller     : 
479
  Status     : Stable
480 481 482 483

=cut

sub fetch_all_by_Slice_and_external_dbname_link {
  # NOTE(review): $load_transcripts is accepted for interface compatibility
  # but is not used anywhere in this method.
  my ($self, $slice, $logic_name, $load_transcripts, $db_name) = @_;

  # Get the external_db_id(s) from the name.
  my $sth = $self->prepare("SELECT external_db_id FROM external_db WHERE db_name = ?");

  $sth->bind_param(1, $db_name, SQL_VARCHAR);
  $sth->execute();

  my $external_db_id;
  $sth->bind_columns(\$external_db_id);

  my @external_db_ids;
  while ($sth->fetch()) {
    push(@external_db_ids, $external_db_id);
  }

  if (scalar(@external_db_ids) == 0) {
    # Unknown database name: warn, list the names that do exist, and
    # return an empty list of genes.
    warn sprintf("Could not find external database " . "'%s' in the external_db table\n" . "Available are:\n", $db_name);

    $sth = $self->prepare("SELECT DISTINCT db_name FROM external_db");

    $sth->execute();

    my $available_db_name;
    $sth->bind_columns(\$available_db_name);

    while ($sth->fetch()) {
      warn "\t$available_db_name\n";
    }
    return [];
  }

  # Get the gene_ids for those with links.
  my $dbe_adaptor = $self->db()->get_DBEntryAdaptor();

  my %linked_genes;
  foreach my $local_external_db_id (@external_db_ids) {
    my @linked_gene_ids = $dbe_adaptor->list_gene_ids_by_external_db_id($local_external_db_id);
    foreach my $gene_id (@linked_gene_ids) {
      $linked_genes{$gene_id} = 1;
    }
  }

  # Get all the current genes on the slice.
  my $genes = $self->SUPER::fetch_all_by_Slice_constraint($slice, 'g.is_current = 1', $logic_name);

  # Keep only the genes whose dbID is in the linked set, in fetch order.
  my @genes_passed;
  foreach my $gene (@$genes) {
    if (exists($linked_genes{$gene->dbID()})) {
      push(@genes_passed, $gene);
    }
  }

  # Return the list of those that passed.
  return \@genes_passed;
} ## end sub fetch_all_by_Slice_and_external_dbname_link
539

540 541 542 543
=head2 fetch_all_by_Slice

  Arg [1]    : Bio::EnsEMBL::Slice $slice
               The slice to fetch genes on.
544 545
  Arg [2]    : (optional) string $logic_name
               the logic name of the type of features to obtain
546 547 548
  Arg [3]    : (optional) boolean $load_transcripts
               if true, transcripts will be loaded immediately rather than
               lazy loaded later.
549 550 551 552
  Arg [4]    : (optional) string $source
               the source name of the features to obtain.
  Arg [5]    : (optional) string biotype
                the biotype of the features to obtain.
553 554 555 556 557
  Example    : @genes = @{$gene_adaptor->fetch_all_by_Slice($slice)};
  Description: Overrides superclass method to optionally load transcripts
               immediately rather than lazy-loading them later.  This
               is more efficient when there are a lot of genes whose
               transcripts are going to be used.
Jan-hinnerk Vogel's avatar
 
Jan-hinnerk Vogel committed
558
  Returntype : reference to list of genes 
559
  Exceptions : thrown if exon cannot be placed on transcript slice
Jan-hinnerk Vogel's avatar
 
Jan-hinnerk Vogel committed
560
  Caller     : Slice::get_all_Genes
561
  Status     : Stable
562 563 564 565

=cut

sub fetch_all_by_Slice {
  my ($self, $slice, $logic_name, $load_transcripts, $source, $biotype) = @_;

  my $constraint = 'g.is_current = 1';

  # $source and $biotype are caller-supplied, so they are escaped by the
  # database handle's quote() (which also adds the surrounding quotes)
  # instead of being pasted between literal quote characters.
  if (defined($source) || defined($biotype)) {
    my $dbh = $self->db()->dbc()->db_handle();
    if (defined($source)) {
      $constraint .= " and g.source = " . $dbh->quote($source);
    }
    if (defined($biotype)) {
      $constraint .= " and g.biotype = " . $dbh->quote($biotype);
    }
  }

  my $genes = $self->SUPER::fetch_all_by_Slice_constraint($slice, $constraint, $logic_name);

  # If there are less than two genes, still do lazy-loading.
  if (!$load_transcripts || @$genes < 2) {
    return $genes;
  }

  # Preload all of the transcripts now, instead of lazy loading later,
  # faster than one query per transcript.

  # First check if transcripts are already preloaded.
  # FIXME: Should check all transcripts.
  if (exists($genes->[0]->{'_transcript_array'})) {
    return $genes;
  }

  # Get extent of region spanned by the genes.
  my ($min_start, $max_end);
  foreach my $g (@$genes) {
    if (!defined($min_start) || $g->seq_region_start() < $min_start) {
      $min_start = $g->seq_region_start();
    }
    if (!defined($max_end) || $g->seq_region_end() > $max_end) {
      $max_end = $g->seq_region_end();
    }
  }

  # Reuse the caller's slice when it already covers that extent; otherwise
  # fetch a wider slice on the same region, strand and coordinate system.
  my $ext_slice;

  if ($min_start >= $slice->start() && $max_end <= $slice->end()) {
    $ext_slice = $slice;
  } else {
    my $sa = $self->db()->get_SliceAdaptor();
    $ext_slice = $sa->fetch_by_region($slice->coord_system->name(), $slice->seq_region_name(), $min_start, $max_end, $slice->strand(), $slice->coord_system->version());
  }

  # Associate transcript identifiers with genes.

  my %g_hash = map { $_->dbID => $_ } @{$genes};

  my $g_id_str = join(',', keys(%g_hash));

  my $sth = $self->prepare("SELECT gene_id, transcript_id " . "FROM   transcript " . "WHERE  gene_id IN ($g_id_str)");

  $sth->execute();

  my ($g_id, $tr_id);
  $sth->bind_columns(\($g_id, $tr_id));

  my %tr_g_hash;

  while ($sth->fetch()) {
    $tr_g_hash{$tr_id} = $g_hash{$g_id};
  }

  # No transcript rows for any of these genes: nothing to preload, and the
  # transcript query below would otherwise be built with an empty "IN ()".
  if (!%tr_g_hash) {
    return $genes;
  }

  my $ta = $self->db()->get_TranscriptAdaptor();
  my $transcripts = $ta->fetch_all_by_Slice($ext_slice, 1, undef, sprintf("t.transcript_id IN (%s)", join(',', sort { $a <=> $b } keys(%tr_g_hash))));

  # Move transcripts onto gene slice, and add them to genes.
  foreach my $tr (@{$transcripts}) {
    if (!exists($tr_g_hash{$tr->dbID()})) { next }

    my $new_tr;
    if ($slice != $ext_slice) {
      # Transcripts were fetched on the wider slice; move them back.
      $new_tr = $tr->transfer($slice);
      if (!defined($new_tr)) {
        throw("Unexpected. " . "Transcript could not be transfered onto Gene slice.");
      }
    } else {
      $new_tr = $tr;
    }

    $tr_g_hash{$tr->dbID()}->add_Transcript($new_tr);
  }

  return $genes;
} ## end sub fetch_all_by_Slice
654

655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684
=head2 count_all_by_Slice

  Arg [1]    : Bio::EnsEMBL::Slice $slice
               The slice to count genes on.
  Arg [2]    : (optional) biotype(s) string or arrayref of strings 
                the biotype of the features to count.
  Arg [3]    : (optional) string $source
               the source name of the features to count.
  Example    : $cnt = $gene_adaptor->count_all_by_Slice($slice);
  Description: Method to count genes on a given slice, filtering by biotype and source
  Returntype : integer
  Exceptions : thrown if exon cannot be placed on transcript slice
  Status     : Stable
  Caller     : general
=cut

sub count_all_by_Slice {
  my ($self, $slice, $biotype, $source) = @_;

  my $constraint = 'g.is_current = 1';
  if (defined($source)) {
    # Bound rather than interpolated, like the biotype values below. It is
    # bound first because its placeholder comes first in the constraint.
    $constraint .= " and g.source = ?";
    $self->bind_param_generic_fetch($source, SQL_VARCHAR);
  }
  if (defined($biotype)) {
    # biotype_constraint() accepts a string or a listref and binds the
    # value(s) itself.
    $constraint .= " and " . $self->biotype_constraint($biotype);
  }

  return $self->count_by_Slice_constraint($slice, $constraint);
}

685
=head2 fetch_by_transcript_id
686

687 688
  Arg [1]    : Int $trans_id
               Unique database identifier for the transcript whose gene should
689 690 691
               be retrieved. The gene is returned in its native coord
               system (i.e. the coord_system it is stored in). If the coord
               system needs to be changed, then transform or transfer should
692
               be called on the returned object. undef is returned if the
693
               gene or transcript is not found in the database.
694
  Example    : $gene = $gene_adaptor->fetch_by_transcript_id(1241);
Graham McVicker's avatar
Graham McVicker committed
695
  Description: Retrieves a gene from the database via the database identifier
696
               of one of its transcripts.
Graham McVicker's avatar
Graham McVicker committed
697 698
  Returntype : Bio::EnsEMBL::Gene
  Exceptions : none
699
  Caller     : general
700
  Status     : Stable
701 702 703

=cut

704
sub fetch_by_transcript_id {
  my ($self, $trans_id) = @_;

  # this is a cheap SQL call
  my $sth = $self->prepare(
    qq(
      SELECT tr.gene_id
      FROM transcript tr
      WHERE tr.transcript_id = ?
  ));

  $sth->bind_param(1, $trans_id, SQL_INTEGER);
  $sth->execute();

  # Only the first row is read; the handle is then finished.
  my ($geneid) = $sth->fetchrow_array();

  $sth->finish();

  # Kept as an explicit undef so list-context callers still get one element.
  return undef if (!defined $geneid);

  my $gene = $self->fetch_by_dbID($geneid);
  return $gene;
}

728 729
=head2 fetch_by_transcript_stable_id

730 731 732 733 734 735
  Arg [1]    : string $trans_stable_id
               transcript stable ID whose gene should be retrieved
  Example    : my $gene = $gene_adaptor->fetch_by_transcript_stable_id
                 ('ENST0000234');
  Description: Retrieves a gene from the database via the stable ID of one of
               its transcripts
736 737
  Returntype : Bio::EnsEMBL::Gene
  Exceptions : none
738
  Caller     : general
739
  Status     : Stable
740 741 742 743

=cut

sub fetch_by_transcript_stable_id {
  my ($self, $trans_stable_id) = @_;

  # Gene id of the current transcript with this stable id.
  my $sth = $self->prepare(
    qq(
        SELECT  gene_id
	FROM	transcript
        WHERE   stable_id = ?
        AND     is_current = 1
    ));

  $sth->bind_param(1, $trans_stable_id, SQL_VARCHAR);
  $sth->execute();

  # Only the first row is read; the handle is then finished.
  my ($geneid) = $sth->fetchrow_array();
  $sth->finish;

  # Kept as an explicit undef so list-context callers still get one element.
  return undef if (!defined $geneid);

  my $gene = $self->fetch_by_dbID($geneid);
  return $gene;
}
765

766 767
=head2 fetch_by_translation_stable_id

768 769 770 771 772
  Arg [1]    : String $translation_stable_id
               The stable id of a translation of the gene to be obtained
  Example    : my $gene = $gene_adaptor->fetch_by_translation_stable_id
                 ('ENSP00000278194');
  Description: Retrieves a gene via the stable id of one of its translations.
773
  Returntype : Bio::EnsEMBL::Gene
Graham McVicker's avatar
Graham McVicker committed
774
  Exceptions : none
775
  Caller     : general
776
  Status     : Stable
Web Admin's avatar
Web Admin committed
777 778 779

=cut

780
sub fetch_by_translation_stable_id {
781
  my ($self, $translation_stable_id) = @_;
782

783 784
  my $sth = $self->prepare(
	qq(
785 786
        SELECT  tr.gene_id
	FROM    transcript tr,
Monika Komorowska's avatar
Monika Komorowska committed
787 788
                translation tl
	WHERE   tl.stable_id = ?
789 790 791 792
        AND     tr.transcript_id = tl.transcript_id
        AND     tr.is_current = 1
    ));

793 794
  $sth->bind_param(1, $translation_stable_id, SQL_VARCHAR);
  $sth->execute();
Web Admin's avatar
Web Admin committed
795

796 797 798 799 800 801
  my ($geneid) = $sth->fetchrow_array();
  $sth->finish;
  if (!defined $geneid) {
	return undef;
  }
  return $self->fetch_by_dbID($geneid);