diff --git a/modules/Bio/EnsEMBL/DBSQL/DBAdaptor.pm b/modules/Bio/EnsEMBL/DBSQL/DBAdaptor.pm index 91fe5819086bc0ae8924bee8752331b549bb52bd..ea80ad1d1c5856f75356754a241dc0f805dadb92 100755 --- a/modules/Bio/EnsEMBL/DBSQL/DBAdaptor.pm +++ b/modules/Bio/EnsEMBL/DBSQL/DBAdaptor.pm @@ -51,11 +51,9 @@ use Bio::EnsEMBL::DBSQL::DBConnection; =head2 new - Arg [1] : string SOURCE - The source name of the database. This may be removed soon. - Arg [2] : Bio::EnsEMBL::DBSQL::DBAdaptor DNADB - The dna database to be attached to this database. This may also - be changed. + Arg [-DNADB]: (optional) Bio::EnsEMBL::DBSQL::DBAdaptor DNADB + All sequence, assembly, contig information etc, will be + retrieved from this database instead. Arg [..] : Other args are passed to superclass Bio::EnsEMBL::DBSQL::DBConnection Example : $db = new Bio::EnsEMBL::DBSQL::DBAdaptor( @@ -76,11 +74,7 @@ sub new { #call superclass constructor my $self = $class->SUPER::new(@args); - my ( $source, $dnadb ) = $self->_rearrange([qw(SOURCE DNADB)],@args); - - if(defined $source) { - $self->source($source); - } + my ( $dnadb ) = $self->_rearrange([qw(DNADB)],@args); if(defined $dnadb) { $self->dnadb($dnadb); @@ -90,31 +84,6 @@ sub new { } -=head2 source - - Arg [1] : (optional) string $source - The source of info in the database connected to (e.g. 'embl') - Example : $db_adaptor->source('sanger'); - Description: Sets/Gets the source or human readable name of the genes in - the connected database. For example for the sanger db the source - would be 'sanger'. 
- Returntype : string - Exceptions : none - Caller : Bio::EnsEMBL::GeneAdaptor Bio::EnsEMBL::LiteGeneAdaptor EnsWeb - -=cut - -sub source { - my ($self, $source) = @_; - - if(defined $source) { - $self->{'_source'} = $source; - } - - return $self->{'_source'}; -} - - =head2 get_MetaContainer Args : none @@ -951,4 +920,20 @@ sub add_ExternalFeatureFactory{ } + + + +# +# sub DEPRECATED METHODS +# +# + +sub source { + my $self = shift; + + $self->warn("DBAdaptor::source method is deprecated - do not use" . + $self->stack_trace_dump); + +} + 1; diff --git a/modules/Bio/EnsEMBL/DBSQL/GeneAdaptor.pm b/modules/Bio/EnsEMBL/DBSQL/GeneAdaptor.pm index b88ad1e00d9309e7aa3c0007291b06731216a708..072579cc66bd973f792ff85b5f990324556c6509 100644 --- a/modules/Bio/EnsEMBL/DBSQL/GeneAdaptor.pm +++ b/modules/Bio/EnsEMBL/DBSQL/GeneAdaptor.pm @@ -194,7 +194,6 @@ sub fetch_by_dbID { $ana = $self->db->get_AnalysisAdaptor->fetch_by_dbID($arr[4]); $gene->analysis($ana); $gene->type($arr[5]); - $gene->source($self->db->source()); $first = 0; } @@ -435,6 +434,7 @@ sub fetch_all_by_contig_list{ Arg [1] : Bio::EnsEMBL::Slice $slice the slice to fetch genes from + Arg [2] Example : $genes = $gene_adaptor->fetch_all_by_Slice($slice); Description: Retrieves all genes which are present on a slice Returntype : listref of Bio::EnsEMBL::Genes in slice coordinates @@ -444,22 +444,22 @@ sub fetch_all_by_contig_list{ =cut sub fetch_all_by_Slice { - my ( $self, $slice, $type ) = @_; + my ( $self, $slice, $logic_name ) = @_; my @out = (); + $logic_name ||= ''; + + my $key = $slice->name .":" . 
$logic_name; + #check the cache which uses the slice name as it key - if($self->{'_slice_gene_cache'}{$slice->name()}) { - return $self->{'_slice_gene_cache'}{$slice->name()}; + if($self->{'_slice_gene_cache'}{$key}) { + return $self->{'_slice_gene_cache'}{$key}; } my $mapper = $self->db->get_AssemblyMapperAdaptor->fetch_by_type ( $slice->assembly_type() ); - $mapper->register_region( $slice->chr_name(), - $slice->chr_start(), - $slice->chr_end()); - my @cids = $mapper->list_contig_ids( $slice->chr_name(), $slice->chr_start(), $slice->chr_end()); @@ -468,15 +468,31 @@ sub fetch_all_by_Slice { if ( scalar (@cids) == 0 ) { return []; } - + my $str = "(".join( ",",@cids ).")"; - + + my $where = "WHERE e.contig_id in $str + AND et.exon_id = e.exon_id + AND et.transcript_id = t.transcript_id + AND g.gene_id = t.gene_id"; + + if($logic_name) { + #determine analysis id via logic_name + my $analysis = + $self->db->get_AnalysisAdaptor()->fetch_by_logic_name($logic_name); + unless(defined($analysis) && $analysis->dbID()) { + $self->warn("No analysis for logic name $logic_name exists"); + return []; + } + + my $analysis_id = $analysis->dbID; + $where .= " AND g.analysis_id = $analysis_id"; + } + my $sql = " SELECT distinct(t.gene_id) - FROM transcript t,exon_transcript et,exon e - WHERE e.contig_id in $str - AND et.exon_id = e.exon_id - AND et.transcript_id = t.transcript_id"; + FROM transcript t,exon_transcript et,exon e, gene g + $where"; my $sth = $self->db->prepare($sql); $sth->execute; @@ -493,7 +509,7 @@ sub fetch_all_by_Slice { } #place the results in an LRU cache - $self->{'_slice_gene_cache'}{$slice->name} = \@out; + $self->{'_slice_gene_cache'}{$key} = \@out; return \@out; } @@ -1071,7 +1087,7 @@ sub update { $sth->execute($gene->type, $gene->analysis->dbID, $xref_id, - $tcout, + $tcount, $gene->dbID); } diff --git a/modules/Bio/EnsEMBL/Gene.pm b/modules/Bio/EnsEMBL/Gene.pm index 1c1adbc7c1ccea2853f942f407d8bf511f00f9c7..87ed3f4336cefba8fa7520e231cc07a97f4bd004 
100755 --- a/modules/Bio/EnsEMBL/Gene.pm +++ b/modules/Bio/EnsEMBL/Gene.pm @@ -1,9 +1,6 @@ - # # BioPerl module for Gene # -# Cared for by Ewan Birney <birney@sanger.ac.uk> -# # Copyright Ewan Birney # # You may distribute this module under the same terms as perl itself @@ -20,48 +17,45 @@ Confirmed genes. Basically has a set of transcripts =head1 DESCRIPTION -Needs more description. +A representation of a Gene within the ensembl system. A gene is basically a +set of one or more alternative transcripts. =head1 CONTACT -Describe contact details here - -=head1 APPENDIX - -The rest of the documentation details each of the object methods. Internal methods are usually preceded with a _ +Contact the EnsEMBL development mailing list for info <ensembl-dev@ebi.ac.uk> =cut - -# Let the code begin... - - package Bio::EnsEMBL::Gene; use vars qw(@ISA); use strict; -# Object preamble - inheriets from Bio::SeqFeature::Generic - use Bio::EnsEMBL::Root; use Bio::EnsEMBL::TranscriptI; use Bio::Annotation::DBLink; use Bio::EnsEMBL::DBEntry; - @ISA = qw(Bio::EnsEMBL::Root); -# new() is inherited from Bio::Root::Object -# _initialize is where the heavy stuff will happen when new is called + +=head2 new + + Arg [1] : none + Example : $gene = Bio::EnsEMBL::Gene->new(); + Description: Creates a new gene object + Returntype : Bio::EnsEMBL::Gene + Exceptions : none + Caller : general + +=cut sub new { my($class,@args) = @_; my $self = bless {}, $class; - $self->{'_transcript_array'} = []; -# $self->{'_db_link'} = []; -# set stuff in self from @args - return $self; # success - we hope! 
+ + return $self; } @@ -248,29 +242,6 @@ sub chr_name { } -=head2 source - - Arg [1] : string $source - Example : none - Description: get/set for attribute source - Returntype : string - Exceptions : none - Caller : general - -=cut - - -sub source { - my ($self, $source) = @_; - - if(defined $source) { - $self->{'_source'} = $source; - } - - return $self->{'_source'}; -} - - =head2 is_known diff --git a/modules/Bio/EnsEMBL/Lite/GeneAdaptor.pm b/modules/Bio/EnsEMBL/Lite/GeneAdaptor.pm index ba0c8bb7fa70acf3beab10cb437348b41e87b138..d9c53cb4a17a615ba2ae888df50d803abd0f95b9 100644 --- a/modules/Bio/EnsEMBL/Lite/GeneAdaptor.pm +++ b/modules/Bio/EnsEMBL/Lite/GeneAdaptor.pm @@ -30,6 +30,7 @@ package Bio::EnsEMBL::Lite::GeneAdaptor; use Bio::EnsEMBL::DBSQL::BaseAdaptor; use Bio::EnsEMBL::DBSQL::DBAdaptor; use Bio::EnsEMBL::Transcript; +use Bio::EnsEMBL::Analysis; use Bio::EnsEMBL::Gene; use Bio::EnsEMBL::Utils::Cache; #CPAN LRU Cache @@ -166,12 +167,14 @@ sub fetch_by_gene_id_list { =cut sub fetch_all_by_Slice { - my ( $self, $slice, $empty_flag ) = @_; + my ( $self, $slice, $logic_name, $empty_flag ) = @_; + + my $key = $slice->name . ":$logic_name"; if($empty_flag) { # return from cache or the _get_empty_Genes fn while caching results.... - return $self->{'_slice_empty_gene_cache'}{$slice->name()} ||= - $self->_get_empty_Genes($slice); + return $self->{'_slice_empty_gene_cache'}{$key} ||= + $self->_get_empty_Genes($slice, $logic_name); } #check the cache which uses the slice name as it key @@ -179,52 +182,42 @@ sub fetch_all_by_Slice { return $self->{'_slice_gene_cache'}{$slice->name()}; } - my $sth = $self->prepare( "SELECT t.id, t.transcript_id, t.chr_name, t.chr_start, t.chr_end, + my $where = 'WHERE t.chr_name = ? and t.chr_start <= ? and + t.chr_start >= ? and t.chr_end >= ? 
+ AND g.gene_id = t.gene_id + AND g.db = t.db'; + + my @bind_vals = ( $slice->chr_name, + $slice->chr_end, + $slice->chr_start - $MAX_TRANSCRIPT_LENGTH, + $slice->chr_start); + + if($logic_name) { + $where .= " and g.analysis = ?"; + push @bind_vals, $logic_name; + } + + my $sth = $self->prepare( + "SELECT t.id, t.transcript_id, t.chr_name, t.chr_start, t.chr_end, t.chr_strand, t.transcript_name, t.translation_id, t.translation_name, t.gene_id, t.type, t.gene_name, t.db, - t.exon_structure, t.external_name, t.external_status, t.exon_ids, t.external_db, - t.coding_start, t.coding_end, + t.exon_structure, t.external_name, t.external_status, + t.exon_ids, t.external_db, t.coding_start, t.coding_end, g.external_name as gene_external_name, g.external_db as gene_external_db, g.external_status as gene_external_status, - g.type as gene_type - FROM transcript t - LEFT JOIN gene g - ON g.gene_id = t.gene_id - AND g.db = t.db - WHERE t.chr_name = ? and t.chr_start <= ? and t.chr_start >= ? and - t.chr_end >= ?" - ); + g.type as gene_type, + g.analysis as analysis + FROM transcript t, gene g + $where"); - $sth->execute( $slice->chr_name, $slice->chr_end, - $slice->chr_start - $MAX_TRANSCRIPT_LENGTH, - $slice->chr_start ); + $sth->execute( @bind_vals ); - return $self->{'_slice_gene_cache'}{$slice->name} = + return $self->{'_slice_gene_cache'}{$key} = $self->_objects_from_sth( $sth, $slice ); - } -=head2 fetch_by_Slice - - Arg [1] : none - Example : none - Description: DEPRECATED use fetch_all_by_Slice instead - Returntype : none - Exceptions : none - Caller : none - -=cut - -sub fetch_by_Slice { - my ($self, @args) = @_; - - $self->warn("fetch_by_Slice has been renamed fetch_all_by_Slice\n" . 
caller); - - return $self->fetch_all_by_Slice(@args); -} - sub fetch_by_DBEntry { my ($self, $db, $dbentry, $chr_coords ) = @_; @@ -262,7 +255,8 @@ sub fetch_by_stable_id { t.external_db, t.coding_start, t.coding_end, g.external_name as gene_external_name, g.external_status as gene_external_status, - g.external_db as gene_external_db, g.type as gene_type + g.external_db as gene_external_db, g.type as gene_type, + g.analysis as analysis FROM transcript t LEFT JOIN gene g ON g.gene_id = t.gene_id @@ -298,7 +292,8 @@ sub fetch_by_transcript_stable_id { t.external_db, t.coding_start, t.coding_end, g.external_name as gene_external_name, g.external_status as gene_external_status, - g.external_db as gene_external_db, g.type as gene_type + g.external_db as gene_external_db, g.type as gene_type, + g.analysis as analysis FROM transcript t LEFT JOIN gene g ON g.gene_id = t.gene_id @@ -328,8 +323,10 @@ sub _objects_from_sth { my %exon_cache; my %gene_cache; + my %analysis_cache; my $core_db_adaptor = $self->db->get_db_adaptor('core'); + my ( $gene, $transcript, $translation ); my ( $exon_id ); @@ -347,9 +344,14 @@ sub _objects_from_sth { $gene->stable_id( $hr->{'gene_name'} ); $gene->dbID( $hr->{'gene_id'} ); $gene->adaptor( $core_db_adaptor->get_GeneAdaptor() ); - $gene->source( $hr->{'db'} ); + #$gene->source( $hr->{'db'} ); $gene->strand( $hr->{'chr_strand'} ); + #set the gene's analysis + $analysis_cache{$hr->{'analysis'}} ||= + Bio::EnsEMBL::Analysis->new(-logic_name => $hr->{'analysis'}); + $gene->analysis($analysis_cache{$hr->{'analysis'}}); + if( defined $hr->{'gene_type' } ) { $gene->external_name( $hr->{'gene_external_status'} ); $gene->external_name( $hr->{'gene_external_name'} ); @@ -549,22 +551,37 @@ sub _objects_from_sth { =cut sub _get_empty_Genes { - my ($self, $slice) = @_; + my ($self, $slice, $logic_name) = @_; my $chr_start = $slice->chr_start(); my $chr_end = $slice->chr_end(); my $chr_name = $slice->chr_name(); + + + my $where = "WHERE g.chr_name = ? 
AND g.chr_start <= ? AND + g.chr_start >= ? AND g.chr_end >= ?"; + + my @bind_vals = ( $chr_name, + $chr_end, + $chr_start - $MAX_TRANSCRIPT_LENGTH, + $chr_start ); + + + if($logic_name) { + $where .= " and g.analysis = ?"; + push @bind_vals, $logic_name; + } + my $sth = $self->prepare ( "SELECT g.db, g.gene_id, g.gene_name, g.chr_name, g.chr_start, - g.chr_end, g.chr_strand, g.type, g.external_name, g.external_db, g.external_status + g.chr_end, g.chr_strand, g.type, g.external_name, g.external_db, + g.external_status, g.analysis FROM gene g - WHERE g.chr_name = ? AND g.chr_start <= ? AND - g.chr_start >= ? AND g.chr_end >= ?" ); + $where" + ); - $sth->execute( $chr_name, $chr_end, - $chr_start - $MAX_TRANSCRIPT_LENGTH, - $chr_start ); + $sth->execute(@bind_vals); my @out = (); @@ -572,6 +589,8 @@ sub _get_empty_Genes { my $hashref; + my %analysis_cache; + while($hashref = $sth->fetchrow_hashref()) { my $gene = new Bio::EnsEMBL::Gene(); $gene->start($hashref->{'chr_start'} - $chr_start); @@ -579,9 +598,14 @@ sub _get_empty_Genes { $gene->stable_id( $hashref->{'gene_name'} ); $gene->dbID( $hashref->{'gene_id'} ); $gene->adaptor( $core_gene_adaptor ); - $gene->source( $hashref->{'db'} ); + #$gene->source( $hashref->{'db'} ); $gene->strand( $hashref->{'chr_strand'} ); + my $analysis = $analysis_cache{$hashref->{'analysis'}} ||= + Bio::EnsEMBL::Analysis->new(-logic_name => $hashref->{'analysis'}); + + $gene->analysis($analysis); + if( defined $hashref->{'type' } ) { $gene->external_status( $hashref->{'external_status'} ); $gene->external_name( $hashref->{'external_name'} ); @@ -641,7 +665,8 @@ sub fetch_all_by_external_name { t.coding_start, t.coding_end, g.external_name as gene_external_name, g.external_db as gene_external_db, g.external_status as gene_external_status, - g.type as gene_type + g.type as gene_type, + g.analysis as analysis FROM transcript t, gene_xref as gx, gene as g where g.gene_id = t.gene_id AND g.db = t.db and t.db = ? and gx.external_name = ? 
and gx.gene_id = g.gene_id order by g.gene_name, t.gene_name" @@ -683,7 +708,7 @@ sub _get_empty_Genes_by_external_name { $gene->stable_id( $hashref->{'gene_name'} ); $gene->dbID( $hashref->{'gene_id'} ); $gene->adaptor( $core_gene_adaptor ); - $gene->source( $hashref->{'db'} ); + #$gene->source( $hashref->{'db'} ); $gene->strand( $hashref->{'chr_strand'} ); if( defined $hashref->{'type' } ) { diff --git a/modules/Bio/EnsEMBL/Slice.pm b/modules/Bio/EnsEMBL/Slice.pm index 0d2b52499418f70e65ead3000f9f5490fa0bcf32..9e5894ef9eb717dee85a8f503e68758ce8c949d9 100644 --- a/modules/Bio/EnsEMBL/Slice.pm +++ b/modules/Bio/EnsEMBL/Slice.pm @@ -34,10 +34,6 @@ This modules is part of the Ensembl project http://www.ensembl.org Questions can be posted to the ensembl-dev mailing list: ensembl-dev@ebi.ac.uk -=head1 APPENDIX - -The rest of the documentation details each of the object methods. Internal methods are usually preceded with a _ - =cut @@ -58,7 +54,6 @@ use Bio::EnsEMBL::Tile; @ISA = qw(Bio::EnsEMBL::Root Bio::PrimarySeqI); - =head2 new Arg [...] : List of optional named arguments @@ -593,7 +588,9 @@ sub get_all_SNPs { =head2 get_all_Genes - Arg [1] : (optional) boolean $empty_flag + Arg [1] : (optional) string $logic_name + The name of the analysis used to generate the genes to retrieve + Arg [2] : (optional) boolean $empty_flag Example : @genes = $slice->get_all_Genes; Description: Retrieves all genes that overlap this slice. 
The empty flag is used by the web code and is used to retrieve light weight genes @@ -608,40 +605,18 @@ sub get_all_SNPs { =cut sub get_all_Genes{ - my ($self, $empty_flag) = @_; + my ($self, $logic_name, $empty_flag) = @_; #caching is performed on a per slice basis in the GeneAdaptor - return $self->adaptor->db->get_GeneAdaptor->fetch_all_by_Slice($self, + return $self->adaptor->db->get_GeneAdaptor->fetch_all_by_Slice($self, + $logic_name, $empty_flag); } -=head2 get_all_Genes_by_source - Arg [1] : string $source - Arg [2] : (optional) boolean $empty_flag - Example : @genes = @{$slice->get_all_Genes_by_souce('core')}; - Description: Retrieves genes that overlap this slice from database $source. - This is primarily used by web code to retrieve subsets of genes - from the lite database (which contains an sets of genes from - several databases). The empty flag indicates light weight - genes that only have a start, end and strand should be used - (only works if lite db is available). If the lite database has - been attached to the core database this method will use the - lite database (and genes will not be as full featured). - Returntype : listref of Bio::EnsEMBL::Genes - Exceptions : none - Caller : contigview -=cut - -sub get_all_Genes_by_source{ - my ($self, $source, $empty_flag) = @_; - my @out = - grep { $_->source eq $source } @{$self->get_all_Genes($empty_flag)}; - return \@out; -} @@ -1353,4 +1328,25 @@ sub accession_number { # sub DEPRECATED METHODS # ############################################################################### + +=head2 get_all_Genes_by_source + + Arg [1] : none + Example : none + Description: DEPRECATED use get_all_Genes instead + Returntype : none + Exceptions : none + Caller : none + +=cut + +sub get_all_Genes_by_source { + my ($self, @args) = @_; + + $self->warn("call to deprecated method get_all_Genes_by_source. " . + "Use get_all_Genes instead\n " . join(':', caller)); + + return $self->get_all_Genes; +} + 1;