diff --git a/modules/Bio/EnsEMBL/Gene.pm b/modules/Bio/EnsEMBL/Gene.pm
index 841eef206b50af738415c8cfaac7ac266e6a3b0f..265507cb335e5ecbd211cb4dfb209d8a0b02bbc0 100755
--- a/modules/Bio/EnsEMBL/Gene.pm
+++ b/modules/Bio/EnsEMBL/Gene.pm
@@ -380,12 +380,14 @@ sub dbID {
 
   Arg [1]    : string $external_name
   Example    : none
-  Description: get/set for attribute external_name. It could be calculated
-               from dblinks in a species dependent way. Well introduce
-               that later.
+  Description: get/set for attribute external_name. If no name has been set
+               the longest transcript of the gene is determined and the
+               call is delegated to the external_name method on Transcript.
+               Species dependent searching is handled by that method on
+               Transcript.
   Returntype : string
   Exceptions : none
-  Caller     : Lite::GeneAdaptor knows how to set it correct
+  Caller     : general
 
 =cut
 
@@ -395,36 +397,99 @@ sub external_name {
   if( defined $arg ) {
     $self->{'_external_name'} = $arg;
   }
+  elsif( !defined $self->{'_external_name'} ) {
+    # Nothing has been set explicitly, so find the transcript with the
+    # longest length which is attached to this gene. Length is the
+    # determining factor when having to select between transcripts
+    # and their external references.
+    my $transcript = $self->_get_longest_Transcript;
+
+    # a gene without transcripts has nothing to delegate to
+    if( defined $transcript ) {
+      $self->{'_external_name'} = $transcript->external_name;
+    }
+  }
 
   return $self->{'_external_name'};
 }
 
 
-
 =head2 external_db
 
   Arg [1]    : string $external_db
   Example    : none
   Description: get/set for attribute external_db. The db is the one that
-               belongs to the external_name
+               belongs to the external_name. If no db has been set the
+               longest transcript of the gene is determined and the
+               call is delegated to the external_db method on Transcript.
+               Species dependent searching is handled by that method on
+               Transcript.
   Returntype : string
   Exceptions : none
   Caller     : general
 
 =cut
 
-
 sub external_db {
   my ($self, $arg ) = @_;
 
   if( defined $arg ) {
     $self->{'_external_db'} = $arg;
   }
+  elsif( !defined $self->{'_external_db'} ) {
+    # Nothing has been set explicitly, so find the transcript with the
+    # longest length which is attached to this gene. Length is the
+    # determining factor when having to select between transcripts
+    # and their external references.
+    my $transcript = $self->_get_longest_Transcript;
+
+    # a gene without transcripts has nothing to delegate to
+    if( defined $transcript ) {
+      $self->{'_external_db'} = $transcript->external_db;
+    }
+  }
 
   return $self->{'_external_db'};
 }
 
 
+=head2 _get_longest_Transcript
+
+  Args       : none
+  Example    : none
+  Description: An INTERNAL method which determines the longest transcript
+               of this gene, as reported by the length method of each
+               transcript. When several transcripts share the greatest
+               length the first one encountered is chosen.
+  Returntype : a single Bio::EnsEMBL::Transcript, or undef if the gene
+               has no transcripts
+  Exceptions : none
+  Caller     : external_name and external_db methods on Gene.pm
+
+=cut
+
+sub _get_longest_Transcript {
+  my $self = shift;
+
+  my $transcripts = $self->get_all_Transcripts || [];
+
+  # start from the first transcript so that one is still returned when
+  # no transcript reports a length greater than zero
+  my $longest        = $transcripts->[0];
+  my $longest_length = 0;
+
+  foreach my $trans ( @{$transcripts} ) {
+    # ask for the length once per transcript
+    my $length = $trans->length;
+
+    if ( $length > $longest_length ) {
+      $longest_length = $length;
+      $longest        = $trans;
+    }
+  }
+
+  return $longest;
+}
 
 =head2 description
 
@@ -883,7 +948,7 @@ sub temporary_id {
   Description: You can set the species for this gene if you want to use species
                specific behaviour. Otherwise species is retrieved from attached
                database.
-  Returntype : Bio::EnsEMBL::Species
+  Returntype : Bio::Species
   Exceptions : none
   Caller     : external_name, external_db, general for setting
 
diff --git a/modules/Bio/EnsEMBL/Transcript.pm b/modules/Bio/EnsEMBL/Transcript.pm
index 478bcb6c1284683ac7cdca4041c4f3d70c3c443a..df4ff6b1d42307b39d7f070a4646cb8aaba0a0dc 100755
--- a/modules/Bio/EnsEMBL/Transcript.pm
+++ b/modules/Bio/EnsEMBL/Transcript.pm
@@ -167,6 +167,10 @@ sub external_db {
   if( defined $arg ) {
     $self->{'_external_db'} = $arg;
   }
+  elsif( !defined $self->{'_external_db'} ) {
+    # nothing set explicitly: look the db up among the DBLinks
+    $self->{'_external_db'} = $self->_get_external_info("db");
+  }
   return $self->{'_external_db'};
 }
 
@@ -184,16 +188,103 @@ sub external_db {
 =cut
 
 sub external_name {
-  my ($self, $arg ) = @_;
+  my ($self, $arg) = @_;
 
   if( defined $arg ) {
     $self->{'_external_name'} = $arg;
   }
+  elsif( !defined $self->{'_external_name'} ) {
+    # nothing set explicitly: look the name up among the DBLinks
+    $self->{'_external_name'} = $self->_get_external_info("name");
+  }
 
   return $self->{'_external_name'};
 }
 
 
+=head2 _get_external_info
+
+ Title   : _get_external_info
+ Usage   : $ext_name = $obj->_get_external_info("name");
+ Function: finds the DBLink of this transcript whose db has the highest
+           priority for the species and reports its primary id or db name
+ Example :
+ Returns : the primary id ("name") or the db name ("db") of that DBLink,
+           undef if no DBLink matches or the switch is not recognised
+ Args    : string. Switch on whether to return a name or dbname.
+
+=cut
+
+sub _get_external_info {
+  my ($self, $required) = @_;
+
+  # find out from which species this translation comes from; there may be
+  # no species at all, in which case the default order below is used
+  my $species_obj = $self->species;
+  my $species     = defined $species_obj ? $species_obj->species : undef;
+
+  # go and grab the list of DBLinks
+  my $dblinks = $self->get_all_DBLinks;
+
+  # set the priority of the order in which the external dbs are searched
+  # based on the species
+  # the actual order of dbs was determined by the deprecated priority column
+  # in the external_db table
+
+  # the lookup below plays the role of a case statement; the species name
+  # comes from the meta table of the relevant species.
+  my %priority_order_for = (
+    'sapiens'  => [ qw{ HUGO SWISSPROT SPTREMBL RefSeq LocusLink } ],  # human
+    'gambiae'  => [ qw{ ANOSUB SWISSPROT SPTREMBL } ],                 # anopheles
+    'rerio'    => [ qw{ SWISSPROT SPTREMBL } ],                        # zebra fish
+    'rubripes' => [ qw{ SWISSPROT SPTREMBL RefSeq LocusLink HUGO } ],  # fugu
+    'musculus' => [ qw{ MarkerSymbol SWISSPROT RefSeq LocusLink } ],   # mouse
+  );
+
+  my $priority_order;
+  if ( defined $species ) {
+    $priority_order = $priority_order_for{$species};
+  }
+
+  # default list if species is not set
+  if ( !defined $priority_order ) {
+    $self->warn("Transcript::external_name - No species set. Using default DB order.");
+    $priority_order = [ qw{ HUGO SWISSPROT SPTREMBL RefSeq LocusLink } ];
+  }
+
+  # find a match (first one) for the db with the highest available priority
+  my $name = undef;
+  my $db   = undef;
+
+  # we would hope that each transcript has only a single DBLink per db but
+  # implement as a loop just in case, taking the first relevant record found
+  foreach my $curr_db ( @{$priority_order} ) {
+    foreach my $dbl ( @{$dblinks} ) {
+      if ( $curr_db eq $dbl->dbname ) {
+        $name = $dbl->primary_id;
+        $db   = $dbl->dbname;
+        last;
+      }
+    }
+    if ( defined $name ) {
+      last;
+    }
+  }
+
+  if ( defined $required && $required eq 'name' ) {
+    return $name;
+  }
+  elsif ( defined $required && $required eq 'db' ) {
+    return $db;
+  }
+  else {
+    # neither 'name' nor 'db' was asked for
+    $self->warn("Transcript::_get_external_info - unknown info type requested, expected 'name' or 'db'.");
+    return undef;
+  }
+}
+
+
 sub is_known {
   my $self = shift;
   if( defined $self->external_name() && $self->external_name() ne '' ) {
@@ -1467,6 +1558,36 @@ sub species {
 
 
 
+=head2 species
+
+  Arg [1]    : optional Bio::Species $species
+  Example    : none
+  Description: You can set the species for this transcript if you want to use
+               species specific behaviour. Otherwise species is retrieved
+               from the attached database.
+  Returntype : Bio::Species
+  Exceptions : none
+  Caller     : external_name, external_db, general for setting
+
+=cut
+
+
+# NOTE(review): the hunk context above suggests Transcript.pm already defines
+# a sub species just before this point - confirm and drop one of the two.
+sub species {
+  my ( $self, $species ) = @_;
+
+  if( defined $species ) {
+    $self->{species} = $species;
+  }
+  elsif( !exists $self->{species} && defined $self->adaptor() ) {
+    # not set yet: fetch it through the MetaContainer of the attached db
+    $self->{species} = $self->adaptor()->db->get_MetaContainer()
+                                           ->get_Species();
+  }
+
+  return $self->{species};
+}
 
 
 ##########################################################