From de54045dece241328bae10ee107922e667ffc716 Mon Sep 17 00:00:00 2001 From: Nathan Johnson <njohnson@ebi.ac.uk> Date: Wed, 9 Feb 2011 11:49:45 +0000 Subject: [PATCH] Added fetch_all_by_name with fuzzy matching --- .../Bio/EnsEMBL/DBSQL/OntologyTermAdaptor.pm | 158 ++++++++++++++++++ 1 file changed, 158 insertions(+) diff --git a/modules/Bio/EnsEMBL/DBSQL/OntologyTermAdaptor.pm b/modules/Bio/EnsEMBL/DBSQL/OntologyTermAdaptor.pm index 048dbc7ae3..c7f8e61f6e 100644 --- a/modules/Bio/EnsEMBL/DBSQL/OntologyTermAdaptor.pm +++ b/modules/Bio/EnsEMBL/DBSQL/OntologyTermAdaptor.pm @@ -60,6 +60,164 @@ use Bio::EnsEMBL::OntologyTerm; use base qw( Bio::EnsEMBL::DBSQL::BaseAdaptor ); +=head2 new + + Arg [1] : Bio::EnsEMBL::DBSQL::DBAdaptor + Argument required for parent class + Bio::EnsEMBL::DBSQL::BaseAdaptor. + + Arg [2] : String + The particular ontology that this ontology adaptor + deals with. + + Caller : Bio::EnsEMBL::DBSQL::GOTermAdaptor + Bio::EnsEMBL::DBSQL::SOTermAdaptor + + Description : Creates an ontology term adaptor. + + Example : + + my $ot_adaptor = + Bio::EnsEMBL::DBSQL::OntologyTermAdaptor->new( $dba, 'GO' ); + + Return type : Bio::EnsEMBL::DBSQL::OntologyTermAdaptor + +=cut + +sub new { + my ( $proto, $dba, $ontology ) = @_; + + if ( !ref($dba) || !$dba->isa('Bio::EnsEMBL::DBSQL::DBAdaptor') ) { + throw('First argument needs to be a ' + . 'Bio::EnsEMBL::DBSQL::DBAdaptor object' ); + } + + my $this = $proto->SUPER::new($dba); + + $this->{'ontology'} = $ontology; + + return $this; +} + +=head2 ontology + + Arg : None + + Description : Returns the name of the ontology which this + adaptor is used to retrieve terms for. + + Example : + + my $ontology = $ot_adaptor->ontology(); + + Return type : String + +=cut + +sub ontology { + my ($this) = @_; + return $this->{'ontology'}; +} + +=head2 fetch_all_by_name + + Arg [1] : String - Name of term + + Arg [2] : int - Fuzzy match flag: + 1 = Try with spaces and underscores or none + 2 = Pre/Append wildcards + 3 = Both of the above + + Description : Fetches an ontology term(s) given a name. + + Example : + + my ($term) = @{$ot_adaptor->fetch_by_name('DNA_binding_site')}; + + Return type : ARRAYREF of Bio::EnsEMBL::OntologyTerm Objects + +=cut + +sub fetch_all_by_name { + my ( $this, $name, $fuzzy) = @_; + + #fetch_all because term-ontolgy does not have unique key + #And fuzzy may bring back >1 term + #Case insensitivity is implicit due to table character collection + my ($name_clause, $name_string); + + if(! $fuzzy){ + $name_clause = 'term.name = ?'; + $name_string = $name; + } + elsif($fuzzy == 2){ + $name_clause = ' term.name like ?'; + $name_string = "\%${name}\%"; + } + elsif($fuzzy < 4){ + + #$fuzzy == 3 + $name_clause = ' term.name rlike ?'; + + ($name_string = $name) =~ s/[\s_]+/\[ _\]*/g; + #no need for .* at flanks as this is the default rlike behaviour + + if($fuzzy == 1){ + $name_string = "^${name_string}\$"; + } + } + else{ + throw("Fuzzy match level can only be set to 1, 2 or 3 not $fuzzy"); + } + + + my $statement = q( +SELECT term.term_id, + term.accession, + term.name, + term.definition, + term.subsets, + ontology.namespace +FROM ontology, + term +WHERE ontology.name = ? + AND ontology.ontology_id = term.ontology_id + AND ).$name_clause; + + my $sth = $this->prepare($statement); + $sth->bind_param( 1, $this->{'ontology'}, SQL_VARCHAR ); + $sth->bind_param( 2, $name_string, SQL_VARCHAR ); + + $sth->execute(); + + my ( $dbid, $accession, $term_name, $definition, $subsets, $namespace ); + $sth->bind_columns( + \( $dbid, $accession, $term_name, $definition, $subsets, $namespace ) ); + + my @terms; + + + while($sth->fetch){ + + $subsets ||= ''; + push @terms, Bio::EnsEMBL::OntologyTerm->new + ( + '-dbid' => $dbid, + '-adaptor' => $this, + '-accession' => $accession, + '-namespace' => $namespace, + '-subsets' => [ split( /,/, $subsets ) ], + '-name' => $term_name, + '-definition' => $definition + ); + } + + $sth->finish(); + + return \@terms; +} ## end sub fetch_by_name + + =head2 fetch_by_accession Arg [1] : String -- GitLab