# # BioPerl module for Bio::EnsEMBL::DBSQL::BaseAdaptor # # Cared for by Ewan Birney <birney@ebi.ac.uk> # # Copyright Ewan Birney # # You may distribute this module under the same terms as perl itself # POD documentation - main docs before the code =head1 NAME Bio::EnsEMBL::DBSQL::BaseAdaptor - Base Adaptor for DBSQL adaptors =head1 SYNOPSIS # base adaptor provides # SQL prepare function $adaptor->prepare("sql statement"); # get of root DBAdaptor object $adaptor->db(); # constructor, ok for inheritence $adaptor = Bio::EnsEMBL::DBSQL::SubClassOfBaseAdaptor->new($dbobj) =head1 DESCRIPTION This is a true base class for Adaptors in the Ensembl DBSQL system. Original idea from Arne Adaptors are expected to have the following functions $obj = $adaptor->fetch_by_dbID($internal_id); which builds the object from the primary key of the object. This function is crucial because it allows adaptors to collaborate relatively independently of each other - in other words, we can change the schema under one adaptor without too many knock on changes through the other adaptors. Most adaptors will also have $dbid = $adaptor->store($obj); which stores the object. Currently the storing of an object also causes the objects to set $obj->dbID correctly and attach the adaptor. Other fetch functions go by the convention of @object_array = @{$adaptor->fetch_all_by_XXXX($arguments_for_XXXX)}; sometimes it returns an array ref denoted by the 'all' in the name of the method, sometimes an individual object. For example $gene = $gene_adaptor->fetch_by_stable_id($stable_id); or @fp = @{$simple_feature_adaptor->fetch_all_by_Slice($slice)}; Occassionally adaptors need to provide access to lists of ids. In this case the convention is to go list_XXXX, such as @gene_ids = @{$gene_adaptor->list_geneIds()}; (note: this method is poorly named) =head1 CONTACT Post questions to the EnsEMBL developer mailing list: <ensembl-dev@ebi.ac.uk> =head1 APPENDIX The rest of the documentation details each of the object methods. Internal methods are usually preceded with a _ =cut package Bio::EnsEMBL::DBSQL::BaseAdaptor; use vars qw(@ISA); use strict; use Bio::EnsEMBL::Root; use Bio::EnsEMBL::Utils::Exception qw(throw); @ISA = qw(Bio::EnsEMBL::Root); =head2 new Arg [1] : Bio::EnsEMBL::DBSQL::DBConnection $dbobj Example : $adaptor = new AdaptorInheritedFromBaseAdaptor($dbobj); Description: Creates a new BaseAdaptor object. The intent is that this constructor would be called by an inherited superclass either automatically or through $self->SUPER::new in an overridden new method. Returntype : Bio::EnsEMBL::DBSQL::BaseAdaptor Exceptions : none Caller : Bio::EnsEMBL::DBSQL::DBConnection =cut sub new { my ($class,$dbobj) = @_; my $self = {}; bless $self,$class; if( !defined $dbobj || !ref $dbobj ) { throw("Don't have a db [$dbobj] for new adaptor"); } if($dbobj->isa('Bio::EnsEMBL::DBSQL::DBAdaptor')){ $self->db($dbobj); $self->dbc($dbobj->dbc); } elsif( ref($dbobj) =~ /DBAdaptor$/){ $self->db($dbobj); $self->dbc($dbobj->dbc); } elsif( ref($dbobj) =~ /DBConnection$/){ $self->dbc($dbobj); } else{ throw("Don't have a DBAdaptor [$dbobj] for new adaptor"); } return $self; } =head2 prepare Arg [1] : string $string a SQL query to be prepared by this adaptors database Example : $sth = $adaptor->prepare("select yadda from blabla") Description: provides a DBI statement handle from the adaptor. A convenience function so you dont have to write $adaptor->db->prepare all the time Returntype : DBI::StatementHandle Exceptions : none Caller : Adaptors inherited from BaseAdaptor =cut sub prepare{ my ($self,$string) = @_; return $self->dbc->prepare($string); } =head2 db Arg [1] : (optional) Bio::EnsEMBL::DBSQL::DBConnection $obj the database this adaptor is using. Example : $db = $adaptor->db(); Description: Getter/Setter for the DatabaseConnection that this adaptor is using. Returntype : Bio::EnsEMBL::DBSQL::DBConnection Exceptions : none Caller : Adaptors inherited fro BaseAdaptor =cut sub db{ my $self = shift; $self->{'db'} = shift if(@_); return $self->{'db'}; } sub dbc{ my $self = shift; $self->{'dbc'} = shift if(@_); return $self->{'dbc'}; } # list primary keys for a particular table # args are table name and primary key field # if primary key field is not supplied, tablename_id is assumed # returns listref of IDs sub _list_dbIDs { my ($self, $table, $pk) = @_; if (!defined($pk)) { $pk = $table . "_id"; } my @out; my $sql = "SELECT " . $pk . " FROM " . $table; my $sth = $self->prepare($sql); $sth->execute; while (my ($id) = $sth->fetchrow) { push(@out, $id); } $sth->finish; return \@out; } # _straight_join # Arg [1] : (optional) boolean $new_val # Example : $self->_straight_join(1); # $self->generic_fetch($constraint); # $self->_straight_join(0); # Description: PROTECTED Getter/Setter that turns on/off the use of # a straight join in queries. # Returntype : boolean # Exceptions : none # Caller : general sub _straight_join { my $self = shift; if(@_) { $self->{'_straight_join'} = shift; } return $self->{'_straight_join'}; } =head2 generic_fetch Arg [1] : (optional) string $constraint An SQL query constraint (i.e. part of the WHERE clause) Arg [2] : (optional) Bio::EnsEMBL::AssemblyMapper $mapper A mapper object used to remap features as they are retrieved from the database Arg [3] : (optional) Bio::EnsEMBL::Slice $slice A slice that features should be remapped to Example : $fts = $a->generic_fetch('contig_id in (1234, 1235)', 'Swall'); Description: Performs a database fetch and returns feature objects in contig coordinates. Returntype : listref of Bio::EnsEMBL::SeqFeature in contig coordinates Exceptions : none Caller : BaseFeatureAdaptor, ProxyDnaAlignFeatureAdaptor::generic_fetch =cut sub generic_fetch { my ($self, $constraint, $mapper, $slice) = @_; my @tabs = $self->_tables; my $columns = join(', ', $self->_columns()); my $db = $self->db(); # # Construct a left join statement if one was defined, and remove the # left-joined table from the table list # my @left_join_list = $self->_left_join(); my $left_join = ''; my @tables; if(@left_join_list) { my %left_join_hash = map { $_->[0] => $_->[1] } @left_join_list; while(my $t = shift @tabs) { if( exists $left_join_hash{ $t->[0] } ) { my $condition = $left_join_hash{ $t->[0] }; my $syn = $t->[1]; $left_join .= "LEFT JOIN\n ".$t->[0]." $syn ON $condition "; } else { push @tables, $t; } } } else { @tables = @tabs; } my $straight_join = ''; if($self->_straight_join()) { $straight_join = "STRAIGHT_JOIN"; } #construct a nice table string like 'table1 t1, table2 t2' my $tablenames = join(', ', map({ join(' ', @$_) } @tables)); my $sql = "SELECT $straight_join $columns\n FROM $tablenames $left_join"; my $default_where = $self->_default_where_clause; my $final_clause = $self->_final_clause; #append a where clause if it was defined if($constraint) { $sql .= "\n WHERE $constraint "; if($default_where) { $sql .= " AND\n $default_where "; } } elsif($default_where) { $sql .= "\n WHERE $default_where "; } #append additional clauses which may have been defined $sql .= "\n$final_clause"; my $sth = $db->dbc->prepare($sql); $sth->execute; my $res = $self->_objs_from_sth($sth, $mapper, $slice); $sth->finish(); return $res; } =head2 fetch_by_dbID Arg [1] : int $id The unique database identifier for the feature to be obtained Example : $feat = $adaptor->fetch_by_dbID(1234)); $feat = $feat->transform('contig'); Description: Returns the feature created from the database defined by the the id $id. The feature will be returned in its native coordinate system. That is, the coordinate system in which it is stored in the database. In order to convert it to a particular coordinate system use the transfer() or transform() method. If the feature is not found in the database then undef is returned instead Returntype : Bio::EnsEMBL::Feature or undef Exceptions : thrown if $id arg is not provided does not exist Caller : general =cut sub fetch_by_dbID{ my ($self,$id) = @_; throw("id argument is required") if(!defined $id); #construct a constraint like 't1.table1_id = 123' my @tabs = $self->_tables; my ($name, $syn) = @{$tabs[0]}; my $constraint = "${syn}.${name}_id = $id"; #Should only be one my ($feat) = @{$self->generic_fetch($constraint)}; return undef if(!$feat); return $feat; } =head2 fetch_all_by_dbID_list Arg [1] : listref of ints $id_list The unique database identifiers for the features to be obtained Example : @feats = @{$adaptor->fetch_by_dbID_list([1234, 2131, 982]))}; Description: Returns the features created from the database defined by the the ids in contained in the id list $id_list. The features will be returned in their native coordinate system. That is, the coordinate system in which they are stored in the database. In order to convert the features to a particular coordinate system use the transfer() or transform() method. If none of the features are found in the database a reference to an empty list is returned. Returntype : listref of Bio::EnsEMBL::Features Exceptions : thrown if $id arg is not provided does not exist Caller : general =cut sub fetch_all_by_dbID_list { my ($self,$id_list_ref) = @_; if(!defined($id_list_ref) || ref($id_list_ref) ne 'ARRAY') { throw("id_list list reference argument is required"); } return [] if(!@$id_list_ref); my @out; #construct a constraint like 't1.table1_id = 123' my @tabs = $self->_tables; my ($name, $syn) = @{$tabs[0]}; # mysql is faster and we ensure that we do not exceed the max query size by # splitting large queries into smaller queries of 200 ids my $max_size = 200; my @id_list = @$id_list_ref; while(@id_list) { my @ids; if(@id_list > $max_size) { @ids = splice(@id_list, 0, $max_size); } else { @ids = splice(@id_list, 0); } my $id_str; if(@ids > 1) { $id_str = " IN (" . join(',', @ids). ")"; } else { $id_str = " = " . $ids[0]; } my $constraint = "${syn}.${name}_id $id_str"; push @out, @{$self->generic_fetch($constraint)}; } return \@out; } # might not be a good idea, but for convenience # shouldnt be called on the BIG tables though sub fetch_all { my $self = shift; return $self->generic_fetch(); } #_tables # # Args : none # Example : $tablename = $self->_table_name() # Description: ABSTRACT PROTECTED Subclasses are responsible for implementing # this method. It should list of [tablename, alias] pairs. # Additionally the primary table (with the dbID, analysis_id, and # score) should be the first table in the list. # e.g: # ( ['repeat_feature', 'rf'], # ['repeat_consensus', 'rc']); # used to obtain features. # Returntype : list of [tablename, alias] pairs # Exceptions : thrown if not implemented by subclass # Caller : BaseFeatureAdaptor::generic_fetch # sub _tables { my $self = shift; throw("abstract method _tables not defined by implementing" . " subclass of BaseFeatureAdaptor"); return undef; } #_columns # # Args : none # Example : $tablename = $self->_columns() # Description: ABSTRACT PROTECTED Subclasses are responsible for implementing # this method. It should return a list of columns to be used # for feature creation # Returntype : list of strings # Exceptions : thrown if not implemented by subclass # Caller : BaseFeatureAdaptor::generic_fetch # sub _columns { my $self = shift; throw("abstract method _columns not defined by implementing" . " subclass of BaseFeatureAdaptor"); } # _default_where_clause # # Arg [1] : none # Example : none # Description: May be overridden to provide an additional where constraint to # the SQL query which is generated to fetch feature records. # This constraint is always appended to the end of the generated # where clause # Returntype : string # Exceptions : none # Caller : generic_fetch # sub _default_where_clause { my $self = shift; return ''; } # _left_join # Arg [1] : none # Example : none # Description: Can be overridden by a subclass to specify any left joins # which should occur. The table name specigfied in the join # must still be present in the return values of # Returntype : a {'tablename' => 'join condition'} pair # Exceptions : none # Caller : general # sub _left_join { my $self = shift; return (); } #_final_clause # Arg [1] : none # Example : none # Description: May be overriden to provide an additional clause to the end # of the SQL query used to fetch feature records. # This is useful to add a required ORDER BY clause to the # query for example. # Returntype : string # Exceptions : none # Caller : generic_fetch sub _final_clause { my $self = shift; return ''; } #_objs_from_sth # Arg [1] : DBI::row_hashref $hashref containing key-value pairs # for each of the columns specified by the _columns method # Example : my @feats = $self->_obj_from_hashref # Description: ABSTRACT PROTECTED The subclass is responsible for implementing # this method. It should take in a DBI row hash reference and # return a list of created features in contig coordinates. # Returntype : list of Bio::EnsEMBL::*Features in contig coordinates # Exceptions : thrown if not implemented by subclass # Caller : BaseFeatureAdaptor::generic_fetch sub _objs_from_sth { my $self = shift; throw("abstract method _obj_from_sth not defined by implementing" . " subclass of BaseFeatureAdaptor"); } 1;