From ab0dc4e1ad0f6d4e5bce9ea9048e11fa47b06701 Mon Sep 17 00:00:00 2001 From: Ewan Birney <birney@sanger.ac.uk> Date: Fri, 11 Jan 2002 12:17:08 +0000 Subject: [PATCH] removed final crufty bits from DBSQL. Should be cruft-free down there! --- .../Bio/EnsEMBL/DBSQL/CrossMatchDBAdaptor.pm | 283 -- modules/Bio/EnsEMBL/DBSQL/DBAdaptor.pm | 85 +- modules/Bio/EnsEMBL/DBSQL/ExternalWrapper.pm | 306 --- modules/Bio/EnsEMBL/DBSQL/Feature_Obj.pm | 849 ------ .../Bio/EnsEMBL/DBSQL/Feature_ObjForeign.pm | 852 ------ modules/Bio/EnsEMBL/DBSQL/ObjForeign.pm | 146 - modules/Bio/EnsEMBL/DBSQL/RawContig.pm | 2411 ----------------- .../DBSQL/SymmetricContigFeatureContainer.pm | 147 - modules/Bio/EnsEMBL/RawContig.pm | 3 + 9 files changed, 46 insertions(+), 5036 deletions(-) delete mode 100755 modules/Bio/EnsEMBL/DBSQL/CrossMatchDBAdaptor.pm delete mode 100644 modules/Bio/EnsEMBL/DBSQL/ExternalWrapper.pm delete mode 100755 modules/Bio/EnsEMBL/DBSQL/Feature_Obj.pm delete mode 100644 modules/Bio/EnsEMBL/DBSQL/Feature_ObjForeign.pm delete mode 100644 modules/Bio/EnsEMBL/DBSQL/ObjForeign.pm delete mode 100755 modules/Bio/EnsEMBL/DBSQL/RawContig.pm delete mode 100755 modules/Bio/EnsEMBL/DBSQL/SymmetricContigFeatureContainer.pm diff --git a/modules/Bio/EnsEMBL/DBSQL/CrossMatchDBAdaptor.pm b/modules/Bio/EnsEMBL/DBSQL/CrossMatchDBAdaptor.pm deleted file mode 100755 index d3939b49c8..0000000000 --- a/modules/Bio/EnsEMBL/DBSQL/CrossMatchDBAdaptor.pm +++ /dev/null @@ -1,283 +0,0 @@ - -# -# Ensembl module for Bio::EnsEMBL::DBSQL::CrossMatchDBAdaptor -# -# Cared for by Ewan Birney <birney@ebi.ac.uk> -# -# Copyright GRL and EBI -# -# You may distribute this module under the same terms as perl itself - -# POD documentation - main docs before the code - -=head1 NAME - -Bio::EnsEMBL::DBSQL::CrossMatchDBAdaptor - DESCRIPTION of Object - -=head1 SYNOPSIS - -my $db=Bio::EnsEMBL::DBSQL::Obj->new(-dbname=>'july_dna',-host=>'ecs1c',-user=>'ensadmin'); - -my $cross=Bio::EnsEMBL::DBSQL::CrossMatchDBAdaptor->new(-dbname=>'crossmatch',-host=>'ecs1c',-user=>'ensadmin'); - -$db->_crossdb($cross); - -=head1 DESCRIPTION - -This Object is a database adapter for the crossmatch database, it loads the old and new databases, which are held in _new_db and _old_db. It also gets a SymmetricFeatureContainer object, where the methods for getting and returning crossmatches are. - -The crossdb can then be added to a standard db in its _crossdb method. - -=head1 CONTACT - -Ensembl - ensembl-dev@ebi.ac.uk - -=head1 APPENDIX - -The rest of the documentation details each of the object methods. Internal methods are usually preceded with a _ - -=cut - - -# Let the code begin... - - -package Bio::EnsEMBL::DBSQL::CrossMatchDBAdaptor; -use vars qw(@ISA); -use strict; - -# Object preamble - inheriets from Bio::Root::RootI -use DBI; -use Bio::Root::RootI; -use Bio::EnsEMBL::DBSQL::SymmetricContigFeatureContainer; -use Bio::EnsEMBL::DBLoader; - -@ISA = qw(Bio::Root::RootI); - -sub new { - my($class,@args) = @_; - - my $self = {}; - bless $self,$class; - - my ($db,$host,$driver,$user,$password,$port) = - $self->_rearrange([qw(DBNAME - HOST - DRIVER - USER - PASS - PORT - )],@args); - $db || $self->throw("Database object must have a database name"); - $user || $self->throw("Database object must have a user"); - - if( ! $driver ) { - $driver = 'mysql'; - } - - if( ! $host ) { - $host = 'localhost'; - } - my $dsn = "DBI:$driver:database=$db;host=$host"; - my $dbh = DBI->connect("$dsn","$user",$password, {RaiseError => 1}); - - $self->_db_handle($dbh); - - return $self; -} - - -=head2 new_dbobj - - Title : new_dbobj - Usage : $crossdb->new_dbobj - Function: reads the dblocation table to load the new db - Example : $crossdb->new_dbobj() - Returns : Bio::EnsEMBL::DBSQL::Obj - Args : none - - -=cut - -sub new_dbobj{ - my ($self) = @_; - - my $t = $self->_new_dbobj; - if( defined $t ) { return $t; } - - # yank it out ;) - - my $sth = $self->prepare("select newdatabase from dblocation"); - $sth->execute; - my ($loc) = $sth->fetchrow_array; - - #print STDERR "New database locator: $loc\n"; - my $db = Bio::EnsEMBL::DBLoader->new($loc); - - $self->_new_dbobj($db); - $db->_crossdb($self); - return $db; -} - - -=head2 old_dbobj - - Title : old_dbobj - Usage : $crossdb->old_dbobj - Function: reads the dblocation table to load the old db - Example : $crossdb->old_dbobj() - Returns : Bio::EnsEMBL::DBSQL::Obj - Args : none - - -=cut - -sub old_dbobj{ - my ($self) = @_; - - my $t = $self->_old_dbobj; - if( defined $t ) { return $t; } - - # yank it out ;) - my $sth = $self->prepare("select olddatabase from dblocation"); - $sth->execute; - my ($loc) = $sth->fetchrow_array; - #print STDERR "Old database locator: $loc\n"; - my $db = Bio::EnsEMBL::DBLoader->new($loc); - - $self->_old_dbobj($db); - - return $db; -} - -=head2 get_SymmetricContigFeatureContainer - - Title : get_SymmetricContigFeatureContainer - Usage : $crossdb->get_SymmetricContigFeatureContainer - Function: Gets a Bio::EnsEMBL::DBSQL::SymmetricContigFeatureContainer - Example : $crossdb->get_SymmetricContigFeatureContainer - Returns : Bio::EnsEMBL::DBSQL::SymmetricContigFeatureContainer - Args : none - - -=cut - -sub get_SymmetricContigFeatureContainer{ - my ($self) = @_; - - return Bio::EnsEMBL::DBSQL::SymmetricContigFeatureContainer->new($self); -} - -=head2 get_clonelist - - Title : get_clonelist - Usage : $crossdb->get_clonelist - Function: Reads the clonelist table, to return list of clones with different - versions between old and new - Example : $crossdb->get_clonelist - Returns : array of strings - Args : none - -=cut - -sub get_clonelist{ - my ($self) = @_; - - my @clones; - my $sth=$self->prepare("select clone from clonelist"); - $sth->execute; - while (my $clone = $sth->fetchrow_array()) { - push @clones,$clone; - } - return @clones; -} - -=head2 _new_dbobj - - Title : _new_dbobj - Usage : $obj->_new_dbobj($newval) - Function: get/set for the new db adapter - Returns : value of _new_dbobj - Args : newvalue (optional) - - -=cut - -sub _new_dbobj{ - my $obj = shift; - if( @_ ) { - my $value = shift; - $obj->{'_new_dbobj'} = $value; - } - return $obj->{'_new_dbobj'}; - -} - -=head2 _old_dbobj - - Title : _old_dbobj - Usage : $obj->_old_dbobj($newval) - Function: get/set for the old db adapter - Returns : value of _old_dbobj - Args : newvalue (optional) - - -=cut - -sub _old_dbobj{ - my $obj = shift; - if( @_ ) { - my $value = shift; - $obj->{'_old_dbobj'} = $value; - } - return $obj->{'_old_dbobj'}; - -} - -=head2 prepare - - Title : prepare - Usage : $sth = $dbobj->prepare($statement); - Function: prepares a SQL statement on the DBI handle - Example :$sth = $dbobj->prepare("select seq_start,seq_end from feature where analysis = \'example\' "); - Returns : A DBI statement handle object - Args : a SQL string - -=cut - -sub prepare { - my ($self,$string) = @_; - - if( ! $string ) { - $self->throw("Attempting to prepare an empty SQL query!"); - } - if( !defined $self->_db_handle ) { - $self->throw("Database object has lost its database handle! getting otta here!"); - } - - # should we try to verify the string? - - return $self->_db_handle->prepare($string); -} - -=head2 _db_handle - - Title : _db_handle - Usage : $obj->_db_handle($newval) - Function: get/set for the db handle - Example : $obj->_db_handle($newval) - Returns : value of _db_handle - Args : newvalue (optional) - - -=cut - -sub _db_handle{ - my ($self,$value) = @_; - if( defined $value) { - $self->{'_db_handle'} = $value; - } - return $self->{'_db_handle'}; - -} - diff --git a/modules/Bio/EnsEMBL/DBSQL/DBAdaptor.pm b/modules/Bio/EnsEMBL/DBSQL/DBAdaptor.pm index 4dcce3d6e4..55c99422d5 100755 --- a/modules/Bio/EnsEMBL/DBSQL/DBAdaptor.pm +++ b/modules/Bio/EnsEMBL/DBSQL/DBAdaptor.pm @@ -319,48 +319,6 @@ sub password { } -=head2 get_Feature_Obj - - Title : get_Feature_Obj - Usage : - Function: - Example : - Returns : - Args : - -=cut - -sub get_Feature_Obj { - my( $self ) = @_; - - my( $feature_obj ); - unless ($feature_obj = $self->{'_feature_obj'}) { - require Bio::EnsEMBL::DBSQL::Feature_Obj; - $feature_obj = Bio::EnsEMBL::DBSQL::Feature_Obj->new($self); - $self->{'_feature_obj'} = $feature_obj; - } - - return $feature_obj; -} - -=head2 feature_Obj - - Title : feature_Obj - Usage : my $featureobj = $db->feature_Obj - Function: Returns the feature object database handle - Example : - Returns : Bio::EnsEMBL::DB::Feature_ObjI - Args : - -=cut - -sub feature_Obj { - my $self = shift; - - #$self->warn("feature_Obj is deprecated: using get_Feature_Obj instead!"); - return $self->get_Feature_Obj(@_); -} - =head2 get_MetaContainer @@ -2989,4 +2947,47 @@ sub remove_ExternalAdaptor { } +=head1 Old Functions + +Functions which are completely deprecated + +=cut + + +=head2 get_Feature_Obj + + Title : get_Feature_Obj + Usage : + Function: + Example : + Returns : + Args : + +=cut + +sub get_Feature_Obj { + my( $self ) = @_; + + $self->throw("No more Feature Objs!"); +} + +=head2 feature_Obj + + Title : feature_Obj + Usage : my $featureobj = $db->feature_Obj + Function: Returns the feature object database handle + Example : + Returns : Bio::EnsEMBL::DB::Feature_ObjI + Args : + +=cut + +sub feature_Obj { + my $self = shift; + $self->throw("No more Feature Objs!"); + +} + + + 1; diff --git a/modules/Bio/EnsEMBL/DBSQL/ExternalWrapper.pm b/modules/Bio/EnsEMBL/DBSQL/ExternalWrapper.pm deleted file mode 100644 index 49203ad201..0000000000 --- a/modules/Bio/EnsEMBL/DBSQL/ExternalWrapper.pm +++ /dev/null @@ -1,306 +0,0 @@ - -# -# Ensembl module for Bio::EnsEMBL::DBSQL::ExternalWrapper -# -# Cared for by Ewan Birney <birney@ebi.ac.uk> -# -# Copyright GRL and EBI -# -# You may distribute this module under the same terms as perl itself - -# POD documentation - main docs before the code - -=head1 NAME - -Bio::EnsEMBL::DBSQL::ExternalWrapper - Makes a standard Ensembl database a ExternalFeatureFactoryI implementing object - -=head1 SYNOPSIS - - # check out DB::ExternalFeatureFactoryI - -=head1 DESCRIPTION - -This class wraps a standard Ensembl database as if it is an -ExternalFeatureFactory interface, allowing it to serve up Genes and -(assumming someone gets to write this as well) features. - -The idea here is that this database will contain different data (eg, -data on EMBL CDS from the original entry) which is updated at a different -cycle from the main stuff. - -=head1 CONTACT - -Ensembl - ensembl-dev@ebi.ac.uk - -=head1 APPENDIX - -The rest of the documentation details each of the object methods. Internal methods are usually preceded with a _ - -=cut - - -# Let the code begin... - - -package Bio::EnsEMBL::DBSQL::ExternalWrapper; -use vars qw(@ISA); -use strict; - -# Object preamble - inheriets from Bio::Root::RootI -use Bio::EnsEMBL::Gene; -use Bio::Root::RootI; -use Bio::EnsEMBL::DB::ExternalFeatureFactoryI; - -@ISA = qw(Bio::EnsEMBL::DB::ExternalFeatureFactoryI Bio::Root::RootI); - -sub new { - my($class,$dbobj) = @_; - - - my $self = {}; - bless $self,$class; - - if( !defined $dbobj ) { - $self->throw("No dbobj or not a dbobj [$dbobj]"); - } - - $self->dbobj($dbobj); - - return $self; -} - - -=head2 get_Ensembl_Genes_clone - - Title : get_Ensembl_Genes_clone - Usage : - Function: - Example : - Returns : - Args : - - -=cut - -sub get_Ensembl_Genes_clone{ - my ($self,$cloneid) = @_; - my $clone; - - my $dbobj = $self->dbobj; - - #print STDERR "Got dbobj $dbobj connected to ",$dbobj->dbname,"\n"; - - eval { - $clone = $self->dbobj->get_Clone($cloneid); - }; - - if( $@ ) { - # return nothing - return (); - } - - my @genes=$clone->get_all_Genes(); - #foreach my $gene ( @genes ) { - # print STDERR "got ",$gene->id,"\n"; - #} - - return $clone->get_all_Genes(); -} - -=head2 get_Ensembl_Genes_contig_list - - Title : get_Ensembl_Genes_contig_list - Usage : - Function: - Example : - Returns : - Args : - - -=cut - -sub get_Ensembl_Genes_contig_list{ - my ($self,@contigs) = @_; - - if( scalar @contigs == 0 ) { - return (); - } - #Hash of contigs by gene id - my %gc; - - #Hash of array of gene objects by contig - my %cg; - my $list; - my @genes; - my @todocontigs; - if (my $cgr = $self->cg) { - #Get them from the cache - #print STDERR "Getting external genes from the cache\n"; - my %cgh = %$cgr; - foreach my $c (@contigs) { - if ($cgh{$c}) { - foreach my $g (@{$cgh{$c}}) { - $g->refresh(); - } - push (@genes,@{$cgh{$c}}); - } - } - return @genes; - } - else { - foreach my $c (@contigs) { - $cg{$c}=[]; - } - push (@todocontigs,@contigs); - } - - if( scalar @todocontigs == 0 ) { - return @genes; - } - else { - foreach my $c ( @todocontigs ) { - $list .= "'$c',"; - } - chop $list; - $list = "($list)"; - - my $sth = $self->dbobj->prepare(" - SELECT t.gene_id,c.id - FROM transcript t,exon_transcript et,exon e, - contig c - WHERE c.id in $list - AND c.internal_id = e.contig_id - AND e.exon_id = et.exon_id - AND t.transcript_id = et.transcript_id"); - - $sth->execute(); - my @geneids; - - while( my ($id,$contig) = $sth->fetchrow_array ) { - if (!exists $gc{$id}) { - $gc{$id}=$contig; - } - } - - push(@geneids,keys(%gc)); - - if( scalar(@geneids) == 0 ) { - #print STDERR "No ids here...\n"; - return(); - } - - #print STDERR "Getting external genes normally\n"; - my $ga = $self->dbobj->get_GeneAdaptor(); - for my $gene_id ( @geneids ) { - my $gene = $ga->fetch_by_dbID( $gene_id ); - push( @genes, $gene ); - } - - foreach my $gene (@genes) { - if (my $contig = $gc{$gene->dbID}) { - push(@{$cg{$contig}},$gene); - } - } - $self->cg(\%cg); - - # print STDERR "Returning ".scalar(@genes)." $genes[0] genes...\n"; - return @genes; - } -} - -=head2 cg - - Title : cg - Usage : $obj->cg($newval) - Function: Getset for cg value - Returns : value of cg - Args : newvalue (optional) - - -=cut - -sub cg{ - my $obj = shift; - if( @_ ) { - my $value = shift; - $obj->{'cg'} = $value; - } - return $obj->{'cg'}; - -} - -=head2 get_Ensembl_SeqFeatures_contig - - Title : get_Ensembl_SeqFeatures_contig - Usage : - Function: - Example : - Returns : - Args : - - -=cut - -sub get_Ensembl_SeqFeatures_contig{ - my ($self,$contigid) = @_; - return; -} - - - -=head2 get_Ensembl_SeqFeatures_clone - - Title : get_Ensembl_SeqFeatures_clone - Usage : - Function: - Example : - Returns : - Args : - - -=cut - - - - -sub get_Ensembl_SeqFeatures_clone{ - my ($self,$contigid) = @_; - return; -} - - - - - - - - - - -=head2 dbobj - - Title : dbobj - Usage : $obj->dbobj($newval) - Function: - Returns : value of dbobj - Args : newvalue (optional) - - -=cut - -sub dbobj{ - my $obj = shift; - if( @_ ) { - my $value = shift; - $obj->{'dbobj'} = $value; - } - return $obj->{'dbobj'}; - -} - - - - - - - diff --git a/modules/Bio/EnsEMBL/DBSQL/Feature_Obj.pm b/modules/Bio/EnsEMBL/DBSQL/Feature_Obj.pm deleted file mode 100755 index cd25327e64..0000000000 --- a/modules/Bio/EnsEMBL/DBSQL/Feature_Obj.pm +++ /dev/null @@ -1,849 +0,0 @@ -# -# EnsEMBL module for Bio::EnsEMBL::DBSQL::Feature_Obj -# -# Cared for by Elia Stupka <elia@ebi.ac.uk> -# -# Copyright Elia Stupka -# -# You may distribute this module under the same terms as perl itself - -# POD documentation - main docs before the code - -=head1 NAME - -Bio::EnsEMBL::DBSQL::Feature_Obj - MySQL database adapter class for EnsEMBL Feature Objects - -=head1 SYNOPSIS - - use Bio::EnsEMBL::DBSQL::Obj; - use Bio::EnsEMBL::DBSQL::Feature_Obj; - - $db = new Bio::EnsEMBL::DBSQL::Obj( -user => 'root', -db => 'pog' , -host => 'caldy' , -driver => 'mysql' ); - my $feature_obj=Bio::EnsEMBL::Feature_Obj->new($obj); - - #Check if a feature exists - $feature_obj->exists(); - -=head1 DESCRIPTION - -This is one of the objects contained in Bio:EnsEMBL::DBSQL::Obj, dealing with -feature objects. - -The Obj object represents a database that is implemented somehow (you shouldn\'t -care much as long as you can get the object). - -=head1 CONTACT - -Elia Stupka: elia@ebi.ac.uk - -=head1 APPENDIX - -The rest of the documentation details each of the object methods. Internal methods are -usually preceded with a _ - -=cut - - -# Let the code begin... - -package Bio::EnsEMBL::DBSQL::Feature_Obj; - -use vars qw(@ISA); -use strict; - -# Object preamble - inheriets from Bio::Root::Object - -use Bio::Root::RootI; -#use Bio::EnsEMBL::DBSQL::Obj; -use Bio::EnsEMBL::Gene; -use Bio::EnsEMBL::Exon; -use Bio::EnsEMBL::Transcript; -use Bio::EnsEMBL::FeatureFactory; -use DBI; -use Bio::EnsEMBL::DBSQL::Utils; -use Bio::EnsEMBL::DBSQL::DummyStatement; - - - -@ISA = qw(Bio::Root::RootI); - -# new() is inherited from Bio::Root::Object - -# _initialize is where the heavy stuff will happen when new is called - -sub new { - my($class,$db_obj) = @_; - my $self = {}; - bless $self,$class; - - $db_obj || $self->throw("Database Gene object must be passed a db obj!"); - $self->_db_obj($db_obj); - - return $self; # success - we hope! -} - -=head2 delete - - Title : delete - Usage : - Function: deletes all features from a contig; - Example : - Returns : - Args : - - -=cut - -sub delete { - my ($self,$contig) = @_; - - if (ref( $contig) && $contig->isa("Bio::EnsEMBL::DB::ContigI")) { - $self->throw("You have to give a contig id, not a contig object!"); - } - - my $sth = $self->_db_obj->prepare("select fs.feature,fs.fset " . - "from fset_feature as fs, " . - " feature as f " . - "where fs.feature = f.id " . - "and f.contig = '$contig'"); - - my $res = $sth->execute || $self->warn("Could not find features for contig $contig"); - - my %fset; - - while (my $rowhash = $sth->fetchrow_hashref) { - $fset{$rowhash->{fset}} = 1; - } - - my @fset = keys %fset; - - if ($#fset >= 0) { - my $fsstr = ""; - - foreach my $fs (@fset) { - $fsstr .= $fs . ","; - } - - chop($fsstr); - - - - $sth = $self->_db_obj->prepare("delete from fset where id in ($fsstr)"); - $res = $sth->execute; - - $sth = $self->_db_obj->prepare("delete from fset_feature where fset in ($fsstr)"); - $res = $sth->execute; - } - - #print(STDERR "Deleting features for contig $contig\n"); - - $sth = $self->_db_obj->prepare("delete from feature where contig = '$contig'"); - $res = $sth->execute; - - #print(STDERR "Deleting repeat features for contig $contig\n"); - - $sth = $self->_db_obj->prepare("delete from repeat_feature where contig = '$contig'"); - $res = $sth->execute; - -} - -=head2 write - - Title : write - Usage : $obj->write($contig,@features) - Function: Writes a feature on the genomic sequence of a contig into the database - Example : - Returns : nothing - Args : Bio::EnsEMBL::SeqFeatureI - - -=cut - -sub write { - my ($self,$contig,@features) = @_; - - # - # Yes - we need to rethink how we are writing features into the - # database. This is a little obtuse and clunky now - # - - $self->throw("$contig is not a Bio::EnsEMBL::DB::ContigI") - unless (defined($contig) && $contig->isa("Bio::EnsEMBL::DB::ContigI")); - - my $contigid = $contig->id; - my $analysis; - - - # Put the repeats in a different table, and also things we need to write - # as fsets. - my @repeats; - my @fset; - - FEATURE : - foreach my $feature ( @features ) { - - if( ! $feature->isa('Bio::EnsEMBL::SeqFeatureI') ) { - $self->throw("Feature $feature is not a feature!"); - } - - eval { - $feature->validate(); - }; - - if ($@) { - - next FEATURE; - } - - if($feature->isa('Bio::EnsEMBL::RepeatI')) { - push(@repeats,$feature); - } elsif ( $feature->sub_SeqFeature ) { - push(@fset,$feature); - } else { - if (!defined($feature->analysis)) { - $self->throw("Feature " . $feature->seqname . " " . $feature->source_tag ." doesn't have analysis. Can't write to database"); - } else { - $analysis = $feature->analysis; - } - - - my $analysisid = $self->write_Analysis($analysis); - - if ( $feature->isa('Bio::EnsEMBL::FeaturePair') ) { - my $homol = $feature->feature2; -#scp - hack to make p_value look like a float - $feature->p_value(&exponent($feature->p_value)); - my $sth = $self->_db_obj->prepare( - "insert into feature(id,contig,seq_start,seq_end,score,strand,name,analysis,hstart,hend,hid,perc_id,evalue,phase,end_phase) ". - "values ('NULL'," - .$contig->internal_id ."," - .$feature->start ."," - .$feature->end ."," - .$feature->score ."," - .$feature->strand ."," - ."'".$feature->source_tag."'," - .$analysisid ."," - .$homol->start ."," - .$homol->end ."," - ."'".$homol->seqname ."'," - .((defined $feature->percent_id) ? $feature->percent_id : 'NULL') ."," - .((defined $feature->p_value) ? ("\'".$feature->p_value."\'") : 'NULL') ."," - .((defined $feature->phase) ? $feature->phase : 'NULL') ."," - .((defined $feature->end_phase) ? $feature->end_phase : 'NULL') .")"); - - $sth->execute(); - - - } else { - my $sth = $self->_db_obj->prepare( "insert into feature(id,contig,seq_start,seq_end,score,strand,name,analysis,hstart,hend,hid,perc_id,evalue,phase,end_phase) ". - "values (?,?,?,?,?,?,?,?,?,?,?,?,?,?,?)"); - - $sth->execute('NULL', - $contig->internal_id, - $feature->start, - $feature->end, - $feature->score, - $feature->strand, - $feature->source_tag, - $analysisid, - -1, - -1, - "__NONE__", - 'NULL', - 'NULL', - 'NULL', - 'NULL' ); - } - } - } - - my $sth2 = $self->_db_obj->prepare("insert into repeat_feature(id,contig,seq_start,seq_end,score,strand,analysis,hstart,hend,hid) values(?,?,?,?,?,?,?,?,?,?)"); - - foreach my $feature (@repeats) { - if (!defined($feature->analysis)) { - $self->throw("Feature " . $feature->seqname . " " . $feature->source_tag ." doesn't have analysis. Can't write to database"); - } else { - $analysis = $feature->analysis; - } - - - my $analysisid = $self->write_Analysis($analysis); - my $homol = $feature->feature2; - - $sth2->execute('NULL', - $contig->internal_id, - $feature->start, - $feature->end, - $feature->score, - $feature->strand, - $analysisid, - $homol->start, - $homol->end, - $homol->seqname); - } - - - # Now the predictions - # we can't block do these as we need to get out the id wrt to the features - foreach my $feature ( @fset ) { -# print STDERR "Adding in a fset feature ",$feature->gff_string,"\n"; - - if (!defined($feature->analysis)) { - - $self->throw("Feature " . $feature->seqname . " " . - $feature->source_tag . - " doesn't have analysis. Can't write to database"); - } else { - $analysis = $feature->analysis; - } - - my $analysisid = $self->write_Analysis($analysis); - my $score = $feature->score(); - - if( !defined $score ) { $score = "-1000"; } - - my $sth3 = $self->_db_obj->prepare("insert into fset(id,score) values ('NULL',$score)"); - $sth3->execute(); - - # get out this id. This looks really clunk I know. Any better ideas... ? - - my $sth4 = $self->_db_obj->prepare("select LAST_INSERT_ID()"); - $sth4->execute(); - - my $arr = $sth4->fetchrow_arrayref(); - my $fset_id = $arr->[0]; - - # now write each sub feature - my $rank = 1; - - foreach my $sub ( $feature->sub_SeqFeature ) { -#scp - hack to make p_value look like a float - $feature->p_value(&exponent($feature->p_value)); - my $sth5 = $self->_db_obj->prepare("insert into feature " - ."(id,contig,seq_start,seq_end,score,strand,analysis,name,hstart,hend,hid,evalue,perc_id,phase,end_phase) " - ."values('NULL','" - .$contig->internal_id ."'," - .$sub->start ."," - .$sub->end . "," - .$sub->score . "," - .$sub->strand . "," - .$analysisid . "," - ."'".$sub->source_tag ."'," - ."-1,-1," - ."'".($sub->primary_tag || "__NONE__")."'," - . ((defined $sub->p_value) ? "\'".$sub->p_value."\'" : 'NULL') ."," - . ((defined $sub->percent_id) ? $sub->percent_id : 'NULL') ."," - . ((defined $sub->phase) ? $sub->phase : 'NULL') ."," - . ((defined $sub->end_phase) ? $sub->end_phase : 'NULL') .")"); - - $sth5->execute(); - my $sth6 = $self->_db_obj->prepare("insert into fset_feature(fset,feature,rank) values ($fset_id,LAST_INSERT_ID(),$rank)"); - $sth6->execute(); - $rank++; - } - } - - return 1; -} - -=head2 get_Protein_annseq - - Title : get_Protein_annseq - Usage : get_Protein_annseq ($ENSP); - Function: Creates an annseq object for a particular peptide, storing the peptide - sequence in $annseq->primary_seq, and adding all the protein features as generic - Seqfeatures - Example : - Returns : $annseq - Args : $ENSP - - -=cut - -sub get_Protein_annseq{ - my ($self,$ENSP) = @_; - - my $annseq = Bio::EnsEMBL::AnnSeq->new(); - - my $sth = $self->_db_obj->prepare("select id from transcript where translation = '$ENSP'"); - my $res = $sth->execute(); - my $rowhash = $sth->fetchrow_hashref; - - my $gene_obj=Bio::EnsEMBL::DBSQL::Gene_Obj->new($self->_db_obj); - my $transcript = $gene_obj->get_Transcript($rowhash->{'id'}); - my $translation = $gene_obj->get_Translation($ENSP); - - $transcript->translation($translation); - - my $seq = $transcript->translate(); - $annseq->primary_seq($seq); - - $sth = $self->_db_obj->prepare("select * from proteinfeature where translation = '$ENSP'"); - $res = $sth->execute(); - - while( my $rowhash = $sth->fetchrow_hashref) { - my $analysis = $rowhash->{'analysis'}; - my $sth2 = $self->_db_obj->prepare("select * from analysis where id = '$analysis'"); - my $res2 = $sth2->execute(); - my $rowhash2 = $sth2->fetchrow_hashref; - - my $feature = new Bio::SeqFeature::Generic ( -start => $rowhash->{'seq_start'}, - -end => $rowhash->{'seq_end'}, - -score => $rowhash->{'score'}, - -primary => $rowhash2->{'gff_feature'}, - -source => $rowhash2->{'gff_source'}); - - $annseq->add_SeqFeature($feature); - } - - return $annseq; -} - -=head2 write_all_Protein_features - - Title : write_all_Protein_features - Usage : $obj->write_all_Protein_features($ENSP) - Function: writes all protein features of a particular peptide into the database - Example : - Returns : - Args : - - -=cut - -sub write_all_Protein_features { - my ($self,$prot_annseq,$ENSP) = @_; - - my $c=0; - foreach my $feature ($prot_annseq->all_SeqFeatures()) { - my $sth = $self->_db_obj->prepare("insert into proteinfeature (id,seq_start, seq_end, score, analysis, translation) values (NULL," - .$feature->start()."," - .$feature->end()."," - .$feature->score().",'" - .$c."','" - .$ENSP."')"); - $sth->execute(); - - my $sth2 = $self->_db_obj->prepare("insert into analysis (id,db,db_version,program,program_version,gff_source,gff_feature) values ('$c','testens',1,'elia_program',1,'" - .$feature->source_tag()."','" - .$feature->primary_tag()."')"); - $sth2->execute(); - $c++; - } -} - -=head2 write_Protein_feature - - Title : write_Protein_feature - Usage : $obj->write_Protein_feature($ENSP, $feature) - Function: writes a protein feature object of a particular peptide into the database - Example : - Returns : - Args : - - -=cut - -sub write_Protein_feature { - my ($self,$ENSP,$feature) = @_; - - my $sth = $self->_db_obj->prepare("insert into proteinfeature (seq_start, seq_end, score, translation) values (" - .$feature->start()." ," - .$feature->end()." ,'" - .$feature->score()." ,'" - .$ENSP."' - )"); - $sth->execute(); -} - -=head2 get_Analysis - - Title : get_Analysis - Usage : - Function: - Example : - Returns : - Args : - - -=cut - -sub get_Analysis { - my ($self,$id) = @_; - my $sth = $self->_db_obj->prepare("select db,db_version,program,program_version,gff_source,gff_feature,id from analysis where id = $id"); - my $rv = $sth->execute; - my $rh = $sth->fetchrow_hashref; - - if ($sth->rows) { - my $anal = Bio::EnsEMBL::FeatureFactory->new_analysis(); - - if( defined $rh->{'db'} ) { - $anal->db($rh->{'db'}); - } - if( defined $rh->{'db_version'} ) { - $anal->db_version($rh->{'db_version'}); - } - - $anal->program ($rh->{'program'}); - $anal->program_version($rh->{'program_version'}); - $anal->gff_source ($rh->{gff_source}); - $anal->gff_feature ($rh->{gff_feature}); - my $mid = $rh->{'id'}; - - if( !$anal->isa('Bio::EnsEMBL::Ext::Analysis') ) { - $anal->dbID("$mid"); - } - - return $anal; - } else { - $self->throw("Can't fetch analysis id $id\n"); - } - -} - -=head2 exists_Analysis - - Title : exists_Analysis - Usage : $obj->exists_Analysis($anal) - Function: Tests whether this feature already exists in the database - Example : - Returns : Analysis id if the entry exists - Args : Bio::EnsEMBL::Analysis - - -=cut - -sub exists_Analysis { - my ($self,$anal) = @_; - - - $self->throw("Object is not a Bio::EnsEMBL::AnalysisI") unless $anal->isa("Bio::EnsEMBL::AnalysisI"); - # If all the attributes of the analysis object are not set it's existence can't be tested - $self->throw("program property of analysis object not defined") unless ($anal->program); - $self->throw("program_version property of analysis object not defined") unless ($anal->program_version); - $self->throw("gff_source property of analysis object not defined") unless ($anal->gff_source); - $self->throw("gff_feature property of analysis object not defined") unless ($anal->gff_feature); - - my $query; - - # remove leading components of path from db and program - # to prevent lines in 'analysis' table with explicit directories - # quick fix only: this ought to be done somewhere like Analysis.pm, not here - my $db = $anal->db; - $db =~ s!.*/!!; - $anal->db($db); - my $prog = $anal->program; - $prog =~ s!.*/!!; - $anal->program($prog); - - if ($anal->has_database == 1) { - $query = "select id from analysis where db = \"" . $anal->db . "\" and" . - " db_version = \"" . $anal->db_version . "\" and " . - " program = \"" . $anal->program . "\" and " . - " program_version = \"" . $anal->program_version . "\" and " . - " gff_source = \"" . $anal->gff_source . "\" and" . - " gff_feature = \"" . $anal->gff_feature . "\""; - } else { - $query = "select id from analysis where " . - " program = \"" . $anal->program . "\" and " . - " program_version = \"" . $anal->program_version . "\" and " . - " gff_source = \"" . $anal->gff_source . "\" and" . - " gff_feature = \"" . $anal->gff_feature . "\""; - } - - if( exists $self->_db_obj->_analysis_cache->{$query} ) { - return $self->_db_obj->_analysis_cache->{$query}; - } - - my $sth = $self->_db_obj->prepare($query); - my $rv = $sth->execute(); - - if ($rv && $sth->rows > 0) { - my $rowhash = $sth->fetchrow_hashref; - my $anaid = $rowhash->{'id'}; - $self->_db_obj->_analysis_cache->{$query} = $anaid; - return $anaid; - } else { - return 0; - } -} - -=head2 write_Analysis - - Title : write_Analysis - Usage : $obj->write_Analysis($anal) - Function: Writes analysis details to the database - Checks first whether this analysis entry already exists - Example : - Returns : int - Args : Bio::EnsEMBL::AnalysisI - -=cut - -sub write_Analysis { - my ($self,$anal) = @_; - - $self->throw("Argument is not a Bio::EnsEMBL::AnalysisI") unless $anal->isa("Bio::EnsEMBL::AnalysisI"); - # If all the attributes of the analysis object are not set it shouldn't be written - $self->throw("program property of analysis object not defined") unless ($anal->program); - $self->throw("program_version property of analysis object not defined") unless ($anal->program_version); - $self->throw("gff_source property of analysis object not defined") unless ($anal->gff_source); - $self->throw("gff_feature property of analysis object not defined") unless ($anal->gff_feature); - - # First check whether this entry already exists. - my $query; - my $analysisid = $self->exists_Analysis($anal); - return $analysisid if $analysisid; - - - if ($anal->has_database == 1) { - local $^W = 0; - $query = "insert into analysis(id,db,db_version,program,program_version,gff_source,gff_feature) values (NULL,\"" . - $anal->db . "\",\"" . - $anal->db_version . "\",\"" . - $anal->program . "\",\"" . - $anal->program_version . "\",\"" . - $anal->gff_source . "\",\"" . - $anal->gff_feature . "\")"; - } else { - $query = "insert into analysis(id,program,program_version,gff_source,gff_feature) values (NULL,\"" . - $anal->program . "\",\"" . - $anal->program_version . "\",\"" . - $anal->gff_source . "\",\"" . - $anal->gff_feature . "\")"; - } - - my $sth = $self->_db_obj->prepare($query); - my $rv = $sth->execute; - - - $sth = $self->_db_obj->prepare("select last_insert_id()"); - $rv = $sth->execute; - - if ($sth->rows == 1) { - my $rowhash = $sth->fetchrow_hashref; - return $rowhash->{'last_insert_id()'}; - } else { - $self->throw("Wrong number of rows returned : " . $sth->rows . " : should be 1"); - } - -} - -=head2 find_GenomeHits - - Title : find_GenomeHits - Usage : $obj->find_GenomeHits($hitid) - Function: - Example : - Returns : - Args : - - -=cut - - -sub find_GenomeHits { - my ($self,$arg) = @_; - - $self->throw("No hit id input") unless defined($arg); - - my $query = "select c.id, " . - "f.seq_start, " . - "f.seq_end, " . - "f.score, " . - "f.strand, " . - "f.analysis, " . - "f.name, " . - "f.hstart, " . - "f.hend, " . - "f.hid " . - "from feature as f,contig as c " . - "where f.hid = '$arg' and " . - "c.internal_id = f.contig"; - - my $sth = $self->_db_obj->prepare($query); - my $res = $sth->execute; - - my ($contig,$start,$end,$score,$strand,$analysisid,$name,$hstart,$hend,$hid); - - - $sth->bind_columns(undef,\$contig,\$start,\$end,\$score,\$strand,\$analysisid, - \$name,\$hstart,\$hend,\$hid); - - - my %analhash; # Stores all the analysis objects - my @features; - - while($sth->fetch) { - my $out; - my $analysis; - - if (!$analhash{$analysisid}) { - - $analysis = $self->get_Analysis($analysisid); - $analhash{$analysisid} = $analysis; - - } else { - $analysis = $analhash{$analysisid}; - } - - if( !defined $name ) { - $name = 'no_source'; - } - - $out = Bio::EnsEMBL::FeatureFactory->new_feature_pair(); - $out->set_all_fields($start,$end,$strand,$score,$name,'similarity',$contig, - $hstart,$hend,1,$score,$name,'similarity',$hid); - - $out->analysis($analysis); - $out->validate; - - push(@features,$out); - - } - - return @features; -} - - - -=head2 get_PredictionFeature_by_id - - Title : get_PredictionFeature_by_id - Usage : $obj->get_PredictionFeature_by_id($id) - Function: - Example : - Returns : - Args : - - -=cut - -sub get_PredictionFeature_by_id { - my ($self,$genscan_id) = @_; - - unless ($genscan_id){$self->throw("I need a genscan id");} - - my %analhash; - - my $query = "select f.id,f.seq_start,f.seq_end,f.strand,f.score,f.analysis,fset.id,c.id " . - "from feature f, fset ,fset_feature ff,contig c where ff.feature = f.id and fset.id = ff.fset ". - " and c.internal_id=f.contig and ff.fset = $genscan_id and name = \'genscan\'"; - - my $sth = $self->_db_obj->prepare($query); - - $sth->execute(); - - my ($fid,$start,$end,$strand,$score,$analysisid,$fsetid,$contig); - - $sth->bind_columns(undef,\$fid,\$start,\$end,\$strand,\$score,\$analysisid,\$fsetid,\$contig); - - my $fset; - my $analysis; - - $fset = new Bio::EnsEMBL::SeqFeature; - $fset->source_tag('genscan'); - $fset->primary_tag('prediction'); - $fset->id($genscan_id); - - while( my $row = $sth->fetchrow_hashref ) { - my $analysisid = $row->{analysis}; - - if (!$analhash{$analysisid}) { - $analysis = $self->get_Analysis($analysisid); - $analhash{$analysisid} = $analysis; - } else { - $analysis = $analhash{$analysisid}; - } - $fset->analysis($analysis); - $fset->seqname($contig); - $fset->raw_seqname($contig); - - my $out = new Bio::EnsEMBL::SeqFeature; - - $out->seqname ($contig); - $out->start ($row->{seq_start}); - $out->end ($row->{seq_end}); - $out->strand ($row->{strand}); - $out->source_tag('genscan'); - $out->primary_tag('prediction'); - - if( defined $score ) { - $out->score($row->{score}); - } - - $out->analysis($analysis); - - # Final check that everything is ok. - # MC THis isn't playing nicely with the c extensions - #$out->validate(); - $fset->add_sub_SeqFeature($out,'EXPAND'); - $fset->strand($row->{strand}); - } - - return $fset; - -} - - - - -=head2 get_PredictionFeature_as_Transcript - - Title : get_PredictionFeature_as_Transcript - Usage : $obj->get_PredictionFeature_as_Transcript($id) - Function: - Example : - Returns : - Args : - - -=cut - - - - - -sub get_PredictionFeature_as_Transcript{ - my ($self,$genscan_id)=@_; - unless ($genscan_id){$self->throw("I need a genscan id");} - - my $ft = $self->get_PredictionFeature_by_id($genscan_id); - my $contig = $self->_db_obj->get_Contig($ft->seqname); - - # Due to f*cked up genscan phases we are reduced to guessing the phases -# return &Bio::EnsEMBL::DBSQL::Utils::fset2transcript($ft,$contig); - return Bio::EnsEMBL::DBSQL::Utils::fset2transcript_guess_phases($ft,$contig); -} - - - -=head2 _db_obj - - Title : _db_obj - Usage : $obj->_db_obj($newval) - Function: - Example : - Returns : value of _db_obj - Args : newvalue (optional) - - -=cut - -sub _db_obj{ - my ($self,$value) = @_; - if( defined $value) { - $self->{'_db_obj'} = $value; - } - return $self->{'_db_obj'}; - -} - - - -sub exponent { - my ($number) = @_; - - my ($exp) = sprintf("%.3e", $number); - return $exp; -} diff --git a/modules/Bio/EnsEMBL/DBSQL/Feature_ObjForeign.pm b/modules/Bio/EnsEMBL/DBSQL/Feature_ObjForeign.pm deleted file mode 100644 index 9b7a1bce2f..0000000000 --- a/modules/Bio/EnsEMBL/DBSQL/Feature_ObjForeign.pm +++ /dev/null @@ -1,852 +0,0 @@ -# -# EnsEMBL module for Bio::EnsEMBL::DBSQL::Feature_ObjForeign -# -# Cared for by Elia Stupka <elia@ebi.ac.uk>, Philip lijnzaad@ebi.ac.uk -# -# Copyright EnsEMBL -# -# You may distribute this module under the same terms as perl itself - -# POD documentation - main docs before the code - -=head1 NAME - -Bio::EnsEMBL::DBSQL::Feature_ObjForeign, specialization of Feature_Obj that -allows you to translate table names to other names. The intended use is -for an database that reuses (readonly, not writing!) part of another -database in order to save on speed (no loading of all the assembly related -data) and disk usage. - - -=head1 SYNOPSIS - - use Bio::EnsEMBL::DBSQL::Obj; - use Bio::EnsEMBL::DBSQL::Feature_ObjForeign; - - $db = new Bio::EnsEMBL::DBSQL::Obj( -user => 'root', -db => 'pog' , -host => 'caldy' , -driver => 'mysql' ); - my $feature_obj =Bio::EnsEMBL::Feature_ObjForeign->new($obj); - - my $mapping = { feature => myfeature, - static_golden_path => ens081.static_golden_path - }; - my $feature_obj->table_name_tranlations $mapping; - - # From here on, all the application code first translates the table - # names, then does the queries on possibly translated names. - -=head1 DESCRIPTION - -Bio::EnsEMBL::DBSQL::Feature_ObjForeign, specialization of Feature_Obj that -allows you to translate table names to other names. The intended use is -for an database that reuses (readonly, not writing!) part of another -database in order to save on speed (no loading of all the assembly related -data) and disk usage. - -=head1 CONTACT - -Elia Stupka: elia@ebi.ac.uk, Philip lijnzaad@ebi.ac.uk - -=head1 APPENDIX - -The rest of the documentation details each of the object methods. Internal -methods are usually preceded with a _ - -=cut - - -# Let the code begin... - -package Bio::EnsEMBL::DBSQL::Feature_ObjForeign; - -use vars qw(@ISA $AUTOLOAD); -use strict; - -# Object preamble - inheriets from Bio::Root::Object - -use Bio::Root::RootI; -#use Bio::EnsEMBL::DBSQL::Obj; - -use Bio::EnsEMBL::Ghost; -use Bio::EnsEMBL::Gene; -use Bio::EnsEMBL::Exon; -use Bio::EnsEMBL::Transcript; -use Bio::EnsEMBL::FeatureFactory; -use DBI; -use Bio::EnsEMBL::DBSQL::Utils; -use Bio::EnsEMBL::DBSQL::DummyStatement; - - -@ISA = qw(Bio::EnsEMBL::DBSQL::ObjForeign - Bio::Root::RootI - Bio::EnsEMBL::DBSQL::Feature_Obj - ); - -sub new { # comes from ObjForeign - my ($class, @args) = @_; - my $self= $class->SUPER::new(@_); - bless $self, $class; - $self; -} - -=head2 delete - - Title : delete - Usage : - Function: deletes all features from a contig; - Example : - Returns : - Args : - - -=cut - -sub delete { - my ($self,$contig) = @_; - - if (ref( $contig) && $contig->isa("Bio::EnsEMBL::DB::ContigI")) { - $self->throw("You have to give a contig id, not a contig object!"); - } - - - my $sth = $self->_db_obj->prepare("SELECT fs.feature,fs.fset " . - "from ".$self->_lookup_table_name('fset_feature')." as fs, " . - " ".$self->_lookup_table_name('feature')." as f " . - "where fs.feature = f.id " . - "and f.contig = '$contig'"); - - my $res = $sth->execute || $self->warn("Could not find features for contig $contig"); - - my %fset; - - while (my $rowhash = $sth->fetchrow_hashref) { - $fset{$rowhash->{fset}} = 1; - } - - my @fset = keys %fset; - - if ($#fset >= 0) { - my $fsstr = ""; - - foreach my $fs (@fset) { - $fsstr .= $fs . ","; - } - - chop($fsstr); - - - - $sth = $self->_db_obj->prepare("DELETE from ".$self->_lookup_table_name('fset')." where id in ($fsstr)"); - $res = $sth->execute; - - $sth = $self->_db_obj->prepare("DELETE from ".$self->_lookup_table_name('fset_feature')." where fset in ($fsstr)"); - $res = $sth->execute; - } - - #print(STDERR "Deleting features for contig $contig\n"); - - $sth = $self->_db_obj->prepare("DELETE from ".$self->_lookup_table_name('feature')." where contig = '$contig'"); - $res = $sth->execute; - - #print(STDERR "Deleting repeat features for contig $contig\n"); - - $sth = $self->_db_obj->prepare("DELETE from ".$self->_lookup_table_name('repeat_feature')." where contig = '$contig'"); - $res = $sth->execute; - -} - -=head2 write - - Title : write - Usage : $obj->write($contig,@features) - Function: Writes a feature on the genomic sequence of a contig into the database - Example : - Returns : nothing - Args : Bio::EnsEMBL::SeqFeatureI - - -=cut - -sub write { - my ($self,$contig,@features) = @_; - - # - # Yes - we need to rethink how we are writing features into the - # database. This is a little obtuse and clunky now - # - - $self->throw("$contig is not a Bio::EnsEMBL::DB::ContigI") - unless (defined($contig) && $contig->isa("Bio::EnsEMBL::DB::ContigI")); - - my $contigid = $contig->id; - my $analysis; - - - # Put the repeats in a different table, and also things we need to write - # as fsets. - my @repeats; - my @fset; - - FEATURE : - foreach my $feature ( @features ) { - - if( ! $feature->isa('Bio::EnsEMBL::SeqFeatureI') ) { - $self->throw("Feature $feature is not a feature!"); - } - - eval { - $feature->validate(); - }; - - if ($@) { - - next FEATURE; - } - - if($feature->isa('Bio::EnsEMBL::RepeatI')) { - push(@repeats,$feature); - } elsif ( $feature->sub_SeqFeature ) { - push(@fset,$feature); - } else { - if (!defined($feature->analysis)) { - $self->throw("Feature " . $feature->seqname . " " . $feature->source_tag ." doesn't have analysis. Can't write to database"); - } else { - $analysis = $feature->analysis; - } - - - my $analysisid = $self->write_Analysis($analysis); - - if ( $feature->isa('Bio::EnsEMBL::FeaturePair') ) { - my $homol = $feature->feature2; - my $sth = $self->_db_obj->prepare( - "INSERT into ".$self->_lookup_table_name('feature')."(id,contig,seq_start,seq_end,score,strand,name,analysis,hstart,hend,hid,perc_id,evalue,phase,end_phase) ". - "values ('NULL'," - .$contig->internal_id ."," - .$feature->start ."," - .$feature->end ."," - .$feature->score ."," - .$feature->strand ."," - ."'".$feature->source_tag."'," - .$analysisid ."," - .$homol->start ."," - .$homol->end ."," - ."'".$homol->seqname ."'," - .((defined $feature->percent_id) ? $feature->percent_id : 'NULL') ."," - .((defined $feature->p_value) ? &exponent($feature->p_value) : 'NULL') ."," - .((defined $feature->phase) ? $feature->phase : 'NULL') ."," - .((defined $feature->end_phase) ? $feature->end_phase : 'NULL') .")"); - - $sth->execute(); - - - } else { - my $sth = $self->_db_obj->prepare( "INSERT into ".$self->_lookup_table_name('feature')."(id,contig,seq_start,seq_end,score,strand,name,analysis,hstart,hend,hid,perc_id,evalue,phase,end_phase) ". - "values (?,?,?,?,?,?,?,?,?,?,?,?,?,?,?)"); - - $sth->execute('NULL', - $contig->internal_id, - $feature->start, - $feature->end, - $feature->score, - $feature->strand, - $feature->source_tag, - $analysisid, - -1, - -1, - "__NONE__", - 'NULL', - 'NULL', - 'NULL', - 'NULL' ); - } - } - } - - my $sth2 = $self->_db_obj->prepare("INSERT into ".$self->_lookup_table_name('repeat_feature')."(id,contig,seq_start,seq_end,score,strand,analysis,hstart,hend,hid) values(?,?,?,?,?,?,?,?,?,?)"); - - foreach my $feature (@repeats) { - if (!defined($feature->analysis)) { - $self->throw("Feature " . $feature->seqname . " " . $feature->source_tag ." doesn't have analysis. Can't write to database"); - } else { - $analysis = $feature->analysis; - } - - - my $analysisid = $self->write_Analysis($analysis); - my $homol = $feature->feature2; - - $sth2->execute('NULL', - $contig->internal_id, - $feature->start, - $feature->end, - $feature->score, - $feature->strand, - $analysisid, - $homol->start, - $homol->end, - $homol->seqname); - } - - - # Now the predictions - # we can't block do these as we need to get out the id wrt to the features - foreach my $feature ( @fset ) { -# print STDERR "Adding in a fset feature ",$feature->gff_string,"\n"; - - if (!defined($feature->analysis)) { - - $self->throw("Feature " . $feature->seqname . " " . - $feature->source_tag . - " doesn't have analysis. Can't write to database"); - } else { - $analysis = $feature->analysis; - } - - my $analysisid = $self->write_Analysis($analysis); - my $score = $feature->score(); - - if( !defined $score ) { $score = "-1000"; } - - my $sth3 = $self->_db_obj->prepare("INSERT into ".$self->_lookup_table_name('fset')."(id,score) values ('NULL',$score)"); - $sth3->execute(); - - # get out this id. This looks really clunk I know. Any better ideas... ? - - my $sth4 = $self->_db_obj->prepare("SELECT LAST_INSERT_ID()"); - $sth4->execute(); - - my $arr = $sth4->fetchrow_arrayref(); - my $fset_id = $arr->[0]; - - # now write each sub feature - my $rank = 1; - - foreach my $sub ( $feature->sub_SeqFeature ) { - my $sth5 = $self->_db_obj->prepare("INSERT into ".$self->_lookup_table_name('feature')."(id,contig,seq_start,seq_end,score,strand,analysis,name,hstart,hend,hid,evalue,perc_id,phase,end_phase) " - ."values('NULL','" - .$contig->internal_id ."'," - .$sub->start ."," - .$sub->end . "," - .$sub->score . "," - .$sub->strand . "," - .$analysisid . "," - ."'".$sub->source_tag ."'," - ."-1,-1," - ."'".($sub->primary_tag || "__NONE__")."'," - . ((defined $sub->p_value) ? &exponent($sub->p_value) : 'NULL') ."," - . ((defined $sub->percent_id) ? $sub->percent_id : 'NULL') ."," - . ((defined $sub->phase) ? $sub->phase : 'NULL') ."," - . ((defined $sub->end_phase) ? $sub->end_phase : 'NULL') .")"); - - $sth5->execute(); - my $sth6 = $self->_db_obj->prepare("INSERT into ".$self->_lookup_table_name('fset_feature')."(fset,feature,rank) values ($fset_id,LAST_INSERT_ID(),$rank)"); - $sth6->execute(); - $rank++; - } - } - - return 1; -} - -=head2 get_Protein_annseq - - Title : get_Protein_annseq - Usage : get_Protein_annseq ($ENSP); - Function: Creates an annseq object for a particular peptide, storing the peptide - sequence in $annseq->primary_seq, and adding all the protein features as generic - Seqfeatures - Example : - Returns : $annseq - Args : $ENSP - - -=cut - -sub get_Protein_annseq{ - my ($self,$ENSP) = @_; - - my $annseq = Bio::EnsEMBL::AnnSeq->new(); - - my $sth = $self->_db_obj->prepare("SELECT id from ".$self->_lookup_table_name('transcript')." where translation = '$ENSP'"); - my $res = $sth->execute(); - my $rowhash = $sth->fetchrow_hashref; - - my $gene_obj=Bio::EnsEMBL::DBSQL::Gene_Obj->new($self->_db_obj); - my $transcript = $gene_obj->get_Transcript($rowhash->{'id'}); - my $translation = $gene_obj->get_Translation($ENSP); - - $transcript->translation($translation); - - my $seq = $transcript->translate(); - $annseq->primary_seq($seq); - - $sth = $self->_db_obj->prepare("SELECT * from ".$self->_lookup_table_name('proteinfeature')." where translation = '$ENSP'"); - $res = $sth->execute(); - - while( my $rowhash = $sth->fetchrow_hashref) { - my $analysis = $rowhash->{'analysis'}; - my $sth2 = $self->_db_obj->prepare("SELECT * from ".$self->_lookup_table_name('analysis')." where id = '$analysis'"); - my $res2 = $sth2->execute(); - my $rowhash2 = $sth2->fetchrow_hashref; - - my $feature = new Bio::SeqFeature::Generic ( -start => $rowhash->{'seq_start'}, - -end => $rowhash->{'seq_end'}, - -score => $rowhash->{'score'}, - -primary => $rowhash2->{'gff_feature'}, - -source => $rowhash2->{'gff_source'}); - - $annseq->add_SeqFeature($feature); - } - - return $annseq; -} - -=head2 write_all_Protein_features - - Title : write_all_Protein_features - Usage : $obj->write_all_Protein_features($ENSP) - Function: writes all protein features of a particular peptide into the database - Example : - Returns : - Args : - - -=cut - -sub write_all_Protein_features { - my ($self,$prot_annseq,$ENSP) = @_; - - my $c=0; - foreach my $feature ($prot_annseq->all_SeqFeatures()) { - my $sth = $self->_db_obj->prepare("INSERT into ".$self->_lookup_table_name('proteinfeature')." (id,seq_start, seq_end, score, analysis, translation) values (NULL," - .$feature->start()."," - .$feature->end()."," - .$feature->score().",'" - .$c."','" - .$ENSP."')"); - $sth->execute(); - - my $sth2 = $self->_db_obj->prepare("INSERT into ".$self->_lookup_table_name('analysis')." (id,db,db_version,program,program_version,gff_source,gff_feature) values ('$c','testens',1,'elia_program',1,'" - .$feature->source_tag()."','" - .$feature->primary_tag()."')"); - $sth2->execute(); - $c++; - } -} - -=head2 write_Protein_feature - - Title : write_Protein_feature - Usage : $obj->write_Protein_feature($ENSP, $feature) - Function: writes a protein feature object of a particular peptide into the database - Example : - Returns : - Args : - - -=cut - -sub write_Protein_feature { - my ($self,$ENSP,$feature) = @_; - - my $sth = $self->_db_obj->prepare("INSERT into ".$self->_lookup_table_name('proteinfeature')." (seq_start, seq_end, score, translation) values (" - .$feature->start()." ," - .$feature->end()." ,'" - .$feature->score()." ,'" - .$ENSP."' - )"); - $sth->execute(); -} - -=head2 get_Analysis - - Title : get_Analysis - Usage : - Function: - Example : - Returns : - Args : - - -=cut - -sub get_Analysis { - my ($self,$id) = @_; - - my $sth = $self->_db_obj->prepare("SELECT db,db_version,program,program_version,gff_source,gff_feature,id from ".$self->_lookup_table_name('analysis')." where id = $id"); - my $rv = $sth->execute; - my $rh = $sth->fetchrow_hashref; - - if ($sth->rows) { - my $anal = Bio::EnsEMBL::FeatureFactory->new_analysis(); - - if( defined $rh->{'db'} ) { - $anal->db($rh->{'db'}); - } - if( defined $rh->{'db_version'} ) { - $anal->db_version($rh->{'db_version'}); - } - - $anal->program ($rh->{'program'}); - $anal->program_version($rh->{'program_version'}); - $anal->gff_source ($rh->{gff_source}); - $anal->gff_feature ($rh->{gff_feature}); - my $mid = $rh->{'id'}; - - $anal->dbID("$mid"); - return $anal; - } else { - $self->throw("Can't fetch analysis id $id\n"); - } - -} - -=head2 exists_Analysis - - Title : exists_Analysis - Usage : $obj->exists_Analysis($anal) - Function: Tests whether this feature already exists in the database - Example : - Returns : Analysis id if the entry exists - Args : Bio::EnsEMBL::Analysis - - -=cut - -sub exists_Analysis { - my ($self,$anal) = @_; - - - $self->throw("Object is not a Bio::EnsEMBL::AnalysisI") unless $anal->isa("Bio::EnsEMBL::AnalysisI"); - # If all the attributes of the analysis object are not set it's existence can't be tested - $self->throw("program property of analysis object not defined") unless ($anal->program); - $self->throw("program_version property of analysis object not defined") unless ($anal->program_version); - $self->throw("gff_source property of analysis object not defined") unless ($anal->gff_source); - $self->throw("gff_feature property of analysis object not defined") unless ($anal->gff_feature); - - my $query; - - if ($anal->has_database == 1) { - $query = "SELECT id from ".$self->_lookup_table_name('analysis')." where db = \"" . $anal->db . "\" and" . - " db_version = \"" . $anal->db_version . "\" and " . - " program = \"" . $anal->program . "\" and " . - " program_version = \"" . $anal->program_version . "\" and " . - " gff_source = \"" . $anal->gff_source . "\" and" . - " gff_feature = \"" . $anal->gff_feature . "\""; - } else { - $query = "SELECT id from ".$self->_lookup_table_name('analysis')." where " . - " program = \"" . $anal->program . "\" and " . - " program_version = \"" . $anal->program_version . "\" and " . - " gff_source = \"" . $anal->gff_source . "\" and" . - " gff_feature = \"" . $anal->gff_feature . "\""; - } - - if( exists $self->_db_obj->_analysis_cache->{$query} ) { - return $self->_db_obj->_analysis_cache->{$query}; - } - - my $sth = $self->_db_obj->prepare($query); - my $rv = $sth->execute(); - - if ($rv && $sth->rows > 0) { - my $rowhash = $sth->fetchrow_hashref; - my $anaid = $rowhash->{'id'}; - $self->_db_obj->_analysis_cache->{$query} = $anaid; - return $anaid; - } else { - return 0; - } -} - -=head2 write_Analysis - - Title : write_Analysis - Usage : $obj->write_Analysis($anal) - Function: Writes analysis details to the database - Checks first whether this analysis entry already exists - Example : - Returns : int - Args : Bio::EnsEMBL::AnalysisI - -=cut - -sub write_Analysis { - my ($self,$anal) = @_; - - $self->throw("Argument is not a Bio::EnsEMBL::AnalysisI") unless $anal->isa("Bio::EnsEMBL::AnalysisI"); - # If all the attributes of the analysis object are not set it shouldn't be written - $self->throw("program property of analysis object not defined") unless ($anal->program); - $self->throw("program_version property of analysis object not defined") unless ($anal->program_version); - $self->throw("gff_source property of analysis object not defined") unless ($anal->gff_source); - $self->throw("gff_feature property of analysis object not defined") unless ($anal->gff_feature); - - # First check whether this entry already exists. - my $query; - my $analysisid = $self->exists_Analysis($anal); - return $analysisid if $analysisid; - - - if ($anal->has_database == 1) { - local $^W = 0; - $query = "INSERT into ".$self->_lookup_table_name('analysis')."(id,db,db_version,program,program_version,gff_source,gff_feature) values (NULL,\"" . - $anal->db . "\",\"" . - $anal->db_version . "\",\"" . - $anal->program . "\",\"" . - $anal->program_version . "\",\"" . - $anal->gff_source . "\",\"" . - $anal->gff_feature . "\")"; - } else { - $query = "INSERT into ".$self->_lookup_table_name('analysis')."(id,program,program_version,gff_source,gff_feature) values (NULL,\"" . - $anal->program . "\",\"" . - $anal->program_version . "\",\"" . - $anal->gff_source . "\",\"" . - $anal->gff_feature . "\")"; - } - - my $sth = $self->_db_obj->prepare($query); - my $rv = $sth->execute; - - - $sth = $self->_db_obj->prepare("SELECT last_insert_id()"); - $rv = $sth->execute; - - if ($sth->rows == 1) { - my $rowhash = $sth->fetchrow_hashref; - return $rowhash->{'last_insert_id()'}; - } else { - $self->throw("Wrong number of rows returned : " . $sth->rows . " : should be 1"); - } - -} - -=head2 find_GenomeHits - - Title : find_GenomeHits - Usage : $obj->find_GenomeHits($hitid) - Function: - Example : - Returns : - Args : - - -=cut - - -sub find_GenomeHits { - my ($self,$arg) = @_; - - $self->throw("No hit id input") unless defined($arg); - - my $query = "SELECT c.id, " . - "f.seq_start, " . - "f.seq_end, " . - "f.score, " . - "f.strand, " . - "f.analysis, " . - "f.name, " . - "f.hstart, " . - "f.hend, " . - "f.hid " . - "from ".$self->_lookup_table_name('feature')." as f,".$self->_lookup_table_name('contig')." as c " . - "where f.hid = '$arg' and " . - "c.internal_id = f.contig"; - - my $sth = $self->_db_obj->prepare($query); - my $res = $sth->execute; - - my ($contig,$start,$end,$score,$strand,$analysisid,$name,$hstart,$hend,$hid); - - - $sth->bind_columns(undef,\$contig,\$start,\$end,\$score,\$strand,\$analysisid, - \$name,\$hstart,\$hend,\$hid); - - - my %analhash; # Stores all the analysis objects - my @features; - - while($sth->fetch) { - my $out; - my $analysis; - - if (!$analhash{$analysisid}) { - - $analysis = $self->get_Analysis($analysisid); - $analhash{$analysisid} = $analysis; - - } else { - $analysis = $analhash{$analysisid}; - } - - if( !defined $name ) { - $name = 'no_source'; - } - - $out = Bio::EnsEMBL::FeatureFactory->new_feature_pair(); - $out->set_all_fields($start,$end,$strand,$score,$name,'similarity',$contig, - $hstart,$hend,1,$score,$name,'similarity',$hid); - - $out->analysis($analysis); - $out->validate; - - push(@features,$out); - - } - - return @features; -} - - - -=head2 get_PredictionFeature_by_id - - Title : get_PredictionFeature_by_id - Usage : $obj->get_PredictionFeature_by_id($id) - Function: - Example : - Returns : - Args : - - -=cut - - - - - - -sub get_PredictionFeature_by_id { - my ($self,$genscan_id) = @_; - - unless ($genscan_id){$self->throw("I need a genscan id");} - - my $fsetid; - my %analhash; - - my $query = "SELECT f.id,f.seq_start,f.seq_end,f.strand,f.score,f.analysis,fset.id,c.id " . - "from ".$self->_lookup_table_name('feature')." f, ".$self->_lookup_table_name('fset')." fset,".$self->_lookup_table_name('fset_feature')." ff,".$self->_lookup_table_name('contig')." c where ff.feature = f.id and fset.id = ff.fset ". - " and c.internal_id=f.contig and ff.fset ='$genscan_id' and name = 'genscan'"; - - - my $sth = $self->_db_obj->prepare($query); - - $sth->execute(); - - my ($fid,$start,$end,$strand,$score,$analysisid,$contig); - - $sth->bind_columns(undef,\$fid,\$start,\$end,\$strand,\$score,\$analysisid,\$fsetid,\$contig); - - my $current_fset; - if( $sth->fetch ) { - my $out; - - my $analysis; - - if (!$analhash{$analysisid}) { - - - $analysis = $self->get_Analysis($analysisid); - - $analhash{$analysisid} = $analysis; - - } else { - $analysis = $analhash{$analysisid}; - } - - $current_fset = new Bio::EnsEMBL::SeqFeature; - $current_fset->source_tag('genscan'); - $current_fset->primary_tag('prediction'); - $current_fset->analysis($analysis); - $current_fset->seqname($contig); - $current_fset->id($fsetid); - - - $out = new Bio::EnsEMBL::SeqFeature; - - $out->seqname ($contig); - $out->start ($start); - $out->end ($end); - $out->strand ($strand); - - $out->source_tag('genscan'); - $out->primary_tag('prediction'); - - if( defined $score ) { - $out->score($score); - } - - $out->analysis($analysis); - - # Final check that everything is ok. - - $out->validate(); - $current_fset->add_sub_SeqFeature($out,'EXPAND'); - $current_fset->strand($strand); - } - - else { $self->throw("Fset $genscan_id does not exist in the database");} - - -return $current_fset; - -} - - - - -=head2 get_PredictionFeature_as_Transcript - - Title : get_PredictionFeature_as_Transcript - Usage : $obj->get_PredictionFeature_as_Transcript($id) - Function: - Example : - Returns : - Args : - - -=cut - - - - - -sub get_PredictionFeature_as_Transcript{ - my ($self,$genscan_id)=@_; - unless ($genscan_id){$self->throw("I need a genscan id");} - - my $ft=$self->get_PredictionFeature_by_id($genscan_id); - my $contig=$self->_db_obj->get_Contig($ft->seqname); - - &Bio::EnsEMBL::DBSQL::Utils::fset2transcript($ft,$contig); - - -} - - - -=head2 _db_obj - - Title : _db_obj - Usage : $obj->_db_obj($newval) - Function: - Example : - Returns : value of _db_obj - Args : newvalue (optional) - - -=cut - -sub _db_obj{ - my ($self,$value) = @_; - if( defined $value) { - $self->{'_db_obj'} = $value; - } - return $self->{'_db_obj'}; - -} - -sub exponent { - my ($number) = @_; - - my ($exp) = sprintf("%.3e", $number); - return $exp; -} diff --git a/modules/Bio/EnsEMBL/DBSQL/ObjForeign.pm b/modules/Bio/EnsEMBL/DBSQL/ObjForeign.pm deleted file mode 100644 index 9c3c6a977e..0000000000 --- a/modules/Bio/EnsEMBL/DBSQL/ObjForeign.pm +++ /dev/null @@ -1,146 +0,0 @@ -# EnsEMBL module for Bio::EnsEMBL::DBSQL::Foreign -# -# Cared for Philip lijnzaad@ebi.ac.uk -# -# Copyright EnsEMBL -# -# You may distribute this module under the same terms as perl itself - -# POD documentation - main docs before the code - -=head1 NAME - -Bio::EnsEMBL::DBSQL::ObjForeign - - -=head1 SYNOPSIS - - use Bio::EnsEMBL::DBSQL::Obj; - use Bio::EnsEMBL::DBSQL::Feature_ObjForeign; - - $db = new Bio::EnsEMBL::DBSQL::Obj( -user => 'root' - , -db => 'pog' - , -host => 'caldy' - , -driver => 'mysql' ); - - my $translations = { 'static_golden_path' => 'my_golden_path' , - 'exon' => 'ens075.exon'}; - my $feature_obj = - Bio::EnsEMBL::Feature_ObjForeign->new( - -dbobj => $db, - -table_name_translations => $translations); - - # rest as with Feature_Obj. - -=head1 DESCRIPTION - -This class is like a utility class for mixing in to other Obj classes -(e.g. Gene_Obj or Feature_Obj) resulting in an adaptor that translate -table names to other names on the fly. The intended use is for an database -that reuses part of another database in order to save on speed (no loading -of all the assembly related data) and disk usage. - -This class is typically to be sub-classed. - -=head1 CONTACT - -Philip lijnzaad@ebi.ac.uk - -=head1 APPENDIX - -The rest of the documentation details each of the object methods. Internal -methods are usually preceded with a _ - -=cut - -# Let the code begin... - -package Bio::EnsEMBL::DBSQL::ObjForeign; - -use vars qw(@ISA); -use strict; - - -sub new { - my ($class, @args) = @_; - my $self= {}; - bless $self, $class; - - my ($db_obj, $table_name_translations, $read_db_obj) - = $self->_rearrange([ qw( DBOBJ TABLE_NAME_TRANSLATIONS READ_DBOBJ) ] - ,@args); - $db_obj || $self->throw("I need a db obj ..."); - $self->_db_obj($db_obj); - $read_db_obj = $db_obj unless $db_obj; - $self->_read_db_obj($read_db_obj); - # table_name_translations are optional - $self->_table_name_translations($table_name_translations); - $self->use_delayed_insert(1); - return $self; # success - we hope! -} - -sub _read_db_obj { - my ($self, $value) = @_; - - if( defined $value) { - my $needed = 'Bio::EnsEMBL::DBSQL::Obj'; - if ( ref($value) ne $needed ) { - $self->throw("expecting a $needed"); - } - $self->{'_read_db_obj'} = $value; - } - return $self->{'_read_db_obj'}; -} - - - -=head2 _table_name_tranlations - - Title : table_name_tranlations - Usage : - Function: sets/gets the hash used for looking up table names - Example : $self->_table_name_tranlations - { 'static_golden_path' => 'my_golden_path' , # same db - 'exon' => 'ens075.exon' # different table - } - Returns : - Args : - - -=cut - -sub _table_name_translations { - my ($self,$value) = @_; - if( defined $value) { - if ( ref($value) ne 'HASH') { - $self->throw('expecting hash ref'); - } - $self->{'_table_name_translations'} = $value; - } - return $self->{'_table_name_translations'}; -} - -=head2 _lookup_table_name - - Title : _lookup_table_name - Usage : - Function: translate name to other name - Example : - Returns : new name if there is a translation for this table name, - the old name otherwise. - Args : the table name. - -=cut - -sub _lookup_table_name { - my ($self,$name) = @_; - - my $table = $self->_table_name_translations; - my $newname = $table->{$name}; - if (defined $table && defined $newname ) { - return $newname ; - } - return $name; -} - -1; diff --git a/modules/Bio/EnsEMBL/DBSQL/RawContig.pm b/modules/Bio/EnsEMBL/DBSQL/RawContig.pm deleted file mode 100755 index 4030890171..0000000000 --- a/modules/Bio/EnsEMBL/DBSQL/RawContig.pm +++ /dev/null @@ -1,2411 +0,0 @@ -# -# BioPerl module for Contig -# -# Cared for by Ewan Birney <birney@sanger.ac.uk> -# -# Copyright Ewan Birney -# -# You may distribute this module under the same terms as perl itself - -# POD documentation - main docs before the code - -=head1 NAME - -Bio::EnsEMBL::DB::RawContig - Handle onto a database stored raw contiguous DNA - -=head1 SYNOPSIS - - # get a contig object somehow,eg from an DB::Obj - - @genes = $contig->get_all_Genes(); - @sf = $contig->get_all_RepeatFeatures(); - @sf = $contig->get_all_SimilarityFeatures(); - - $contig->id(); - $contig->length(); - $primary_seq = $contig->primary_seq(); - -=head1 DESCRIPTION - -A RawContig is physical piece of DNA coming out a sequencing project, -ie a single product of an assembly process. A RawContig defines an atomic -coordinate system on which features and genes are placed (remember that -genes can cross between atomic coordinate systems). - -=head1 CONTACT - -Describe contact details here - -=head1 APPENDIX - -The rest of the documentation details each of the object methods. Internal methods are usually preceded with a _ - -=cut - - -# Let the code begin... - - -package Bio::EnsEMBL::DBSQL::RawContig; -use vars qw(@ISA); -use strict; - -# Object preamble - inherits from Bio::Root::Object - -use Bio::Root::RootI; - -use Bio::EnsEMBL::DBSQL::DBAdaptor; -use Bio::EnsEMBL::DBSQL::AnalysisAdaptor; -use Bio::EnsEMBL::DBSQL::FeatureAdaptor; -use Bio::EnsEMBL::DB::RawContigI; - -use Bio::EnsEMBL::Repeat; -use Bio::EnsEMBL::ContigOverlap; -use Bio::EnsEMBL::FeatureFactory; -use Bio::EnsEMBL::Chromosome; -use Bio::EnsEMBL::DBSQL::DBPrimarySeq; -use Bio::PrimarySeq; - -@ISA = qw(Bio::EnsEMBL::DB::RawContigI Bio::Root::RootI); - -sub new { - my( $pkg, @args ) = @_; - - my $self = bless {}, $pkg; - - my ( - $dbobj, - $id, - $perlonlysequences, - $contig_overlap_source, - $overlap_distance_cutoff, - ) = $self->_rearrange([qw( - DBOBJ - ID - PERLONLYSEQUENCES - CONTIG_OVERLAP_SOURCE - OVERLAP_DISTANCE_CUTOFF - )], @args); - - $id || $self->throw("Cannot make contig db object without id"); - $dbobj || $self->throw("Cannot make contig db object without db object"); - if( !$dbobj->isa('Bio::EnsEMBL::DBSQL::Obj') && !$dbobj->isa('Bio::EnsEMBL::DBSQL::DBAdaptor')) { - $self->throw("Cannot make contig db object with a $dbobj object"); - } - - - $self->id($id); - $self->dbobj($dbobj); - $self->fetch(); - $self->perl_only_sequences($perlonlysequences); - $self->overlap_distance_cutoff($overlap_distance_cutoff); - - return $self; -} - - -sub direct_new { - my( $pkg, @args ) = @_; - my $self = bless {}, $pkg; - - my ( - $dbobj, - $id, - $perlonlysequences, - $overlap_distance_cutoff, - $internal_id, - $dna_id, - $seq_version, - $cloneid, - $chr_start, - $chr_end, - $raw_start, - $raw_end, - $raw_ori, - $offset, - $contig_length - ) = $self->_rearrange([qw( - DBOBJ - ID - PERLONLYSEQUENCES - OVERLAP_DISTANCE_CUTOFF - INTERNAL_ID - DNA_ID - SEQ_VERSION - CLONEID - CHR_START - CHR_END - RAW_START - RAW_END - RAW_ORI - OFFSET - CONTIG_LENGTH - )], @args); - - $id || $self->throw("Cannot make contig db object without id"); - $dbobj || $self->throw("Cannot make contig db object without db object"); - - if( !$dbobj->isa('Bio::EnsEMBL::DBSQL::Obj') && !$dbobj->isa('Bio::EnsEMBL::DBSQL::DBAdaptor')) { - $self->throw("Cannot make contig db object with a $dbobj object"); - } - - if( !$internal_id || !$dna_id || !defined($seq_version) || !$cloneid || !defined $chr_start || !defined $chr_end) { - $self->throw("you don't have all the data to make a direct new [$internal_id,$dna_id,$seq_version,$cloneid,$chr_start,$chr_end]!"); - } - - $self->id($id); - $self->dbobj($dbobj); - $self->internal_id($internal_id); - $self->dna_id($dna_id); - $self->seq_version($seq_version); - $self->cloneid ($cloneid); - $self->perl_only_sequences($perlonlysequences); - $self->overlap_distance_cutoff($overlap_distance_cutoff); - $self->_chr_start($chr_start); - $self->_chr_end($chr_end); - $self->static_golden_start($raw_start); - $self->static_golden_end($raw_end); - $self->static_golden_ori($raw_ori); - $self->embl_offset($offset); - $self->length($contig_length); - - return $self; -} - - -=head2 fetch - - Title : fetch - Usage : $contig->fetch($contig_id) - Function: fetches the data necessary to build a Rawcontig object - Example : $contig->fetch(1) - Returns : Bio::EnsEMBL::DBSQL::RawContig object - Args : $contig_id - - -=cut - -sub fetch { - my ($self) = @_; - - my $id=$self->id; - - my $query = - " SELECT contig.internal_id - , contig.dna - , clone.embl_version - , clone.id - , contig.offset - FROM contig - , clone - WHERE contig.clone = clone.internal_id - AND contig.id = '$id' - "; - - my $sth = $self->dbobj->prepare($query); - my $res = $sth->execute(); - - if (my $row = $sth->fetchrow_arrayref) { - $self->internal_id($row->[0]); - $self->dna_id ($row->[1]); - $self->seq_version($row->[2]); - $self->cloneid ($row->[3]); - $self->embl_offset ($row->[4]); - } else { - $self->throw("Contig $id does not exist in the database or does not have DNA sequence"); - } - - return $self; -} - -=head2 get_all_Genes - - Title : get_all_Genes - Usage : - Function: - Example : - Returns : - Args : - - -=cut - -sub get_all_Genes{ - my ($self, $supporting) = @_; - my @out; - my $contig_id = $self->internal_id(); - my %got; - # prepare the SQL statement - - -my $query=" - SELECT t.gene_id - FROM transcript t, - exon_transcript et, - exon e - WHERE e.contig_id = '$contig_id' - AND et.exon_id = e.exon_id - AND t.transcript_id = et.transcript_id - "; - - - return $self->_gene_query($query,$supporting); -} - -=head2 get_old_Genes - - Title : get_old_Genes - Usage : my @mapped_Genes=$rc->get_old_Genes - Function: Used to get out old Genes (not modifying coordinates) - Returns : an array of Bio::EnsEMBL::Exon objects - Args : none - - -=cut - -sub get_old_Genes { - my ($self,$mapref) = @_; - - my %map = %$mapref; - #This method requires a connection to a crossmatch database - if (!$self->_crossdb) { $self->throw("You need a crossmatch database to call get_old_Genes!");} - my $crossdb = $self->_crossdb; - - #The crossdb should be holding onto old and new dbs, we need the old one here... - my $old_db; - eval { - $old_db=$self->_crossdb->old_dbobj; - }; - if ($@) { - $self->throw("The crossmatch database has to hold the old dna database to be able to call get_old_Genes! $@"); - } - my $oldcontig; - my $oldid = $map{$self->id}; - eval { - $oldcontig = $old_db->get_Contig($oldid); - }; - - #If the clone does not exist, these are really new Genes - if ($@) { - #print STDERR "Contig ".$oldid." doesn't exist in old db ".$old_db->dbname.", returning empty array...\n"; - return (); - } - - my @genes=$oldcontig->get_all_Genes(); - my $size=scalar (@genes); - #print STDERR "Returning $size old Genes as they are for contig ".$oldid."\n"; - return @genes; -} - -=head2 get_all_Exons - - Title : get_all_Exons - Usage : - Function: returns all exons for this contig - Example : - Returns : - Args : - -=cut - -sub get_all_Exons { - - my ($self)=@_; - - - my $contig_id=$self->id; - - - my $query="SELECT e.id, e.seq_start,e.seq_end,e.strand,e.phase,e.created,e.modified,e.version - FROM exon e,contig c - WHERE c.internal_id=e.contig and c.id ='$contig_id'"; - - my $sth = $self->dbobj->prepare ($query); - $sth->execute; - - my ($id,$start,$end,$strand,$phase,$created,$modified,$version); - $sth->bind_columns (undef,\$id,\$start,\$end,\$strand,\$phase,\$created,\$modified,\$version); - - my @exons; - while ($sth->fetch){ - my $exon=Bio::EnsEMBL::Exon->new; - $exon->id($id); - $exon->start($start); - $exon->end($end); - $exon->strand($strand); - $exon->seqname($self->id); - $exon->contig_id($self->id); - $exon->phase($phase); - $exon->created($created); - $exon->modified($modified); - $exon->version($version); - $exon->sticky_rank(1); - - push @exons,$exon; - } - return @exons; -} - - -=head2 get_old_Exons - - Title : get_old_Exons - Usage : my @mapped_exons=$rc->get_old_Exons - Function: Used to get out exons in new coordinates - Returns : an array of Bio::EnsEMBL::Exon objects - Args : none - - -=cut - -sub get_old_Exons { - my ($self,$logfile,$maphref) = @_; - - my @unmapped; - - #This method requires a connection to a crossmatch database - if (!$self->_crossdb) { $self->throw("You need a crossmatch database to call get_old_exons!");} - my $crossdb = $self->_crossdb; - - my %map = %$maphref; - #The crossdb should be holding onto old and new dbs, we need the old one here... - my $old_db; - eval { - $old_db=$crossdb->old_dbobj; - }; - if ($@) { - $self->throw("The crossmatch database has to hold the old dna database to be able to call get_old_exons! $@"); - } - my $oldclone; - my $oldcontig; - eval { - $oldclone = $old_db->get_Clone($self->cloneid); - }; - - #If the clone does not exist, these are really new exons - if ($@) { - return (); - } - - my $newclone= $self->dbobj->get_Clone($self->cloneid); - #If the clones have the same version, the underlying dna hasn't changed, - #therefore we just return the old exons... - - #FIXME - the above is not necessarily true - contigs may have changed - #due to spliting on different boundaries and coordinates shifted accordingly - - if ($oldclone->embl_version == $newclone->embl_version) { - my $oldcontig; - my $oldid = $map{$self->id}; - eval { - $oldcontig = $oldclone->get_Contig($oldid); - }; - if ($@) { - print STDERR "Clones with id ".$oldclone->id." have the same version in old and new db, but contig ".$self->id." is not there! (CLONE VERSION BUG)\n"; - return (); - } - my @exons=$oldcontig->get_all_Exons(); - foreach my $exon (@exons) { - $exon->seqname($self->id); - $exon->contig_id($self->id); - } - my $size=scalar (@exons); - #print STDERR "Returning $size old exons as they are for contig ".$oldid." on clone ".$oldclone->id."\n"; - return @exons; - } - #We get out a SymmetricContigFeatureContainer from the crossdb and use it #to retrieve feature pairs for this contig, then sort them - my $sfpc = $crossdb->get_SymmetricContigFeatureContainer; - my @fp=$sfpc->get_FeaturePair_list_by_rawcontig_id($self->id,$newclone->embl_version); - my @sorted_fp= sort { $a->start <=> $b->start} @fp; - - my %validoldcontigs; - my %fphash; - my @old_exons; - foreach my $fp ( @sorted_fp ) { - my $contigid = $fp->hseqname; - my $oldcontig=$old_db->get_Contig($contigid); - push @old_exons, $oldcontig->get_all_Exons; - $validoldcontigs{$contigid} = $fp->hseqname; - if( !exists $fphash{$fp->hseqname} ) { - $fphash{$fp->hseqname} = []; - } - push(@{$fphash{$fp->hseqname}},$fp); - } - #We now need to get all the Genes for this clone on the old case - # now perform the mapping - - my @mapped_exons; - EXON:foreach my $exon (@old_exons) { - my $mapped=0; - foreach my $fp ( @{$fphash{$validoldcontigs{$exon->seqname}}} ) { - if( $fp->hstart < $exon->start && $fp->hend > $exon->start ) { - if( $fp->strand == $fp->hstrand ) { - # straightforward mapping - $exon->seqname($self->id); - $exon->contig_id($self->id); - $exon->start($fp->start + $exon->start - $fp->hstart); - $exon->end($fp->start + $exon->end - $fp->hstart); - } else { - # Grrr strand hell. - my $oldstart = $exon->start; - my $oldend = $exon->end; - $exon->seqname($self->id); - $exon->contig_id($self->id); - $exon->start($fp->hend - ($oldstart - $fp->hend)); - $exon->end ($fp->hend - ($oldend - $fp->hend)); - $exon->strand( -1 * $exon->strand); - } - $mapped=1; - push (@mapped_exons,$exon); - next EXON; - } - } - if ($mapped == 0) { - push (@unmapped,$exon->id); - print $logfile "LOST EXON: ".$exon->id." (In get_old_Exons)\n"; - } - } - $self->unmapped_exons(@unmapped); - return @mapped_exons; -} - - -=head2 get_Genes_by_Type - - Title : get_Genes_by_Type - Usage : - Function: - Example : - Returns : - Args : - - -=cut - -sub get_Genes_by_Type{ - my ($self,$type,$supporting) = @_; - my @out; - my $contig_id = $self->internal_id(); - my %got; - # prepare the SQL statement -unless ($type){$self->throw("I need a type argument e.g. ensembl")}; - -my $query=" - SELECT t.gene_id - FROM transcript t, - exon_transcript et, - exon e, gene g - WHERE e.contig_id = $contig_id - AND et.exon_id = e.exon_id - AND t.transcript_id = et.transcript_id - AND t.gene_id = g.gene_id - AND g.type = '$type' - "; - - - return $self->_gene_query($query,$supporting); -} - - -sub _gene_query{ - - my ($self, $query,$supporting) = @_; - - my @out; - my $contig_id = $self->internal_id(); - my %got; - # prepare the SQL statement - my $sth = $self->dbobj->prepare($query); - - my $res = $sth->execute(); - my $genea = $self->dbobj->get_GeneAdaptor(); - - while (my $rowhash = $sth->fetchrow_hashref) { - - if( ! exists $got{$rowhash->{'gene_id'}}) { - push(@out,$genea->fetch_by_dbID($rowhash->{'gene_id'})); - $got{$rowhash->{'gene_id'}} = 1; - } - } - - if(defined ($supporting) && $supporting eq 'evidence'){ - my $exona = $self->dbobj->get_ExonAdaptor(); - foreach my $g(@out){ - foreach my $exon($g->get_all_Exons){ - $exona->fetch_evidence_by_Exon($exon); - } - } - } - - if (@out) { - return @out; - } - return; -} - - - -=head2 has_genes - - Title : has_genes - Usage : - Function: returns 1 if there are genes, 0 otherwise. - Example : - Returns : - Args : - - -=cut - - - - -sub has_genes{ - my ($self,@args) = @_; - my $contig_id = $self->internal_id(); - - my $seen =0; - my $sth = $self->dbobj->prepare("select exon_id from exon where contig_id = '$contig_id' limit 1"); - $sth->execute(); - - my $rowhash; - while ( ($rowhash = $sth->fetchrow_hashref()) ) { - $seen = 1; - last; - } - return $seen; -} - -=head2 primary_seq - - Title : primary_seq - Usage : - Function: - Example : - Returns : - Args : - - -=cut - -sub primary_seq{ - my ($self,@args) = @_; - - if( $self->perl_only_sequences == 1 ) { - return $self->perl_primary_seq(); - } - return $self->db_primary_seq(); - -} - -=head2 db_primary_seq - - Title : db_primary_seq - Usage : $dbseq = $contig->db_primary_seq(); - Function: Gets a Bio::EnsEMBL::DBSQL::DBPrimarySeq object out from the contig - Example : - Returns : Bio::EnsEMBL::DBSQL::DBPrimarySeq object - Args : - - -=cut - -sub db_primary_seq { - my ($self) = @_; - - my $dbseq = Bio::EnsEMBL::DBSQL::DBPrimarySeq->new( - -dna => $self->dna_id, - -db_handle => $self->dbobj->dnadb, - ); - - return $dbseq; -} - - - -=head2 perl_primary_seq - - Title : seq - Usage : $seq = $contig->perl_primary_seq(); - Function: Gets a Bio::PrimarySeqI object out from the contig - Example : - Returns : Bio::PrimarySeqI object - Args : - - -=cut - -sub perl_primary_seq { - my ($self) = @_; - - if ( $self->_seq_cache() ) { - return $self->_seq_cache(); - } - - my $dna_id = $self->dna_id() - or $self->throw("No dna_id in RawContig ". $self->id); - my $sth = $self->dbobj->dnadb->prepare(q{ SELECT sequence FROM dna WHERE id = ? }); - my $res = $sth->execute($dna_id); - - my($str) = $sth->fetchrow - or $self->throw("No DNA sequence in RawContig " . $self->id . " for dna id " . $dna_id); - - # Shouldn't sequence integrity be checked on the way - # into the datbase instead of here? -# $str =~ /[^ABCDGHKMNRSTVWY]/ && $self->warn("Got some non standard DNA characters here! Yuk!"); -# $str =~ s/\s//g; -# $str =~ s/[^ABCDGHKMNRSTVWY]/N/g; - - my $ret = Bio::PrimarySeq->new( - -seq => $str, - -display_id => $self->id, # eg: AC004092.00001 - -primary_id => $self->internal_id, # eg: 874 - -moltype => 'dna', - ); - $self->_seq_cache($ret); - - return $ret; -} - -=head2 _seq_cache - - Title : _seq_cache - Usage : $obj->_seq_cache($newval) - Function: Used to cache the primary seq object to avoid - more than one trip to the database for the dna - Returns : value of _seq_cache - Args : newvalue (optional) - - -=cut - -sub _seq_cache{ - my $obj = shift; - if( @_ ) { - my $value = shift; - $obj->{'_seq_cache'} = $value; - } - return $obj->{'_seq_cache'}; - -} - - - -=head2 get_all_SeqFeatures - - Title : get_all_SeqFeatures - Usage : foreach my $sf ( $contig->get_all_SeqFeatures - Function: Gets all the sequence features on the whole contig - Example : - Returns : - Args : - - -=cut - -sub get_all_SeqFeatures { - my ($self) = @_; - - my @out; - - push(@out,$self->get_all_SimilarityFeatures); - push(@out,$self->get_all_RepeatFeatures); - # push(@out,$self->get_all_PredictionFeatures); - - return @out; -} - - -=head2 get_all_SimilarityFeatures_above_score - - Title : get_all_SimilarityFeatures_above_score - Usage : foreach my $sf ( $contig->get_all_SimilarityFeatures_above_score(analysis_type, score) ) - Function: - Example : - Returns : - Args : - - -=cut - -sub get_all_SimilarityFeatures_above_score{ - my ($self, $analysis_type, $score) = @_; - - $self->throw("Must supply analysis_type parameter") unless $analysis_type; - $self->throw("Must supply score parameter") unless $score; - - my @array; - - my $id = $self->internal_id(); - my $length = $self->length(); - - if( $self->use_rawcontig_acc() ) { - my $fplist = Bio::EnsEMBL::Ext::RawContigAcc::FeaturePairList_by_Score($id,$analysis_type,$score); - @array = $fplist->each_FeaturePair; - return @array; - } - - my %analhash; - - #First of all, get all features that are part of a feature set with high enough score and have the right type - - my $statement = "SELECT feature.id, seq_start, seq_end, strand, feature.score, analysis, name, " . - "hstart, hend, hid, evalue, perc_id, phase, end_phase, fset, rank, fset.score " . - "FROM feature, fset_feature, fset, analysisprocess " . - "WHERE feature.contig ='$id' " . - "AND fset_feature.feature = feature.id " . - "AND fset.id = fset_feature.fset " . - "AND feature.score > '$score' " . - "AND feature.analysis = analysisprocess.analysisId " . - "AND analysisprocess.db = '$analysis_type' " . - "ORDER BY fset"; - - my $sth = $self->dbobj->prepare($statement); - $sth->execute(); - - my ($fid,$start,$end,$strand,$f_score,$analysisid,$name,$hstart,$hend,$hid,$evalue,$perc_id,$phase,$end_phase,$fset,$rank,$fset_score); - my $seen = 0; - - # bind the columns - - $sth->bind_columns(undef,\$fid,\$start,\$end,\$strand,\$f_score,\$analysisid,\$name,\$hstart,\$hend,\$hid,\$evalue,\$perc_id,\$phase,\$end_phase,\$fset,\$rank,\$fset_score); - - my $out; - - my $fset_id_str = ""; - - while($sth->fetch) { - - my $analysis; - - if (!$analhash{$analysisid}) { - my $analysis_adp = new Bio::EnsEMBL::DBSQL::AnalysisAdaptor($self->dbobj); - $analysis = $analysis_adp->fetch_by_dbID($analysisid); - $analhash{$analysisid} = $analysis; - - } else { - $analysis = $analhash{$analysisid}; - } - - if( !defined $name ) { - $name = 'no_source'; - } - - #Build fset feature object if new fset found - if ($fset != $seen) { -# print(STDERR "Making new fset feature $fset\n"); - $out = new Bio::EnsEMBL::SeqFeature; - $out->id($fset); - $out->analysis($analysis); - $out->seqname ($self->id); - $out->score($fset_score); - $out->source_tag($name); - $out->primary_tag("FSET"); - - $seen = $fset; - push(@array,$out); - } - $fset_id_str = $fset_id_str . $fid . ","; - #Build Feature Object - my $feature = new Bio::EnsEMBL::SeqFeature; - $feature->seqname ($self->id); - $feature->start ($start); - $feature->end ($end); - $feature->strand ($strand); - $feature->source_tag ($name); - $feature->primary_tag('similarity'); - $feature->id ($fid); - $feature->p_value ($evalue) if (defined $evalue); - $feature->percent_id ($perc_id) if (defined $perc_id); - $feature->phase ($phase) if (defined $phase); - $feature->end_phase ($end_phase) if (defined $end_phase); - - if( defined $f_score ) { - $feature->score($f_score); - } - - $feature->analysis($analysis); - - # Final check that everything is ok. - $feature->validate(); - - #Add this feature to the fset - $out->add_sub_SeqFeature($feature,'EXPAND'); - - } - - #Then get the rest of the features, i.e. featurepairs and single features that are not part of a fset - $fset_id_str =~ s/\,$//; - - if ($fset_id_str) { - $statement = "SELECT feature.id, seq_start, seq_end, strand, score, analysis, name, hstart, hend, hid, evalue, perc_id, phase, end_phase " . - "FROM feature, analysisprocess " . - "WHERE feature.id not in (" . $fset_id_str . ") " . - "AND feature.score > '$score' " . - "AND feature.analysis = analysisprocess.analysisId " . - "AND analysisprocess.db = '$analysis_type' " . - "AND feature.contig = '$id' "; - - $sth = $self->dbobj->prepare($statement); - - } else { - $statement = "SELECT feature.id, seq_start, seq_end, strand, score, analysis, name, hstart, hend, hid, evalue, perc_id, phase, end_phase " . - "FROM feature, analysisprocess " . - "WHERE feature.score > '$score' " . - "AND feature.analysis = analysisprocess.analysisId " . - "AND analysisprocess.db = '$analysis_type' " . - "AND feature.contig = '$id' "; - - $sth = $self->dbobj->prepare($statement); - } - - $sth->execute(); - - # bind the columns - $sth->bind_columns(undef,\$fid,\$start,\$end,\$strand,\$f_score,\$analysisid,\$name,\$hstart,\$hend,\$hid,\$evalue,\$perc_id,\$phase,\$end_phase); - - while($sth->fetch) { - my $out; - my $analysis; - - if (!$analhash{$analysisid}) { - - my $analysis_adp = new Bio::EnsEMBL::DBSQL::AnalysisAdaptor($self->dbobj); - $analysis = $analysis_adp->fetch_by_dbID($analysisid); - $analhash{$analysisid} = $analysis; - - - } else { - $analysis = $analhash{$analysisid}; - } - - if( !defined $name ) { - $name = 'no_source'; - } - - if( $hid ne '__NONE__' ) { - # is a paired feature - # build EnsEMBL features and make the FeaturePair - - $out = Bio::EnsEMBL::FeatureFactory->new_feature_pair(); - - - $out->set_all_fields($start,$end,$strand,$f_score,$name,'similarity',$self->id, - $hstart,$hend,1,$f_score,$name,'similarity',$hid); - - $out->analysis ($analysis); - $out->id ($hid); # MC This is for Arek - but I don't - # really know where this method has come from. - } else { - $out = new Bio::EnsEMBL::SeqFeature; - $out->seqname ($self->id); - $out->start ($start); - $out->end ($end); - $out->strand ($strand); - $out->source_tag ($name); - $out->primary_tag('similarity'); - $out->id ($fid); - $out->p_value ($evalue) if (defined $evalue); - $out->percent_id ($perc_id) if (defined $perc_id); - $out->phase ($phase) if (defined $phase); - $out->end_phase ($end_phase) if (defined $end_phase); - - if( defined $f_score ) { - $out->score($f_score); - } - $out->analysis($analysis); - } - # Final check that everything is ok. - $out->validate(); - - push(@array,$out); - - } - - return @array; -} - - - -=head2 get_all_SimilarityFeatures - - Title : get_all_SimilarityFeatures - Usage : foreach my $sf ( $contig->get_all_SimilarityFeatures($start,$end) ) - Function: Gets all the sequence similarity features on the whole contig - Example : - Returns : - Args : - - -=cut - -sub get_all_SimilarityFeatures { - my ($self) = @_; - - my @array; - my @fps; - - my $id = $self->internal_id(); - my $length = $self->length(); - - #Removed genscan addition, now Virtual Contig adds it using properly get_all_PredictionFeatures - - #my @genscan = $self->get_all_PredictionFeatures; - - #push(@array,@genscan); - my %analhash; - - #Then get the rest of the features, i.e. featurepairs and single features that are not part of a fset - my ($fid,$start,$end,$strand,$f_score,$analysisid,$name,$hstart,$hend,$hid,$evalue,$perc_id,$phase,$end_phase); - - my $sth = $self->dbobj->prepare("select id,seq_start,seq_end,strand,score,analysis,name,hstart,hend,hid, evalue, perc_id, phase, end_phase ". - "from feature where contig = $id"); - - $sth->execute(); - - # bind the columns - $sth->bind_columns(undef,\$fid,\$start,\$end,\$strand,\$f_score,\$analysisid,\$name,\$hstart,\$hend,\$hid,\$evalue,\$perc_id,\$phase,\$end_phase); - - FEAT: while($sth->fetch) { - my $out; - my $analysis; - - #print STDERR "\nID $fid, START $start, END $end, STRAND $strand, SCORE $f_score, EVAL $evalue, PHASE $phase, EPHASE $end_phase, ANAL $analysisid HID $hid\n"; - - if (!$analhash{$analysisid}) { - - my $analysis_adp=Bio::EnsEMBL::DBSQL::AnalysisAdaptor->new($self->dbobj); - - eval { - $analysis = $analysis_adp->fetch_by_dbID($analysisid); - $analhash{$analysisid} = $analysis; - }; - if ($@) { - print STDERR "Error fetching analysis $analysisid. Skipping [$@]\n"; - next FEAT; - } - - } else { - $analysis = $analhash{$analysisid}; - } - - if( !defined $name ) { - $name = 'no_source'; - } elsif ($name eq "genscan" ||$name eq "fgenesh" ||$name eq "halfwise" ) { - next FEAT; - } - if( $hid ne '__NONE__' ) { - # is a paired feature - # build EnsEMBL features and make the FeaturePair - #print "making a feature pair\n"; - #print "name = ".$name." ID ".$hid."\n"; - $out = Bio::EnsEMBL::FeatureFactory->new_feature_pair(); - - - $out->set_all_fields($start,$end,$strand,$f_score,$name,'similarity',$self->id, - $hstart,$hend,1,$f_score,$name,'similarity',$hid, $evalue, $perc_id, $phase, $end_phase); - - $out->analysis ($analysis); - $out->id($fid); - # see comment below - #$out->id ($hid); # MC This is for Arek - but I don't - # really know where this method has come from. - } else { - #print "making a single feature"; - #print "name = ".$name." ID ".$hid."\n"; - $out = new Bio::EnsEMBL::SeqFeature; - $out->seqname ($self->id); - $out->start ($start); - $out->end ($end); - $out->strand ($strand); - $out->source_tag ($name); - $out->primary_tag('similarity'); - $out->id ($fid); - $out->p_value ($evalue) if (defined $evalue); - $out->percent_id ($perc_id) if (defined $perc_id); - $out->phase ($phase) if (defined $phase); - $out->end_phase ($end_phase) if (defined $end_phase); - $out->raw_seqname ($self->id); - - if( defined $f_score ) { - $out->score($f_score); - } - $out->analysis($analysis); - } - # Final check that everything is ok. - #print "gff string ".$out->gffstring."\n"; - $out->validate(); - if( $out->can('attach_seq') ) { - $out->attach_seq($self->primary_seq); - } - - push(@fps,$out); - - } - - push(@array,@fps); - - return @array; -} - -=head2 get_all_RepeatFeatures - - Title : get_all_RepeatFeatures - Usage : foreach my $sf ( $contig->get_all_RepeatFeatures ) - Function: Gets all the repeat features on a contig. - Example : - Returns : - Args : - - -=cut - -sub get_all_RepeatFeatures { - my ($self) = @_; - - my @array; - #print "getting all reapeat features\n"; - my $id = $self->internal_id(); - my $length = $self->length(); - - my %analhash; - - # make the SQL query - my $statement = "select id,seq_start,seq_end,strand,score,analysis,hstart,hend,hid " . - "from repeat_feature where contig = '$id'"; - - print "sql = ".$statement."\n"; - my $sth = $self->dbobj->prepare($statement); - - $sth->execute(); - - my ($fid,$start,$end,$strand,$score,$analysisid,$hstart,$hend,$hid); - - # bind the columns - $sth->bind_columns(undef,\$fid,\$start,\$end,\$strand,\$score,\$analysisid,\$hstart,\$hend,\$hid); - - while( $sth->fetch ) { - my $out; - my $analysis; - - if (!$analhash{$analysisid}) { - - my $analysis_adp = new Bio::EnsEMBL::DBSQL::AnalysisAdaptor($self->dbobj); - $analysis = $analysis_adp->fetch_by_dbID($analysisid); - $analhash{$analysisid} = $analysis; - - } else { - $analysis = $analhash{$analysisid}; - } - - - if( $hid ne '__NONE__' ) { - # is a paired feature - # build EnsEMBL features and make the FeaturePair - - $out = Bio::EnsEMBL::FeatureFactory->new_repeat(); - $out->set_all_fields($start,$end,$strand,$score,'repeatmasker','repeat',$self->id, - $hstart,$hend,1,$score,'repeatmasker','repeat',$hid); - - $out->analysis($analysis); - $out->id($fid); - } else { - $self->warn("Repeat feature does not have a hid. bad news...."); - } - - $out->validate(); - - push(@array,$out); - } - - return @array; -} # get_all_RepeatFeatures - -=head2 get_MarkerFeatures - - Title : get_MarkerFeatures - Usage : @fp = $contig->get_MarkerFeatures; - Function: Gets MarkerFeatures. MarkerFeatures can be asked for a Marker. - Its assumed, that when you can get MarkerFeatures, then you can - get the Map Code as well. - Example : - - Returns : - - Args : - - -=cut - -sub get_MarkerFeatures { - my ($self)=@_; - - my $id = $self->internal_id; - my @markers; - - eval { - require Bio::EnsEMBL::Map::MarkerFeature; - - my $statement="SELECT f.seq_start, f.seq_end, f.score, f.strand, f.name, - f.hstart, f.hend, f.hid, f.analysis - FROM feature f, analysis a - WHERE f.contig='$id' - AND f.analysis = a.id and a.db='mapprimer'"; - - @markers=$self->_create_MarkerFeatures($statement); - - }; - - if( $@ ) { - print STDERR ("Problems retrieving map data. Most likely not connected to maps db\n$@\n" ); - } - - return @markers; -} # get_MarkerFeatures - - -=head2 get_landmark_MarkerFeatures - - Title : get_landmark_MarkerFeatures - Usage : @fp = $contig->get_landmark_MarkerFeatures; - Function: Gets MarkerFeatures with identifiers like D8S509. - MarkerFeatures can be asked for a Marker. - Its assumed, that when you can get MarkerFeatures, then you can - get the Map Code as well. - Example : - - Returns : - - Args : - - -=cut - - -sub get_landmark_MarkerFeatures { - my ($self)=@_; - - - my $dbname=$self->dbobj->dbname; - my $mapsdbname=$self->dbobj->mapdbname; - - my $id = $self->internal_id; - my @markers; - - - eval { - require Bio::EnsEMBL::Map::MarkerFeature; - - my $statement="SELECT f.seq_start, f.seq_end, f.score, f.strand, f.name, - f.hstart, f.hend, s.name, f.analysis - FROM $dbname.feature f, $dbname.analysis a, - $mapsdbname.MarkerSynonym s,$mapsdbname.Marker m - WHERE f.contig='$id' - AND f.analysis = a.id - AND a.db='mapprimer' - AND m.marker=s.marker - AND f.hid=m.marker - AND s.name regexp '^D[0-9,X,Y][0-9]?S'"; - - @markers=$self->_create_MarkerFeatures($statement); - - }; - - if( $@ ) { - print STDERR ("Problems retrieving map data. Most likely not connected to maps db\n$@\n" ); - } - - return @markers; -} # get_landmark_MarkerFeatures - - - -sub _create_MarkerFeatures -{ -my ($self,$statement)=@_; - -my @result; -my $analysis; -my %analhash; - - -my $sth = $self->dbobj->prepare($statement); -$sth->execute; - -my ($start, $end, $score, $strand, $hstart, - $name, $hend, $hid, $analysisid ); - - -$sth->bind_columns - ( undef, \$start, \$end, \$score, \$strand, \$name, - \$hstart, \$hend, \$hid, \$analysisid); - - -while( $sth->fetch ) { - - my ( $out, $seqf1, $seqf2 ); - - if (!$analhash{$analysisid}) { - - my $analysis_adp = new Bio::EnsEMBL::DBSQL::AnalysisAdaptor($self->dbobj); - $analysis = $analysis_adp->fetch_by_dbID($analysisid); - $analhash{$analysisid} = $analysis; - - } else { - $analysis = $analhash{$analysisid}; - } - - $seqf1 = Bio::EnsEMBL::SeqFeature->new(); - $seqf2 = Bio::EnsEMBL::SeqFeature->new(); - $out = Bio::EnsEMBL::Map::MarkerFeature->new - ( -feature1 => $seqf1, -feature2 => $seqf2 ); - $out->set_all_fields - ( $start,$end,$strand,$score, - $name,'similarity',$self->id, - $hstart,$hend,1,$score,$name,'similarity',$hid); - $out->analysis($analysis); - $out->mapdb( $self->dbobj->mapdb ); - $out->id ($hid); - - push( @result, $out ); -} - -return @result; - -} # _create_MarkerFeatures - -=head2 get_all_PredictionFeatures - - Title : get_all_PredictionFeatures - Usage : foreach my $sf ( $contig->get_all_RepeatFeatures ) - Function: Gets all the repeat features on a contig. - Example : - Returns : - Args : - - -=cut - -sub get_all_PredictionFeatures { - my ($self) = @_; - - my @array; - - my $id = $self->internal_id(); - my $length = $self->length(); - my $fsetid; - my $previous; - my %analhash; - - # make the SQL query - - - my $query = "select f.id,f.seq_start,f.seq_end,f.strand,f.score,f.evalue,f.perc_id,f.phase,f.end_phase,f.analysis,f.hid,f.name ". - "from feature f where contig = $id and name in ('genscan', 'fgenesh') order by name, hid"; - - - my $sth = $self->dbobj->prepare($query); - - $sth->execute(); - - my ($fid,$start,$end,$strand,$score,$evalue,$perc_id,$phase,$end_phase,$analysisid,$hid, $name); - - # bind the columns - $sth->bind_columns(undef,\$fid,\$start,\$end,\$strand,\$score,\$evalue,\$perc_id,\$phase,\$end_phase,\$analysisid,\$hid,\$name); - - $previous = -1; - my $current_fset; - my $fsetstart; - my $count=1; - my $prev; - - while( $sth->fetch ) { - my $out; - - print STDERR "PHASE $phase\n"; - - my $analysis; - - if (!$analhash{$analysisid}) { - - my $analysis_adp = new Bio::EnsEMBL::DBSQL::AnalysisAdaptor($self->dbobj); - $analysis = $analysis_adp->fetch_by_dbID($analysisid); - $analhash{$analysisid} = $analysis; - - - } else { - $analysis = $analhash{$analysisid}; - } - # Oh boyoboy. Yet another genscan hack to avoid duplicate genscans - if (defined($prev) && $start == $prev->start && $end == $prev->end) { - next; - } - #MC. This has been temporarily changed back to the old way of genscans - if( $hid =~ /Initial/ || $hid =~ /Single Exon/ || $previous =~ /Single/ || $previous =~ /Terminal/ || $previous eq -1 ) { - #if( $hid ne $previous || $previous eq -1 ) { - $current_fset = new Bio::EnsEMBL::SeqFeature; - $current_fset->source_tag($name); - $current_fset->primary_tag('prediction'); - $current_fset->analysis($analysis); - $current_fset->seqname($self->id); - $current_fset->id($count); - $current_fset->score(0.0); - $count++; - $current_fset->raw_seqname($self->id); - $fsetstart = $start; - push(@array,$current_fset); - } - - $out = new Bio::EnsEMBL::SeqFeature; - - $out->seqname ($self->id); - $out->raw_seqname($self->id); - - $out->start ($start); - $out->end ($end); - $out->strand ($strand); - $out->score ($score) if (defined $score); - $out->p_value ($evalue) if (defined $evalue); - $out->percent_id($perc_id) if (defined $perc_id); - $out->phase ($phase) if (defined $phase); - $out->end_phase ($end_phase) if (defined $end_phase); - - - my $query="select fset from fset_feature where feature=$fid"; - my $sth = $self->dbobj->prepare($query); - $sth->execute(); - -# $fsetid=$arr_ref->[0]; - my $fsetid = $self->internal_id . "." . $fsetstart; - $out->id($fsetid); # to make genscan peptide work - #print STDERR "\t\t===> get_pred_features fsetid: $fsetid\n"; - $out->source_tag($name); - $out->primary_tag('prediction'); - - if( defined $score ) { - $out->score($score); - } - - $out->analysis($analysis); - - # Final check that everything is ok. - - $out->validate(); - - $current_fset->add_sub_SeqFeature($out,'EXPAND'); - $current_fset->strand($strand); - $previous = $hid; - $prev = $out; - - } - - return @array; -} - - -=head2 get_genscan_peptides - - Title : get_genscan_peptides - Usage : - Function: Returns genscan predictions as peptides - Example : - Returns : - Args : - - -=cut - -#have written this to use the new phase and end_phase tag in SeqFeature -#Therefore this won't work with the older features, after all the old system was pretty ropey. -sub get_genscan_peptides { - my ($self) = @_; - my @transcripts; - foreach my $fset ($self->get_all_PredictionFeatures) { - my $trans = &Bio::EnsEMBL::DBSQL::Utils::fset2transcript($fset,$self); - push(@transcripts,$trans); - } - - return @transcripts; -} - -=head2 get_all_ExternalFeatures - - Title : get_all_ExternalFeatures - Usage : - Function: - Example : - Returns : - Args : - - -=cut - -sub get_all_ExternalFeatures{ - my ($self) = @_; - - my @out; - my $acc; - - $acc = $self->cloneid(); - - my $embl_offset = $self->embl_offset(); - - foreach my $extf ( $self->dbobj->_each_ExternalFeatureFactory ) { - - if( $extf->can('get_Ensembl_SeqFeatures_contig') ) { - - my @tmp = $extf->get_Ensembl_SeqFeatures_contig($self->internal_id, - $self->seq_version, - 1, - $self->length, - $self->id); - - push(@out,@tmp); - } - if( $extf->can('get_Ensembl_SeqFeatures_clone') ) { - - foreach my $sf ( $extf->get_Ensembl_SeqFeatures_clone($acc,$self->seq_version,$self->embl_offset,$self->embl_offset+$self->length()) ) { - - my $start = $sf->start - $embl_offset+1; - my $end = $sf->end - $embl_offset+1; - $sf->start($start); - $sf->end($end); - push(@out,$sf); - } - } - } - my $id = $self->id(); - foreach my $f ( @out ) { - $f->seqname($id); - } - - return @out; - -} # get_all_ExternalFeatures - - -=head2 get_all_ExternalGenes - - Title : get_all_ExternalGenes - Usage : - Function: - Example : - Returns : - Args : - - -=cut - -sub get_all_ExternalGenes { - my ($self) = @_; - my @out; - my $acc; - - $acc = $self->cloneid(); - my $embl_offset = $self->embl_offset(); - - foreach my $extf ( $self->dbobj->_each_ExternalFeatureFactory ) { - if( $extf->can('get_Ensembl_Genes_clone') ) { - my @genes = $extf->get_Ensembl_Genes_clone($acc); - foreach my $gene (@genes){ - foreach my $exon ( $gene->all_Exon_objects ) { - $exon->start($exon->start - $embl_offset+1); - $exon->end($exon->end - $embl_offset+1); - } - push(@out,@genes); - } - - - } - } - - return @out; -} - - -=head2 length - - Title : length - Usage : - Function: - Example : - Returns : - Args : - - -=cut - -sub length{ - my ($self,$length) = @_; - - if( defined $length ) { - $self->{'_length'} = $length; - return $length; - } - - my $id= $self->internal_id(); - $self->throw("Internal ID not set") unless $id; - if (! defined ($self->{'_length'})) { - my $sth = $self->dbobj->prepare("select length from contig where internal_id = \"$id\" "); - $sth->execute(); - - my $rowhash = $sth->fetchrow_hashref(); - - $self->{'_length'} = $rowhash->{'length'}; - } - - return $self->{'_length'}; -} - -sub cloneid { - my ($self,$arg) = @_; - - if (defined($arg)) { - $self->{_cloneid} = $arg; - } - - return $self->{_cloneid}; -} - -=head2 old_chromosome - - Title : chromosome - Usage : $chr = $contig->chromosome( [$chromosome] ) - Function: get/set the chromosome of the contig. - Example : - Returns : the chromsome object - Args : - -=cut - -sub old_chromosome { - my ($self,$chromosome ) = @_; - my $id= $self->internal_id(); - - if( defined( $chromosome )) { - $self->{_chromosome} = $chromosome; - } else { - if (! defined ($self->{_chromosome})) { - my $sth = $self->dbobj->prepare("select chromosomeId from contig where internal_id = \"$id\" "); - $sth->execute(); - - my $rowhash = $sth->fetchrow_hashref(); - my $chrId = $rowhash->{'chromosomeId'}; - $self->{_chromosome} = Bio::EnsEMBL::Chromosome->get_by_id - ( $chrId ); - } - } - return $self->{_chromosome}; -} - -=head2 seq_version - - Title : seq_version - Usage : $obj->seq_version($newval) - Function: - Example : - Returns : value of seq_version - Args : newvalue (optional) - - -=cut - -sub seq_version{ - my ($obj,$value) = @_; - if( defined $value) { - $obj->{'seq_version'} = $value; - } - return $obj->{'seq_version'}; - -} - -=head2 embl_order - - Title : order - Usage : $obj->embl_order - Function: - Returns : - Args : - - -=cut - -sub embl_order{ - my $self = shift; - my $id = $self->id(); - my $sth = $self->dbobj->prepare("select corder from contig where id = \"$id\" "); - $sth->execute(); - my $rowhash = $sth->fetchrow_hashref(); - return $rowhash->{'corder'}; - -} - - - - -=head2 embl_offset - - Title : embl_offset - Usage : - Returns : - Args : - - -=cut - -sub embl_offset{ - my ( $self, $arg ) = @_; - my $id = $self->id(); - if( defined $arg ) { - $self->{_embl_offset} = $arg; - return; - } - - if( defined $self->{_embl_offset} ) { - return $self->{_embl_offset}; - } else { - my $sth = $self->dbobj->prepare("select offset from contig where id = \"$id\" "); - $sth->execute(); - my $rowhash = $sth->fetchrow_hashref(); - $self->{_embl_offset} = $rowhash->{'offset'}; - return $self->{_embl_offset}; - } -} - -=head2 embl_accession - - Title : embl_accession - Usage : - Function: - Example : - Returns : - Args : - - -=cut - -sub embl_accession{ - my $self = shift; - - return "AL000000"; -} - - -=head2 id - - Title : id - Usage : $obj->id($newval) - Function: - Example : - Returns : value of id - Args : newvalue (optional) - - -=cut - -sub id { - my ($self,$value) = @_; - if( defined $value) { - $self->{'id'} = $value; - } - return $self->{'id'}; - -} - -=head2 internal_id - - Title : internal_id - Usage : $obj->internal_id($newval) - Function: - Example : - Returns : value of database internal id - Args : newvalue (optional) - -=cut - -sub internal_id { - my ($self,$value) = @_; - if( defined $value) { - $self->{'internal_id'} = $value; - } - return $self->{'internal_id'}; - -} - -=head2 dna_id - - Title : dna_id - Usage : $obj->dna_id($newval) - Function: Get or set the id for this contig in the dna table - Example : - Returns : value of dna id - Args : newvalue (optional) - - -=cut - -sub dna_id { - my ($self,$value) = @_; - if( defined $value) { - $self->{'dna_id'} = $value; - } - return $self->{'dna_id'}; - -} - - -=head2 seq_date - - Title : seq_date - Usage : $contig->seq_date() - Function: Gives the unix time value of the dna table created datetime field, which indicates - the original time of the dna sequence data - Example : $contig->seq_date() - Returns : unix time - Args : none - - -=cut - -sub seq_date { - my ($self) = @_; - - my $id = $self->internal_id(); - my $query = "select UNIX_TIMESTAMP(d.created) from dna as d,contig as c where c.internal_id = $id and c.dna = d.id"; - my $sth = $self->dbobj->dnadb->prepare($query); - $sth->execute(); - my $rowhash = $sth->fetchrow_hashref(); - return $rowhash->{'UNIX_TIMESTAMP(d.created)'}; -} - - - - -sub _db_obj { - my ($self,@args) = @_; - $self->warn("Someone is using a deprecated _db_obj call!"); - return $self->dbobj(@args); -} - -=head2 dbobj - - Title : dbobj - Usage : - Function: - Example : - Returns : The Bio::EnsEMBL::DBSQL::ObjI object - Args : - - -=cut - -sub dbobj { - my ($self,$arg) = @_; - - if (defined($arg)) { - $self->throw("[$arg] is not a Bio::EnsEMBL::DBSQL::Obj") unless ($arg->isa("Bio::EnsEMBL::DBSQL::Obj") || $arg->isa('Bio::EnsEMBL::DBSQL::DBAdaptor')); - $self->{'_dbobj'} = $arg; - } - return $self->{'_dbobj'}; -} - -=head2 crossdb - - Title : crossdb - Usage : - Function: - Example : - Returns : The Bio::EnsEMBL::DBSQL::CrossMatchAdaptor object - Args : - - -=cut - -sub _crossdb { - my ($self,$arg) = @_; - - return $self->dbobj->_crossdb; -} - - - - -# -# Static golden path tables -# - - -=head2 chromosome - - Title : chromosome - Usage : $self->chromosome($newval) - Function: - Returns : value of chromosome - Args : - - -=cut - -sub chromosome{ - my $self = shift; - - if( defined $self->_chromosome) { return $self->_chromosome;} - - my $id = $self->internal_id; - my $type = $self->dbobj->static_golden_path_type(); - my $sth = $self->dbobj->prepare("select chr_name from static_golden_path where raw_id = $id and type = '$type'"); - $sth->execute; - my ($value) = $sth->fetchrow_array(); - if( !defined $value) { return undef; } - $self->_chromosome($value); - return $value; - -} - -=head2 _chromosome - - Title : chromosome - Usage : $self->_chromosome($newval) - Function: - Returns : value of _chromosome - Args : newvalue (optional) - - -=cut - -sub _chromosome{ - my $self = shift; - if( @_ ) { - my $value = shift; - $self->{'_chromosome'} = $value; - } - return $self->{'_chromosome'}; - -} - -=head2 fpc_contig_name - - Title : fpc_contig_name - Usage : $self->fpc_contig_name() - Function: - Returns : value of fpc_contig - Args : - - -=cut - -sub fpc_contig_name { - my $self = shift; - - if( defined $self->_fpc_contig) { return $self->_fpc_contig;} - - my $id = $self->internal_id; - my $type = $self->dbobj->static_golden_path_type(); - my $sth = $self->dbobj->prepare("select fpcctg_name from static_golden_path where raw_id = $id and type = '$type'"); - $sth->execute; - my ($value) = $sth->fetchrow_array(); - if( !defined $value) { return undef; } - $self->_fpc_contig($value); - return $value; - - -} - -=head2 _fpc_contig - - Title : fpc_contig - Usage : $self->_fpc_contig($newval) - Function: - Returns : value of _fpc_contig - Args : newvalue (optional) - - -=cut - -sub _fpc_contig{ - my $self = shift; - if( @_ ) { - my $value = shift; - $self->{'_fpc_contig'} = $value; - } - return $self->{'_fpc_contig'}; - -} - -=head2 chr_start - - Title : chr_start - Usage : $self->chr_start($newval) - Function: - Returns : value of chr_start - Args : - - -=cut - -sub chr_start{ - my $self = shift; - - if( defined $self->_chr_start) { return $self->_chr_start;} - - my $id = $self->internal_id; - my $type = $self->dbobj->static_golden_path_type(); - my $sth = $self->dbobj->prepare("select chr_start from static_golden_path where raw_id = $id and type = '$type'"); - $sth->execute; - my ($value) = $sth->fetchrow_array(); - if( !defined $value) { return undef; } - $self->_chromosome($value); - return $value; - - -} - -=head2 _chr_start - - Title : chr_start - Usage : $self->_chr_start($newval) - Function: - Returns : value of _chr_start - Args : newvalue (optional) - - -=cut - -sub _chr_start{ - my $self = shift; - if( @_ ) { - my $value = shift; - $self->{'_chr_start'} = $value; - } - return $self->{'_chr_start'}; - -} - -=head2 chr_end - - Title : chr_end - Usage : $self->chr_end($newval) - Function: - Returns : value of chr_end - Args : - - -=cut - -sub chr_end{ - my $self = shift; - - if( defined $self->_chr_end) { return $self->_chr_end;} - - my $id = $self->internal_id; - my $type = $self->dbobj->static_golden_path_type(); - my $sth = $self->dbobj->prepare("select chr_end from static_golden_path where raw_id = $id and type = '$type'"); - $sth->execute; - my ($value) = $sth->fetchrow_array(); - if( !defined $value) { return undef; } - $self->_chr_end($value); - return $value; - -} - -=head2 _chr_end - - Title : chr_end - Usage : $self->_chr_end($newval) - Function: - Returns : value of _chr_end - Args : newvalue (optional) - - -=cut - -sub _chr_end{ - my $self = shift; - if( @_ ) { - my $value = shift; - $self->{'_chr_end'} = $value; - } - return $self->{'_chr_end'}; - -} - - -=head2 fpc_contig_start - - Title : fpc_contig_start - Usage : $self->fpc_contig_start($newval) - Function: - Returns : value of fpc_contig_start - Args : - - -=cut - -sub fpc_contig_start { - my $self = shift; - - if( defined $self->_fpc_contig_start) { return $self->_fpc_contig_start;} - - my $id = $self->internal_id; - my $type = $self->dbobj->static_golden_path_type(); - my $sth = $self->dbobj->prepare("select fpcctg_start from static_golden_path where raw_id = $id and type = '$type'"); - $sth->execute; - my ($value) = $sth->fetchrow_array(); - if( !defined $value) { return undef; } - $self->_chromosome($value); - return $value; - - -} - -=head2 _fpc_contig_start - - Title : fpc_contig_start - Usage : $self->_fpc_contig_start($newval) - Function: - Returns : value of _fpc_contig_start - Args : newvalue (optional) - - -=cut - -sub _fpc_contig_start{ - my $self = shift; - if( @_ ) { - my $value = shift; - $self->{'_fpc_contig_start'} = $value; - } - return $self->{'_fpc_contig_start'}; - -} - -=head2 fpc_contig_end - - Title : fpc_contig_end - Usage : $self->fpc_contig_end($newval) - Function: - Returns : value of fpc_contig_end - Args : - - -=cut - -sub fpc_contig_end{ - my $self = shift; - - if( defined $self->_fpc_contig_end) { return $self->_fpc_contig_end;} - - my $id = $self->internal_id; - my $type = $self->dbobj->static_golden_path_type(); - my $sth = $self->dbobj->prepare("select fpcctg_end from static_golden_path where raw_id = $id and type = '$type'"); - $sth->execute; - my ($value) = $sth->fetchrow_array(); - if( !defined $value) { return undef; } - $self->_fpc_contig_end($value); - return $value; - -} - -=head2 _fpc_contig_end - - Title : fpc_contig_end - Usage : $self->_fpc_contig_end($newval) - Function: - Returns : value of _fpc_contig_end - Args : newvalue (optional) - - -=cut - -sub _fpc_contig_end{ - my $self = shift; - if( @_ ) { - my $value = shift; - $self->{'_fpc_contig_end'} = $value; - } - return $self->{'_fpc_contig_end'}; - -} - - - -=head2 static_golden_start - - Title : static_golden_start - Usage : $self->static_golden_start($newval) - Function: - Returns : value of static_golden_start (in RawContig coordinates) - Args : - - -=cut - -sub static_golden_start{ - my ($self,$static_golden_start) = @_; - - if( defined $static_golden_start ) { - $self->{'_static_golden_start'} = $static_golden_start; - return $static_golden_start; - } - - my $id= $self->internal_id(); - $self->throw("Internal ID not set") unless $id; - - my $type = $self->dbobj->static_golden_path_type(); - if (! defined ($self->{'_static_golden_start'})) { - my $sth = $self->dbobj->prepare("select raw_start from static_golden_path where raw_id = $id and type = '$type'"); - $sth->execute(); - - my $rowhash = $sth->fetchrow_hashref(); - - $self->{'_static_golden_start'} = $rowhash->{'raw_start'}; - } - return $self->{'_static_golden_start'}; -} - -=head2 static_golden_end - - Title : static_golden_end - Usage : $self->static_golden_end($newval) - Function: - Returns : value of static_golden_end (in RawContig coordinates) - Args : - - -=cut - -sub static_golden_end{ - my ($self,$static_golden_end) = @_; - - if( defined $static_golden_end ) { - $self->{'_static_golden_end'} = $static_golden_end; - return $static_golden_end; - } - - my $id= $self->internal_id(); - $self->throw("Internal ID not set") unless $id; - - my $type = $self->dbobj->static_golden_path_type(); - if (! defined ($self->{'_static_golden_end'})) { - my $sth = $self->dbobj->prepare("select raw_end from static_golden_path where raw_id = $id and type = '$type'"); - $sth->execute(); - - my $rowhash = $sth->fetchrow_hashref(); - - $self->{'_static_golden_end'} = $rowhash->{'raw_end'}; - } - return $self->{'_static_golden_end'}; -} - -=head2 static_golden_ori - - Title : static_golden_ori - Usage : $self->static_golden_ori($newval) - Function: - Returns : value of static_golden_ori - Args : - - -=cut - -sub static_golden_ori{ - my ($self,$static_golden_ori) = @_; - - if( defined $static_golden_ori ) { - $self->{'_static_golden_ori'} = $static_golden_ori; - return $static_golden_ori; - } - - my $id= $self->internal_id(); - $self->throw("Internal ID not set") unless $id; - - my $type = $self->dbobj->static_golden_path_type(); - if (! defined ($self->{'_static_golden_ori'})) { - my $sth = $self->dbobj->prepare("select raw_ori from static_golden_path where raw_id = $id and type = '$type'"); - $sth->execute(); - - my $rowhash = $sth->fetchrow_hashref(); - - $self->{'_static_golden_ori'} = $rowhash->{'raw_ori'}; - } - return $self->{'_static_golden_ori'}; -} - - -=head2 static_golden_type - - Title : static_golden_type - Usage : $self->static_golden_type($newval) - Function: - Returns : value of static_golden_type - Args : - - -=cut - -sub static_golden_type{ - my $self = shift; - - return $self->dbobj->static_golden_path_type(); - -} - - -=head2 is_static_golden - - Title : is_static_golden - Usage : - Function: - Example : - Returns : - Args : - - -=cut - -sub is_static_golden{ - my ($self,@args) = @_; - - if( defined $self->fpc_contig_name ) { - return 1; - } - -} - - -=head2 perl_only_sequences - - Title : perl_only_sequences - Usage : $obj->perl_only_sequences($newval) - Function: - Returns : value of perl_only_sequences - Args : newvalue (optional) - - -=cut - -sub perl_only_sequences{ - my $obj = shift; - if( @_ ) { - my $value = shift; - $obj->{'perl_only_sequences'} = $value; - } - return $obj->{'perl_only_sequences'}; - -} - -=head2 use_rawcontig_acc - - Title : use_rawcontig_acc - Usage : $obj->use_rawcontig_acc($newval) - Function: - Returns : value of use_rawcontig_acc - Args : newvalue (optional) - - -=cut - -sub use_rawcontig_acc{ - my $obj = shift; - if( @_ ) { - my $value = shift; - $obj->{'use_rawcontig_acc'} = $value; - } - return $obj->{'use_rawcontig_acc'}; - -} - -=head2 overlap_distance_cutoff - - Title : overlap_distance_cutoff - Usage : my $cutoff = $contig->overlap_distance_cutoff() - Function: Gets or sets an integer which is used when building - VirtualContigs. If the distance in a contig overlap - is greater than the cutoff, then the overlap will - not be returned. - Returns : value of overlap_distance_cutoff - Args : positive integer - - -=cut - - -sub overlap_distance_cutoff { - my( $self, $cutoff ) = @_; - - if (defined $cutoff) { - if( $cutoff !~ /^\d+$/ && $cutoff != -1 ) { - $self->throw("'$cutoff' is not an positive integer"); - } - $self->{'_overlap_distance_cutoff'} = $cutoff; - } - return $self->{'_overlap_distance_cutoff'}; -} - - -sub is_golden { - my $self = shift; - - if( defined $self->get_left_overlap || defined $self->get_right_overlap ) { - return 1; - } - return 0; -} - - -=head2 unmapped_exons - - Title : unmapped_exons - Usage : $obj->unmapped_exons($newval) - Function: Getset for unmapped_exons value - Returns : value of unmapped_exons - Args : newvalue (optional) - - -=cut - -sub unmapped_exons{ - my $obj = shift; - if( @_ ) { - my $value = shift; - $obj->{'unmapped_exons'} = $value; - } - return $obj->{'unmapped_exons'}; - -} - - -=head2 SeqI implementing functions - -=head2 species - - Title : species - Usage : - Function: - Example : - Returns : - Args : - - -=cut - -sub species{ - my ($self,@args) = @_; - - return undef; -} - - -# AS: on the way to contexts -# we need temporarily subseq -sub subseq { - my ( $self, $start, $end ) = @_; - my $length = $end-$start+1; - my $id = $self->dna_id(); - my $sth = $self->dbobj->dnadb->prepare(" - SELECT SUBSTRING(sequence,$start,$length) - FROM dna - WHERE id = $id - "); - $sth->execute(); - my ($value) = $sth->fetchrow_array(); - return $value; -} - - - -1; diff --git a/modules/Bio/EnsEMBL/DBSQL/SymmetricContigFeatureContainer.pm b/modules/Bio/EnsEMBL/DBSQL/SymmetricContigFeatureContainer.pm deleted file mode 100755 index 6f30dcbbfb..0000000000 --- a/modules/Bio/EnsEMBL/DBSQL/SymmetricContigFeatureContainer.pm +++ /dev/null @@ -1,147 +0,0 @@ - -# -# Ensembl module for Bio::EnsEMBL::DBSQL::SymmetricContigFeatureContainer -# -# Cared for by Ewan Birney <birney@ebi.ac.uk> -# -# Copyright Ewan Birney -# -# You may distribute this module under the same terms as perl itself - -# POD documentation - main docs before the code - -=head1 NAME - -Bio::EnsEMBL::DBSQL::SymmetricContigFeatureContainer - Binds to SymmetricContigFeature table - -=head1 SYNOPSIS - -Give standard usage here - -=head1 DESCRIPTION - -This module is a container for symmetric contig feature pairs, i.e. pairs of features between contigs that have identical sequence in two versions of a database. The pairs are stored symmetrically, i.e. each feature on each contig is stored in the contig_feature table, and each pair is stored with an id in a separate table. - -The method which fetches teh feature pairs breaks the symmetry by asking for all the feature pairs with a certain version of the clone (on which the contig is sitting). The crosmmatching at the moment relies on the sv version of the clones. - -=head1 AUTHOR - Ewan Birney - -This module is part of the Ensembl project http://www.ensembl.org - -Email birney@ebi.ac.uk - -Describe contact details here - -=head1 APPENDIX - -The rest of the documentation details each of the object methods. Internal methods are usually preceded with a _ - -=cut - - -# Let the code begin... - - -package Bio::EnsEMBL::DBSQL::SymmetricContigFeatureContainer; -use vars qw(@ISA); -use strict; - -# Object preamble - inherits from Bio::Root::RootI - - -use Bio::EnsEMBL::DBSQL::BaseAdaptor; -use Bio::EnsEMBL::FeatureFactory; - -@ISA = qw(Bio::EnsEMBL::DBSQL::BaseAdaptor); - -=head2 get_FeaturePair_list_by_rawcontig_id - - Title : get_FeaturePair_list_by_rawcontig_id - Usage : $scfc->get_FeaturePair_list_by_rawcontig_id($rid,15) - Function: gets all the feature pairs for a specific rawcontig id - and clone version - Example : $scfc->get_FeaturePair_list_by_rawcontig_id('AC000043.12',5) - Returns : array of Bio::EnsEMBL::FeaturePair - Args : id of the rawcontig,sv version of the clone it is on - - -=cut - -sub get_FeaturePair_list_by_rawcontig_id{ - my ($self,$id,$version) = @_; - - if( !defined $id ) { - $self->throw("Must have a raw contig id"); - } - if( !defined $version ) { - $self->throw("Must have a raw contig version"); - } - - my $sth = $self->prepare("select a.seq_start,a.seq_end,a.strand,b.seq_start,b.seq_end,b.strand,b.rawcontigid,p.score from symmetric_contig_feature a, symmetric_contig_pair_hit p,symmetric_contig_feature b where a.symchid = p.symchid and p.symchid = b.symchid and a.symcfid != b.symcfid and a.rawcontigid = '$id' and a.rawversion=$version"); - - - $sth->execute || $self->throw("Unable to retrieve versions!"); - my @out; - - while( my $aref = $sth->fetchrow_arrayref ) { - my ($start,$end,$strand,$hstart,$hend,$hstrand,$hname,$score) = @{$aref}; - my $out = Bio::EnsEMBL::FeatureFactory->new_feature_pair(); - $out->set_all_fields($start,$end,$strand,$score,$id,'symmetric',$id, - $hstart,$hend,$hstrand,$score,$hname,'symmetric',$hname); - push(@out,$out); - } - - return @out; -} - - -=head2 write_FeaturePair_List - - Title : write_FeaturePair_List - Usage : $scfc->write_FeaturePair_List(@fp) - Function: Writes an array of feature pairs to the db - Example : $scfc0>write_FeaturePair_List(@fp) - Returns : nothing - Args : array of Bio::EnsEMBL::FeaturePair - - -=cut - -sub write_FeaturePair_List{ - my ($self,@fp) = @_; - - foreach my $fp ( @fp ) { - my $score = $fp->score; - my $sth = $self->prepare("INSERT INTO symmetric_contig_pair_hit (symchid,score) VALUES (NULL,$score)"); - $sth->execute; - $sth = $self->prepare("select LAST_INSERT_ID()"); - $sth->execute; - my ($hitid) = $sth->fetchrow_array; - - my $seqname = $fp->feature1->seqname; - print STDERR "Parsing $seqname\n"; - $seqname =~ /(\w+)\.(\d+)\.(\S+)/ || $self->throw("Feature pair name does not conform to acc.version.number sequence"); - my $version = $2; - my $contigid = "$1.$3"; - print STDERR "Got $contigid\n"; - my $clone=$1; - $sth = $self->prepare("INSERT INTO symmetric_contig_feature (symcfid,symchid,rawcontigid,rawversion,clone,seq_start,seq_end,strand) VALUES (NULL,$hitid,'".$contigid."',".$version.",'".$clone."',".$fp->feature1->start.",".$fp->feature1->end.",".$fp->feature1->strand.")"); - $sth->execute; - - $seqname = $fp->feature2->seqname; - print STDERR "And $seqname\n"; - $seqname =~ /(\w+)\.(\d+)\.(\S+)/ || $self->throw("Feature pair name does not conform to acc.version.number sequence"); - $version = $2; - $contigid = "$1.$3"; - print STDERR "Got $contigid\n"; - $clone=$1; - $sth = $self->prepare("INSERT INTO symmetric_contig_feature (symcfid,symchid,rawcontigid,rawversion,clone,seq_start,seq_end,strand) VALUES (NULL,$hitid,'".$contigid."',".$version.",'".$clone."',".$fp->feature2->start.",".$fp->feature2->end.",".$fp->feature2->strand.")"); - $sth->execute; - } - -} - - -1; - - diff --git a/modules/Bio/EnsEMBL/RawContig.pm b/modules/Bio/EnsEMBL/RawContig.pm index d3a1f57f84..bb35922e6d 100644 --- a/modules/Bio/EnsEMBL/RawContig.pm +++ b/modules/Bio/EnsEMBL/RawContig.pm @@ -78,6 +78,9 @@ sub new { ( $international_name ); } + + + # things to take over from DBSQL/RawContig ? # fetch - done in DBSQL/RawContigAdaptor # get_all_Genes - should be in GeneAdaptor -- GitLab