From 75b70e78a7693a78e201394e17deb4eea081346f Mon Sep 17 00:00:00 2001
From: Kevin Howe <klh@ebi.ac.uk>
Date: Thu, 8 Mar 2007 17:17:53 +0000
Subject: [PATCH] On storage of supporting features, check that the feature
 isn't already present, and reuse it if it is.

---
 modules/Bio/EnsEMBL/DBSQL/ExonAdaptor.pm      |  35 +---
 .../EnsEMBL/DBSQL/SupportingFeatureAdaptor.pm | 156 +++++++++++++++++
 .../Bio/EnsEMBL/DBSQL/TranscriptAdaptor.pm    |  36 +---
 .../TranscriptSupportingFeatureAdaptor.pm     | 157 ++++++++++++++++++
 4 files changed, 317 insertions(+), 67 deletions(-)

diff --git a/modules/Bio/EnsEMBL/DBSQL/ExonAdaptor.pm b/modules/Bio/EnsEMBL/DBSQL/ExonAdaptor.pm
index c258ce69c5..6673f3d56b 100644
--- a/modules/Bio/EnsEMBL/DBSQL/ExonAdaptor.pm
+++ b/modules/Bio/EnsEMBL/DBSQL/ExonAdaptor.pm
@@ -299,39 +299,8 @@ sub store {
   }
 
   # Now the supporting evidence
-  # should be stored from featureAdaptor
-  my $sql = "insert into supporting_feature (exon_id, feature_id, feature_type)
-             values(?, ?, ?)";
-
-  my $sf_sth = $self->prepare($sql);
-
-  my $anaAdaptor = $self->db->get_AnalysisAdaptor();
-  my $dna_adaptor = $self->db->get_DnaAlignFeatureAdaptor();
-  my $pep_adaptor = $self->db->get_ProteinAlignFeatureAdaptor();
-  my $type;
-
-  foreach my $sf (@{$exon->get_all_supporting_features}) {
-    if(!$sf->isa("Bio::EnsEMBL::BaseAlignFeature")){
-      throw("$sf must be an align feature otherwise" .
-            "it can't be stored");
-    }
-
-    if($sf->isa("Bio::EnsEMBL::DnaDnaAlignFeature")){
-      $dna_adaptor->store($sf);
-      $type = 'dna_align_feature';
-    }elsif($sf->isa("Bio::EnsEMBL::DnaPepAlignFeature")){
-      $pep_adaptor->store($sf);
-      $type = 'protein_align_feature';
-    } else {
-      warning("Supporting feature of unknown type. Skipping : [$sf]\n");
-      next;
-    }
-
-    $sf_sth->bind_param(1, $exonId, SQL_INTEGER);
-    $sf_sth->bind_param(2, $sf->dbID, SQL_INTEGER);
-    $sf_sth->bind_param(3, $type, SQL_VARCHAR);
-    $sf_sth->execute();
-  }
+  my $esf_adaptor = $db->get_SupportingFeatureAdaptor;
+  $esf_adaptor->store($exonId, $exon->get_all_supporting_features);
 
   #
   # Finally, update the dbID and adaptor of the exon (and any component exons)
diff --git a/modules/Bio/EnsEMBL/DBSQL/SupportingFeatureAdaptor.pm b/modules/Bio/EnsEMBL/DBSQL/SupportingFeatureAdaptor.pm
index 53668130b1..0072171b33 100644
--- a/modules/Bio/EnsEMBL/DBSQL/SupportingFeatureAdaptor.pm
+++ b/modules/Bio/EnsEMBL/DBSQL/SupportingFeatureAdaptor.pm
@@ -95,5 +95,161 @@ sub fetch_all_by_Exon {
   return $out;
 }
 
+=head2 store
+
+  Arg [1]    : Int $exon_dbID
+               The dbID of an EnsEMBL exon to associate with supporting
+               features
+  Arg [2]    : Ref to array of Bio::EnsEMBL::BaseAlignFeature (the support)
+  Example    : $sfa->store($exon_id, \@features);
+  Description: Stores a set of alignment features and associates an EnsEMBL
+               exon with them. A feature that matches a row already in the
+               database (same location, hit, analysis and cigar line) is not
+               stored again; the existing row is reused. An association that
+               already exists is not written again either.
+               A feature whose slice does not start at 1 on the forward
+               strand is first transferred to a slice of the entire
+               seq_region; the transferred copy then replaces the original
+               element of the passed array.
+  Returntype : none
+  Exceptions : thrown when a feature is not a Bio::EnsEMBL::BaseAlignFeature
+               or cannot be transferred to a slice of its entire seq_region
+  Caller     : ExonAdaptor
+  Status     : Stable
+
+=cut
+
+sub store {
+  my ( $self, $exon_dbID, $aln_objs ) = @_;
+
+  # Nothing to store: skip preparing statements that would never run.
+  return if !$aln_objs || !@$aln_objs;
+
+  # Columns that identify an already stored alignment feature. Their order
+  # must match the order of @check_args built for each feature below.
+  my @match_cols = qw(seq_region_id seq_region_start seq_region_end
+                      seq_region_strand hit_name hit_start hit_end
+                      analysis_id cigar_line);
+  my $match_sql = join( ' AND ', map { "$_ = ?" } @match_cols );
+
+  my $pep_check_sql =
+    "SELECT protein_align_feature_id " .
+    "FROM protein_align_feature " .
+    "WHERE $match_sql";
+
+  # DNA alignments are additionally distinguished by their hit strand.
+  my $dna_check_sql =
+    "SELECT dna_align_feature_id " .
+    "FROM dna_align_feature " .
+    "WHERE $match_sql AND hit_strand = ?";
+
+  # The exon dbID is bound as a parameter instead of being interpolated.
+  my $assoc_check_sql =
+    "SELECT 1 " .
+    "FROM supporting_feature " .
+    "WHERE exon_id = ? " .
+    "AND feature_type = ? " .
+    "AND feature_id = ?";
+
+  my $assoc_write_sql =
+    "INSERT INTO supporting_feature " .
+    "(exon_id, feature_id, feature_type) " .
+    "VALUES (?, ?, ?)";
+
+  my $pep_check_sth   = $self->prepare($pep_check_sql);
+  my $dna_check_sth   = $self->prepare($dna_check_sql);
+  my $assoc_check_sth = $self->prepare($assoc_check_sql);
+  my $sf_sth          = $self->prepare($assoc_write_sql);
+
+  my $db            = $self->db;
+  my $slice_adaptor = $db->get_SliceAdaptor();
+  my $dna_adaptor   = $db->get_DnaAlignFeatureAdaptor();
+  my $pep_adaptor   = $db->get_ProteinAlignFeatureAdaptor();
+
+  foreach my $f (@$aln_objs) {
+    # Check the type first: everything below calls align feature methods.
+    if ( !$f->isa("Bio::EnsEMBL::BaseAlignFeature") ) {
+      throw("$f must be an align feature otherwise " .
+            "it can't be stored");
+    }
+
+    # The lookup below matches on seq_region coordinates, so the feature
+    # must sit on a slice that starts at 1 on the forward strand.
+    if ( $f->slice->start != 1 || $f->slice->strand != 1 ) {
+      # Move the feature onto a slice of the entire seq_region.
+      my $tls =
+        $slice_adaptor->fetch_by_region( $f->slice->coord_system->name(),
+                                         $f->slice->seq_region_name(),
+                                         undef,    # start
+                                         undef,    # end
+                                         undef,    # strand
+                                         $f->slice->coord_system->version() );
+      my $moved = $f->transfer($tls);
+
+      if ( !$moved ) {
+        throw('Could not transfer Feature to slice of ' .
+              'entire seq_region prior to storing');
+      }
+
+      # $f aliases the array element, so the transferred copy replaces
+      # the original in @$aln_objs (only once the transfer has succeeded).
+      $f = $moved;
+    }
+
+    my ( $type, $adap, $check_sth );
+
+    my @check_args = ( $slice_adaptor->get_seq_region_id( $f->slice ),
+                       $f->start,
+                       $f->end,
+                       $f->strand,
+                       $f->hseqname,
+                       $f->hstart,
+                       $f->hend,
+                       $f->analysis->dbID,
+                       $f->cigar_string );
+
+    if ( $f->isa("Bio::EnsEMBL::DnaDnaAlignFeature") ) {
+      $adap      = $dna_adaptor;
+      $check_sth = $dna_check_sth;
+      $type      = 'dna_align_feature';
+      push @check_args, $f->hstrand;
+    } elsif ( $f->isa("Bio::EnsEMBL::DnaPepAlignFeature") ) {
+      $adap      = $pep_adaptor;
+      $check_sth = $pep_check_sth;
+      $type      = 'protein_align_feature';
+    } else {
+      warning("Supporting feature of unknown type. Skipping : [$f]\n");
+      next;
+    }
+
+    # Reuse the feature if an identical one has been stored before.
+    $check_sth->execute(@check_args);
+    my ($sf_dbID) = $check_sth->fetchrow_array;
+    if ( !$sf_dbID ) {
+      $adap->store($f);
+      $sf_dbID = $f->dbID;
+    }
+
+    # Only write the association if it does not exist yet.
+    $assoc_check_sth->execute( $exon_dbID, $type, $sf_dbID );
+    my ($is_linked) = $assoc_check_sth->fetchrow_array;
+    if ( !$is_linked ) {
+      $sf_sth->bind_param( 1, $exon_dbID, SQL_INTEGER );
+      $sf_sth->bind_param( 2, $sf_dbID,   SQL_INTEGER );
+      $sf_sth->bind_param( 3, $type,      SQL_VARCHAR );
+      $sf_sth->execute();
+    }
+  }
+
+  $dna_check_sth->finish;
+  $pep_check_sth->finish;
+  $assoc_check_sth->finish;
+  $sf_sth->finish;
+
+  return;
+}
+
+
+
 1;
 
diff --git a/modules/Bio/EnsEMBL/DBSQL/TranscriptAdaptor.pm b/modules/Bio/EnsEMBL/DBSQL/TranscriptAdaptor.pm
index 4a8dc03732..c03d2ee767 100644
--- a/modules/Bio/EnsEMBL/DBSQL/TranscriptAdaptor.pm
+++ b/modules/Bio/EnsEMBL/DBSQL/TranscriptAdaptor.pm
@@ -766,40 +766,8 @@ sub store {
   }
 
   # Now the supporting evidence
-  # should be stored from featureAdaptor
-  my $sql = "insert into transcript_supporting_feature
-             (transcript_id, feature_id, feature_type)
-             values(?, ?, ?)";
-
-  my $sf_sth = $self->prepare($sql);
-
-  my $anaAdaptor = $self->db->get_AnalysisAdaptor();
-  my $dna_adaptor = $self->db->get_DnaAlignFeatureAdaptor();
-  my $pep_adaptor = $self->db->get_ProteinAlignFeatureAdaptor();
-  my $type;
-
-  foreach my $sf (@{$transcript->get_all_supporting_features}) {
-    if(!$sf->isa("Bio::EnsEMBL::BaseAlignFeature")){
-      throw("$sf must be an align feature otherwise" .
-            "it can't be stored");
-    }
-
-    if($sf->isa("Bio::EnsEMBL::DnaDnaAlignFeature")){
-      $dna_adaptor->store($sf);
-      $type = 'dna_align_feature';
-    }elsif($sf->isa("Bio::EnsEMBL::DnaPepAlignFeature")){
-      $pep_adaptor->store($sf);
-      $type = 'protein_align_feature';
-    } else {
-      warning("Supporting feature of unknown type. Skipping : [$sf]\n");
-      next;
-    }
-
-    $sf_sth->bind_param(1, $transc_dbID, SQL_INTEGER);
-    $sf_sth->bind_param(2, $sf->dbID, SQL_INTEGER);
-    $sf_sth->bind_param(3, $type, SQL_VARCHAR);
-    $sf_sth->execute();
-  }
+  my $tsf_adaptor = $db->get_TranscriptSupportingFeatureAdaptor;
+  $tsf_adaptor->store($transc_dbID, $transcript->get_all_supporting_features);
 
   # store transcript attributes if there are any
   my $attr_adaptor = $db->get_AttributeAdaptor();
diff --git a/modules/Bio/EnsEMBL/DBSQL/TranscriptSupportingFeatureAdaptor.pm b/modules/Bio/EnsEMBL/DBSQL/TranscriptSupportingFeatureAdaptor.pm
index 323b3d3f28..a50dc319bd 100644
--- a/modules/Bio/EnsEMBL/DBSQL/TranscriptSupportingFeatureAdaptor.pm
+++ b/modules/Bio/EnsEMBL/DBSQL/TranscriptSupportingFeatureAdaptor.pm
@@ -96,5 +96,162 @@ sub fetch_all_by_Transcript {
   return $out;
 }
 
+
+
+=head2 store
+
+  Arg [1]    : Int $tran_dbID
+               The dbID of an EnsEMBL transcript to associate with supporting
+               features
+  Arg [2]    : Ref to array of Bio::EnsEMBL::BaseAlignFeature (the support)
+  Example    : $tsfa->store($transcript_id, \@features);
+  Description: Stores a set of alignment features and associates an EnsEMBL
+               transcript with them. A feature that matches a row already in
+               the database (same location, hit, analysis and cigar line) is
+               not stored again; the existing row is reused. An association
+               that already exists is not written again either.
+               A feature whose slice does not start at 1 on the forward
+               strand is first transferred to a slice of the entire
+               seq_region; the transferred copy then replaces the original
+               element of the passed array.
+  Returntype : none
+  Exceptions : thrown when a feature is not a Bio::EnsEMBL::BaseAlignFeature
+               or cannot be transferred to a slice of its entire seq_region
+  Caller     : TranscriptAdaptor
+  Status     : Stable
+
+=cut
+
+sub store {
+  my ( $self, $tran_dbID, $aln_objs ) = @_;
+
+  # Nothing to store: skip preparing statements that would never run.
+  return if !$aln_objs || !@$aln_objs;
+
+  # Columns that identify an already stored alignment feature. Their order
+  # must match the order of @check_args built for each feature below.
+  my @match_cols = qw(seq_region_id seq_region_start seq_region_end
+                      seq_region_strand hit_name hit_start hit_end
+                      analysis_id cigar_line);
+  my $match_sql = join( ' AND ', map { "$_ = ?" } @match_cols );
+
+  my $pep_check_sql =
+    "SELECT protein_align_feature_id " .
+    "FROM protein_align_feature " .
+    "WHERE $match_sql";
+
+  # DNA alignments are additionally distinguished by their hit strand.
+  my $dna_check_sql =
+    "SELECT dna_align_feature_id " .
+    "FROM dna_align_feature " .
+    "WHERE $match_sql AND hit_strand = ?";
+
+  # The transcript dbID is bound as a parameter, not interpolated.
+  my $assoc_check_sql =
+    "SELECT 1 " .
+    "FROM transcript_supporting_feature " .
+    "WHERE transcript_id = ? " .
+    "AND feature_type = ? " .
+    "AND feature_id = ?";
+
+  my $assoc_write_sql =
+    "INSERT INTO transcript_supporting_feature " .
+    "(transcript_id, feature_id, feature_type) " .
+    "VALUES (?, ?, ?)";
+
+  my $pep_check_sth   = $self->prepare($pep_check_sql);
+  my $dna_check_sth   = $self->prepare($dna_check_sql);
+  my $assoc_check_sth = $self->prepare($assoc_check_sql);
+  my $sf_sth          = $self->prepare($assoc_write_sql);
+
+  my $db            = $self->db;
+  my $slice_adaptor = $db->get_SliceAdaptor();
+  my $dna_adaptor   = $db->get_DnaAlignFeatureAdaptor();
+  my $pep_adaptor   = $db->get_ProteinAlignFeatureAdaptor();
+
+  foreach my $f (@$aln_objs) {
+    # Check the type first: everything below calls align feature methods.
+    if ( !$f->isa("Bio::EnsEMBL::BaseAlignFeature") ) {
+      throw("$f must be an align feature otherwise " .
+            "it can't be stored");
+    }
+
+    # The lookup below matches on seq_region coordinates, so the feature
+    # must sit on a slice that starts at 1 on the forward strand.
+    if ( $f->slice->start != 1 || $f->slice->strand != 1 ) {
+      # Move the feature onto a slice of the entire seq_region.
+      my $tls =
+        $slice_adaptor->fetch_by_region( $f->slice->coord_system->name(),
+                                         $f->slice->seq_region_name(),
+                                         undef,    # start
+                                         undef,    # end
+                                         undef,    # strand
+                                         $f->slice->coord_system->version() );
+      my $moved = $f->transfer($tls);
+
+      if ( !$moved ) {
+        throw('Could not transfer Feature to slice of ' .
+              'entire seq_region prior to storing');
+      }
+
+      # $f aliases the array element, so the transferred copy replaces
+      # the original in @$aln_objs (only once the transfer has succeeded).
+      $f = $moved;
+    }
+
+    my ( $type, $adap, $check_sth );
+
+    my @check_args = ( $slice_adaptor->get_seq_region_id( $f->slice ),
+                       $f->start,
+                       $f->end,
+                       $f->strand,
+                       $f->hseqname,
+                       $f->hstart,
+                       $f->hend,
+                       $f->analysis->dbID,
+                       $f->cigar_string );
+
+    if ( $f->isa("Bio::EnsEMBL::DnaDnaAlignFeature") ) {
+      $adap      = $dna_adaptor;
+      $check_sth = $dna_check_sth;
+      $type      = 'dna_align_feature';
+      push @check_args, $f->hstrand;
+    } elsif ( $f->isa("Bio::EnsEMBL::DnaPepAlignFeature") ) {
+      $adap      = $pep_adaptor;
+      $check_sth = $pep_check_sth;
+      $type      = 'protein_align_feature';
+    } else {
+      warning("Supporting feature of unknown type. Skipping : [$f]\n");
+      next;
+    }
+
+    # Reuse the feature if an identical one has been stored before.
+    $check_sth->execute(@check_args);
+    my ($sf_dbID) = $check_sth->fetchrow_array;
+    if ( !$sf_dbID ) {
+      $adap->store($f);
+      $sf_dbID = $f->dbID;
+    }
+
+    # Only write the association if it does not exist yet.
+    $assoc_check_sth->execute( $tran_dbID, $type, $sf_dbID );
+    my ($is_linked) = $assoc_check_sth->fetchrow_array;
+    if ( !$is_linked ) {
+      $sf_sth->bind_param( 1, $tran_dbID, SQL_INTEGER );
+      $sf_sth->bind_param( 2, $sf_dbID,   SQL_INTEGER );
+      $sf_sth->bind_param( 3, $type,      SQL_VARCHAR );
+      $sf_sth->execute();
+    }
+  }
+
+  $dna_check_sth->finish;
+  $pep_check_sth->finish;
+  $assoc_check_sth->finish;
+  $sf_sth->finish;
+
+  return;
+}
+
+
 1;
 
-- 
GitLab