From 75b70e78a7693a78e201394e17deb4eea081346f Mon Sep 17 00:00:00 2001
From: Kevin Howe <klh@ebi.ac.uk>
Date: Thu, 8 Mar 2007 17:17:53 +0000
Subject: [PATCH] On storage of supporting features, check that the feature
 isn't already present, and reuse it if it is.

---
 modules/Bio/EnsEMBL/DBSQL/ExonAdaptor.pm      |  35 +----
 .../EnsEMBL/DBSQL/SupportingFeatureAdaptor.pm | 141 +++++++++++++++++
 .../Bio/EnsEMBL/DBSQL/TranscriptAdaptor.pm    |  36 +----
 .../TranscriptSupportingFeatureAdaptor.pm     | 142 ++++++++++++++++++
 4 files changed, 287 insertions(+), 67 deletions(-)

diff --git a/modules/Bio/EnsEMBL/DBSQL/ExonAdaptor.pm b/modules/Bio/EnsEMBL/DBSQL/ExonAdaptor.pm
index c258ce69c5..6673f3d56b 100644
--- a/modules/Bio/EnsEMBL/DBSQL/ExonAdaptor.pm
+++ b/modules/Bio/EnsEMBL/DBSQL/ExonAdaptor.pm
@@ -299,39 +299,8 @@ sub store {
   }
 
   # Now the supporting evidence
-  # should be stored from featureAdaptor
-  my $sql = "insert into supporting_feature (exon_id, feature_id, feature_type)
-             values(?, ?, ?)";
-
-  my $sf_sth = $self->prepare($sql);
-
-  my $anaAdaptor = $self->db->get_AnalysisAdaptor();
-  my $dna_adaptor = $self->db->get_DnaAlignFeatureAdaptor();
-  my $pep_adaptor = $self->db->get_ProteinAlignFeatureAdaptor();
-  my $type;
-
-  foreach my $sf (@{$exon->get_all_supporting_features}) {
-    if(!$sf->isa("Bio::EnsEMBL::BaseAlignFeature")){
-      throw("$sf must be an align feature otherwise" .
-            "it can't be stored");
-    }
-
-    if($sf->isa("Bio::EnsEMBL::DnaDnaAlignFeature")){
-      $dna_adaptor->store($sf);
-      $type = 'dna_align_feature';
-    }elsif($sf->isa("Bio::EnsEMBL::DnaPepAlignFeature")){
-      $pep_adaptor->store($sf);
-      $type = 'protein_align_feature';
-    } else {
-      warning("Supporting feature of unknown type. Skipping : [$sf]\n");
-      next;
-    }
-
-    $sf_sth->bind_param(1, $exonId, SQL_INTEGER);
-    $sf_sth->bind_param(2, $sf->dbID, SQL_INTEGER);
-    $sf_sth->bind_param(3, $type, SQL_VARCHAR);
-    $sf_sth->execute();
-  }
+  my $esf_adaptor = $db->get_SupportingFeatureAdaptor;
+  $esf_adaptor->store($exonId, $exon->get_all_supporting_features);
 
   #
   # Finally, update the dbID and adaptor of the exon (and any component exons)
diff --git a/modules/Bio/EnsEMBL/DBSQL/SupportingFeatureAdaptor.pm b/modules/Bio/EnsEMBL/DBSQL/SupportingFeatureAdaptor.pm
index 53668130b1..0072171b33 100644
--- a/modules/Bio/EnsEMBL/DBSQL/SupportingFeatureAdaptor.pm
+++ b/modules/Bio/EnsEMBL/DBSQL/SupportingFeatureAdaptor.pm
@@ -95,5 +95,146 @@ sub fetch_all_by_Exon {
   return $out;
 }
 
+=head2 store
+
+  Arg [1]    : Int $exon_dbID
+               The dbID of the exon to associate the supporting features with
+  Arg [2]    : Ref to array of Bio::EnsEMBL::BaseAlignFeature (the support)
+  Example    : $sfa->store($exon_id, \@features);
+  Description: Stores a set of alignment features, reusing identical features
+               already in the database, and associates the exon with them
+  Returntype : none
+  Exceptions : thrown on a missing dbID, a non-align feature or a failed transfer
+  Caller     : ExonAdaptor
+  Status     : Stable
+
+=cut
+
+sub store {
+  my ( $self, $exon_dbID, $aln_objs ) = @_;
+  throw("A valid exon dbID is required to store supporting features") if !$exon_dbID;
+
+  # Lookups used to find an identical, already stored feature so it can be reused.
+  my $pep_check_sql =
+      "SELECT protein_align_feature_id " .
+      "FROM protein_align_feature " .
+      "WHERE seq_region_id = ? " .
+      "AND   seq_region_start = ? " .
+      "AND   seq_region_end   = ? " .
+      "AND   seq_region_strand = ? " .
+      "AND   hit_name = ? " .
+      "AND   hit_start = ? " .
+      "AND   hit_end   = ? " .
+      "AND   analysis_id = ? " .
+      "AND   cigar_line = ? ";
+
+  my $dna_check_sql =
+      "SELECT dna_align_feature_id " .
+      "FROM dna_align_feature " .
+      "WHERE seq_region_id = ? " .
+      "AND   seq_region_start = ? " .
+      "AND   seq_region_end   = ? " .
+      "AND   seq_region_strand = ? " .
+      "AND   hit_name = ? " .
+      "AND   hit_start = ? " .
+      "AND   hit_end   = ? " .
+      "AND   analysis_id = ? " .
+      "AND   cigar_line = ? " .
+      "AND   hit_strand = ? ";
+
+  # The exon dbID is bound at execute time rather than interpolated into the SQL.
+  my $assoc_check_sql =
+      "SELECT * " .
+      "FROM  supporting_feature " .
+      "WHERE exon_id = ? " .
+      "AND   feature_type = ? " .
+      "AND   feature_id   = ? ";
+
+  my $assoc_write_sql = "INSERT into supporting_feature " .
+      "(exon_id, feature_id, feature_type) values(?, ?, ?)";
+
+  my $pep_check_sth = $self->prepare($pep_check_sql);
+  my $dna_check_sth = $self->prepare($dna_check_sql);
+  my $assoc_check_sth = $self->prepare($assoc_check_sql);
+  my $sf_sth = $self->prepare($assoc_write_sql);
+
+  my $dna_adaptor = $self->db->get_DnaAlignFeatureAdaptor();
+  my $pep_adaptor = $self->db->get_ProteinAlignFeatureAdaptor();
+
+  foreach my $f (@$aln_objs) {
+    # Validate the type first so that a wrong object fails with a clear message.
+    if(!$f->isa("Bio::EnsEMBL::BaseAlignFeature")){
+      throw("$f must be an align feature otherwise it can't be stored");
+    }
+
+    # check that the feature is in toplevel coords
+    if($f->slice->start != 1 || $f->slice->strand != 1) {
+      # move feature onto a slice of the entire seq_region; $f aliases the
+      # caller's array element, so the transferred copy replaces it there too
+      my $tls = $self->db->get_SliceAdaptor->fetch_by_region($f->slice->coord_system->name(),
+                                                             $f->slice->seq_region_name(),
+                                                             undef, #start
+                                                             undef, #end
+                                                             undef, #strand
+                                                             $f->slice->coord_system->version());
+      $f = $f->transfer($tls);
+      if(!$f) {
+        throw('Could not transfer Feature to slice of ' .
+              'entire seq_region prior to storing');
+      }
+    }
+
+    my ($sf_dbID, $type, $adap, $check_sth);
+
+    my @check_args = ($self->db->get_SliceAdaptor->get_seq_region_id($f->slice),
+                      $f->start,
+                      $f->end,
+                      $f->strand,
+                      $f->hseqname,
+                      $f->hstart,
+                      $f->hend,
+                      $f->analysis->dbID,
+                      $f->cigar_string);
+
+    if($f->isa("Bio::EnsEMBL::DnaDnaAlignFeature")){
+      $adap = $dna_adaptor;
+      $check_sth = $dna_check_sth;
+      $type = 'dna_align_feature';
+      push @check_args, $f->hstrand;
+    } elsif($f->isa("Bio::EnsEMBL::DnaPepAlignFeature")){
+      $adap = $pep_adaptor;
+      $check_sth = $pep_check_sth;
+      $type = 'protein_align_feature';
+    } else {
+      warning("Supporting feature of unknown type. Skipping : [$f]\n");
+      next;
+    }
+
+    # Reuse the dbID of an identical stored feature; otherwise store this one.
+    $check_sth->execute(@check_args);
+    ($sf_dbID) = $check_sth->fetchrow_array;
+    if (not $sf_dbID) {
+      $adap->store($f);
+      $sf_dbID = $f->dbID;
+    }
+
+    # now check association
+    $assoc_check_sth->execute($exon_dbID, $type, $sf_dbID);
+    if (not $assoc_check_sth->fetchrow_arrayref) {
+      $sf_sth->bind_param(1, $exon_dbID, SQL_INTEGER);
+      $sf_sth->bind_param(2, $sf_dbID, SQL_INTEGER);
+      $sf_sth->bind_param(3, $type, SQL_VARCHAR);
+      $sf_sth->execute();
+    }
+  }
+
+  $dna_check_sth->finish;
+  $pep_check_sth->finish;
+  $assoc_check_sth->finish;
+  $sf_sth->finish;
+}
+
+
+
 1;
 
diff --git a/modules/Bio/EnsEMBL/DBSQL/TranscriptAdaptor.pm b/modules/Bio/EnsEMBL/DBSQL/TranscriptAdaptor.pm
index 4a8dc03732..c03d2ee767 100644
--- a/modules/Bio/EnsEMBL/DBSQL/TranscriptAdaptor.pm
+++ b/modules/Bio/EnsEMBL/DBSQL/TranscriptAdaptor.pm
@@ -766,40 +766,8 @@ sub store {
   }
 
   # Now the supporting evidence
-  # should be stored from featureAdaptor
-  my $sql = "insert into transcript_supporting_feature
-             (transcript_id, feature_id, feature_type)
-             values(?, ?, ?)";
-
-  my $sf_sth = $self->prepare($sql);
-
-  my $anaAdaptor = $self->db->get_AnalysisAdaptor();
-  my $dna_adaptor = $self->db->get_DnaAlignFeatureAdaptor();
-  my $pep_adaptor = $self->db->get_ProteinAlignFeatureAdaptor();
-  my $type;
-
-  foreach my $sf (@{$transcript->get_all_supporting_features}) {
-    if(!$sf->isa("Bio::EnsEMBL::BaseAlignFeature")){
-      throw("$sf must be an align feature otherwise" .
-            "it can't be stored");
-    }
-
-    if($sf->isa("Bio::EnsEMBL::DnaDnaAlignFeature")){
-      $dna_adaptor->store($sf);
-      $type = 'dna_align_feature';
-    }elsif($sf->isa("Bio::EnsEMBL::DnaPepAlignFeature")){
-      $pep_adaptor->store($sf);
-      $type = 'protein_align_feature';
-    } else {
-      warning("Supporting feature of unknown type. Skipping : [$sf]\n");
-      next;
-    }
-
-    $sf_sth->bind_param(1, $transc_dbID, SQL_INTEGER);
-    $sf_sth->bind_param(2, $sf->dbID, SQL_INTEGER);
-    $sf_sth->bind_param(3, $type, SQL_VARCHAR);
-    $sf_sth->execute();
-  }
+  my $tsf_adaptor = $db->get_TranscriptSupportingFeatureAdaptor;
+  $tsf_adaptor->store($transc_dbID, $transcript->get_all_supporting_features);
 
   # store transcript attributes if there are any
   my $attr_adaptor = $db->get_AttributeAdaptor();
diff --git a/modules/Bio/EnsEMBL/DBSQL/TranscriptSupportingFeatureAdaptor.pm b/modules/Bio/EnsEMBL/DBSQL/TranscriptSupportingFeatureAdaptor.pm
index 323b3d3f28..a50dc319bd 100644
--- a/modules/Bio/EnsEMBL/DBSQL/TranscriptSupportingFeatureAdaptor.pm
+++ b/modules/Bio/EnsEMBL/DBSQL/TranscriptSupportingFeatureAdaptor.pm
@@ -96,5 +96,147 @@ sub fetch_all_by_Transcript {
   return $out;
 }
 
+
+
+=head2 store
+
+  Arg [1]    : Int $tran_dbID
+               The dbID of the transcript to associate the supporting features with
+  Arg [2]    : Ref to array of Bio::EnsEMBL::BaseAlignFeature (the support)
+  Example    : $tsfa->store($transcript_id, \@features);
+  Description: Stores a set of alignment features, reusing identical features
+               already in the database, and associates the transcript with them
+  Returntype : none
+  Exceptions : thrown on a missing dbID, a non-align feature or a failed transfer
+  Caller     : TranscriptAdaptor
+  Status     : Stable
+
+=cut
+
+sub store {
+  my ( $self, $tran_dbID, $aln_objs ) = @_;
+  throw("A valid transcript dbID is required to store supporting features") if !$tran_dbID;
+
+  # Lookups used to find an identical, already stored feature so it can be reused.
+  my $pep_check_sql =
+      "SELECT protein_align_feature_id " .
+      "FROM protein_align_feature " .
+      "WHERE seq_region_id = ? " .
+      "AND   seq_region_start = ? " .
+      "AND   seq_region_end   = ? " .
+      "AND   seq_region_strand = ? " .
+      "AND   hit_name = ? " .
+      "AND   hit_start = ? " .
+      "AND   hit_end   = ? " .
+      "AND   analysis_id = ? " .
+      "AND   cigar_line = ? ";
+
+  my $dna_check_sql =
+      "SELECT dna_align_feature_id " .
+      "FROM  dna_align_feature " .
+      "WHERE seq_region_id = ? " .
+      "AND   seq_region_start = ? " .
+      "AND   seq_region_end   = ? " .
+      "AND   seq_region_strand = ? " .
+      "AND   hit_name = ? " .
+      "AND   hit_start = ? " .
+      "AND   hit_end   = ? " .
+      "AND   analysis_id = ? " .
+      "AND   cigar_line = ? " .
+      "AND   hit_strand = ? ";
+
+  # The transcript dbID is bound at execute time rather than interpolated into the SQL.
+  my $assoc_check_sql =
+      "SELECT * " .
+      "FROM  transcript_supporting_feature " .
+      "WHERE transcript_id = ? " .
+      "AND   feature_type = ? " .
+      "AND   feature_id   = ? ";
+
+  my $assoc_write_sql = "INSERT into transcript_supporting_feature " .
+      "(transcript_id, feature_id, feature_type) values(?, ?, ?)";
+
+  my $pep_check_sth = $self->prepare($pep_check_sql);
+  my $dna_check_sth = $self->prepare($dna_check_sql);
+  my $assoc_check_sth = $self->prepare($assoc_check_sql);
+  my $sf_sth = $self->prepare($assoc_write_sql);
+
+  my $dna_adaptor = $self->db->get_DnaAlignFeatureAdaptor();
+  my $pep_adaptor = $self->db->get_ProteinAlignFeatureAdaptor();
+
+  foreach my $f (@$aln_objs) {
+    # Validate the type first so that a wrong object fails with a clear message.
+    if(!$f->isa("Bio::EnsEMBL::BaseAlignFeature")){
+      throw("$f must be an align feature otherwise it can't be stored");
+    }
+
+    # check that the feature is in toplevel coords
+    if($f->slice->start != 1 || $f->slice->strand != 1) {
+      # move feature onto a slice of the entire seq_region; $f aliases the
+      # caller's array element, so the transferred copy replaces it there too
+      my $tls = $self->db->get_SliceAdaptor->fetch_by_region($f->slice->coord_system->name(),
+                                                             $f->slice->seq_region_name(),
+                                                             undef, #start
+                                                             undef, #end
+                                                             undef, #strand
+                                                             $f->slice->coord_system->version());
+      $f = $f->transfer($tls);
+      if(!$f) {
+        throw('Could not transfer Feature to slice of ' .
+              'entire seq_region prior to storing');
+      }
+    }
+
+    my ($sf_dbID, $type, $adap, $check_sth);
+
+    my @check_args = ($self->db->get_SliceAdaptor->get_seq_region_id($f->slice),
+                      $f->start,
+                      $f->end,
+                      $f->strand,
+                      $f->hseqname,
+                      $f->hstart,
+                      $f->hend,
+                      $f->analysis->dbID,
+                      $f->cigar_string);
+
+    if($f->isa("Bio::EnsEMBL::DnaDnaAlignFeature")){
+      $adap = $dna_adaptor;
+      $check_sth = $dna_check_sth;
+      $type = 'dna_align_feature';
+      push @check_args, $f->hstrand;
+    } elsif($f->isa("Bio::EnsEMBL::DnaPepAlignFeature")){
+      $adap = $pep_adaptor;
+      $check_sth = $pep_check_sth;
+      $type = 'protein_align_feature';
+    } else {
+      warning("Supporting feature of unknown type. Skipping : [$f]\n");
+      next;
+    }
+
+    # Reuse the dbID of an identical stored feature; otherwise store this one.
+    $check_sth->execute(@check_args);
+    ($sf_dbID) = $check_sth->fetchrow_array;
+    if (not $sf_dbID) {
+      $adap->store($f);
+      $sf_dbID = $f->dbID;
+    }
+
+    # now check association
+    $assoc_check_sth->execute($tran_dbID, $type, $sf_dbID);
+    if (not $assoc_check_sth->fetchrow_arrayref) {
+      $sf_sth->bind_param(1, $tran_dbID, SQL_INTEGER);
+      $sf_sth->bind_param(2, $sf_dbID, SQL_INTEGER);
+      $sf_sth->bind_param(3, $type, SQL_VARCHAR);
+      $sf_sth->execute();
+    }
+  }
+
+  $dna_check_sth->finish;
+  $pep_check_sth->finish;
+  $assoc_check_sth->finish;
+  $sf_sth->finish;
+}
+
+
 1;
 
-- 
GitLab