From 99fd531fffdb67f86365176c0d82fef536003c14 Mon Sep 17 00:00:00 2001
From: Arne Stabenau <stabenau@sanger.ac.uk>
Date: Mon, 14 Nov 2005 17:53:53 +0000
Subject: [PATCH] density type should now support features per seq_region via
 the region_features attribute

---
 .../EnsEMBL/DBSQL/DensityFeatureAdaptor.pm    | 23 ++++++--
 .../Bio/EnsEMBL/DBSQL/DensityTypeAdaptor.pm   | 32 +++++------
 modules/Bio/EnsEMBL/DensityType.pm            | 35 ++++++++++--
 modules/t/densityFeatureAdaptor.t             | 53 +++++++++++++++++--
 4 files changed, 115 insertions(+), 28 deletions(-)

diff --git a/modules/Bio/EnsEMBL/DBSQL/DensityFeatureAdaptor.pm b/modules/Bio/EnsEMBL/DBSQL/DensityFeatureAdaptor.pm
index cb3bf471c9..91d76b4a15 100644
--- a/modules/Bio/EnsEMBL/DBSQL/DensityFeatureAdaptor.pm
+++ b/modules/Bio/EnsEMBL/DBSQL/DensityFeatureAdaptor.pm
@@ -164,7 +164,17 @@ sub fetch_all_by_Slice {
   my $density_type_large = undef;
 
   foreach my $dt (@dtypes) {
-    my $ratio = $wanted_block_size/$dt->block_size();
+
+    my $ratio;
+    if( $dt->block_size() > 0 ) {
+      $ratio = $wanted_block_size/$dt->block_size();
+    } else {
+      # This is only valid if the requested seq_region is the one the 
+      # features are stored on. Please use sensibly. Or find better implementation.
+
+      my $block_size = $slice->seq_region_length() / $dt->region_features();
+      $ratio = $wanted_block_size / $block_size;
+    }
 
     # we prefer to use a block size that's smaller than the required one
     # (better results on interpolation). remember larger block sizes though
@@ -181,8 +191,9 @@ sub fetch_all_by_Slice {
       }
     }
   }
-  # fall back to larger block size
-  unless ($best_ratio) {
+  # fall back to larger block size if there is no smaller black size 
+  # or it would require retrieving too many features
+  if( !$best_ratio || $best_ratio > 30 ) {
     $best_ratio = $best_ratio_large;
     $density_type = $density_type_large;
   }
@@ -200,7 +211,7 @@ sub fetch_all_by_Slice {
     @{$self->fetch_all_by_Slice_constraint($slice,$constraint)};
 
   #we don't want to interpolate if the ratio was very close
-  $interpolate = 0 if($best_ratio < 1.05);
+  $interpolate = 0 if($best_ratio < 1.05 && $best_ratio > 0.9 );
 
   return \@features if(!$interpolate);
 
@@ -527,7 +538,9 @@ sub store{
       throw("DensityFeature must be an Ensembl DensityFeature, " .
             "not a [".ref($df)."]");
     }
-
+    
+    # we dont store 0 value density features
+    next if( $df->density_value == 0 );
     if($df->is_stored($db)) {
       warning("DensityFeature [".$df->dbID."] is already stored" .
               " in this database.");
diff --git a/modules/Bio/EnsEMBL/DBSQL/DensityTypeAdaptor.pm b/modules/Bio/EnsEMBL/DBSQL/DensityTypeAdaptor.pm
index 20266cdb6a..4b16d8cae0 100644
--- a/modules/Bio/EnsEMBL/DBSQL/DensityTypeAdaptor.pm
+++ b/modules/Bio/EnsEMBL/DBSQL/DensityTypeAdaptor.pm
@@ -82,13 +82,13 @@ sub fetch_all {
   my @out;
 
   my $sth = $self->prepare("SELECT density_type_id, analysis_id, block_size,".
-                           "       value_type " .
+                           "       value_type, region_features " .
                            "FROM density_type");
 
   $sth->execute();
 
-  my($dbID, $analysis_id, $blk_size, $vtype);
-  $sth->bind_columns(\$dbID, \$analysis_id, \$blk_size, \$vtype);
+  my($dbID, $analysis_id, $blk_size, $vtype, $region_features );
+  $sth->bind_columns(\$dbID, \$analysis_id, \$blk_size, \$vtype, \$region_features );
 
   my $analysis_adaptor = $self->db->get_AnalysisAdaptor();
 
@@ -104,6 +104,7 @@ sub fetch_all {
                                             -DBID    => $dbID,
                                             -ANALYSIS => $analysis,
                                             -BLOCK_SIZE => $blk_size,
+					    -REGION_FEATURES => $region_features,
                                             -VALUE_TYPE => $vtype);
 
     $self->{'dbID_cache'}->{$dbID} = $dt;
@@ -173,27 +174,23 @@ sub fetch_all_by_logic_name {
   return [] if(!$analysis);
 
   my $sth = $self->prepare("SELECT density_type_id, block_size,".
-                           "       value_type " .
+                           "       value_type, region_features " .
                            "FROM density_type " .
                            "WHERE analysis_id = ?");
   $sth->execute($analysis->dbID());
 
-  my($dbID, $blk_size, $vtype);
-  $sth->bind_columns(\$dbID, \$blk_size, \$vtype);
+  my($dbID, $blk_size, $vtype, $region_features );
+  $sth->bind_columns(\$dbID, \$blk_size, \$vtype, \$region_features);
 
   my @out;
 
   while($sth->fetch()) {
 
-    if($blk_size < 1) {
-      warning("density_type table contains invalid block_size=$blk_size.");
-      $blk_size = 1;
-    }
-
     my $dt = Bio::EnsEMBL::DensityType->new(-ADAPTOR => $self,
                                             -DBID    => $dbID,
                                             -ANALYSIS => $analysis,
                                             -BLOCK_SIZE => $blk_size,
+					    -REGION_FEATURES => $region_features,
                                             -VALUE_TYPE => $vtype);
 
     $self->{'dbID_cache'}->{$dbID} = $dt;
@@ -228,8 +225,8 @@ sub store {
 
   my $sth = $self->prepare
     ("INSERT IGNORE INTO density_type (analysis_id,".
-                                  "block_size, value_type) ". 
-    "VALUES (?,?,?)");
+                                  "block_size, value_type, region_features ) ". 
+    "VALUES (?, ?, ?, ?)");
 
   my $db = $self->db();
   my $analysis_adaptor = $db->get_AnalysisAdaptor();
@@ -253,8 +250,13 @@ sub store {
       $analysis_adaptor->store($dt->analysis());
     }
 	
-    my $inserted = $sth->execute($dt->analysis->dbID(), $dt->block_size(),
-                                 $dt->value_type());
+    my $block_size = $dt->block_size();
+    $block_size |= 0;
+    my $region_features = $dt->region_features();
+    $region_features |= 0;
+
+    my $inserted = $sth->execute($dt->analysis->dbID(), $block_size,
+                                 $dt->value_type(), $region_features);
 
     my $dbID;
 
diff --git a/modules/Bio/EnsEMBL/DensityType.pm b/modules/Bio/EnsEMBL/DensityType.pm
index e9ca13a4a0..4fffb1f881 100644
--- a/modules/Bio/EnsEMBL/DensityType.pm
+++ b/modules/Bio/EnsEMBL/DensityType.pm
@@ -71,8 +71,8 @@ sub new {
 
   my $self = $class->SUPER::new(@_);
 
-  my ($analysis, $block_size, $value_type) = 
-    rearrange(['ANALYSIS','BLOCK_SIZE','VALUE_TYPE'],@_);
+  my ($analysis, $block_size, $value_type, $region_features) = 
+    rearrange(['ANALYSIS','BLOCK_SIZE','VALUE_TYPE','REGION_FEATURES'],@_);
 
   if($analysis) {
     if(!ref($analysis) || !$analysis->isa('Bio::EnsEMBL::Analysis')) {
@@ -86,13 +86,22 @@ sub new {
 	  $value_type."*");
   }
 
-  if($block_size <=0 ){
-    throw('-BLOCK_SIZE must be greater than 0'); 
+  $block_size |= 0;
+  $region_features |= 0;
+
+  if(! ($block_size xor $region_features )){
+    throw('Set either -BLOCK_SIZE or -REGION_FEATURES, not both'); 
+  }
+
+  if( $block_size <0 or $region_features < 0 ) {
+    throw( 'No negative values for -BLOCK_SIZE or -REGION_FEATURES' );
   }
 
+
   $self->{'analysis'} = $analysis;
   $self->{'block_size'} = $block_size;
   $self->{'value_type'} = $value_type;
+  $self->{'region_features'} = $region_features;
 
   return $self;
 }
@@ -160,4 +169,22 @@ sub block_size{
 }
 
 
+=head2 region_features
+
+  Arg [1]    : int $region_features
+  Example    : The number of features per seq_region inside this density_type..
+  Description: get/set for attribute region_features
+  Returntype : string
+  Exceptions : none
+  Caller     : general
+
+=cut
+
+sub region_features {
+   my $self = shift;
+  $self->{'region_features'} = shift if( @_ );
+  return $self->{'region_features'};
+}
+
+
 1;
diff --git a/modules/t/densityFeatureAdaptor.t b/modules/t/densityFeatureAdaptor.t
index 2531c6938b..6955ff24a0 100644
--- a/modules/t/densityFeatureAdaptor.t
+++ b/modules/t/densityFeatureAdaptor.t
@@ -20,7 +20,7 @@ BEGIN { $| = 1;
 
 
 
-our $verbose = 0;
+our $verbose = 1;
 verbose('WARNING');
 
 my $multi = Bio::EnsEMBL::Test::MultiTestDB->new;
@@ -98,20 +98,65 @@ ok(scalar( @density_features) == 10);
 
 ok(!$density_features[0]->is_stored($db));
 $dfa->store(@density_features);		
-ok($density_features[0]->is_stored($db));
 
 #
 # get back from database and check
 #
 
+my @filtered_features = grep { $_->density_value() != 0 } @density_features;
+
 my @stored_features = @{$dfa->fetch_all_by_Slice($slice,'GeneDensityTest', 10)};
 
-for (my $i=0; $i< scalar(@density_features); $i++){
-  ok($density_features[0]->density_value() == $stored_features[0]->density_value());
+for (my $i=0; $i< scalar(@filtered_features); $i++){
+  ok($filtered_features[0]->density_value() == $stored_features[0]->density_value());
 }
 
 
 
+#
+# Now some density features with region_feature count set on density type
+# Lets say we want 300 features on our seq_regions
+# 
+
+
+debug( "Region Features enabled densities" );
+
+$dt = Bio::EnsEMBL::DensityType->new(-analysis   => $analysis,
+				     -region_features => 300,
+				     -value_type => 'sum');
+
+ok(!$dt->is_stored($db));
+$dta->store($dt);	
+ok($dt->is_stored($db));
+
+@density_features = ();
+my $chr_20_slice = $slice_adaptor->fetch_by_region( "chromosome", 20 );
+my $step = $chr_20_slice->length() / 300;
+$start = 1;
+while( $start < $chr_20_slice->length() ) {
+  my $end = int( $start + $step );
+  push @density_features, Bio::EnsEMBL::DensityFeature->new(-seq_region    => $chr_20_slice,
+							     -start         => int($start),
+							     -end           => int( $start+$step),
+							     -density_type  => $dt,
+							     -density_value => 5 );
+  $start += $step + 1;
+}
+
+$dfa->store( @density_features );
+ok($density_features[-1]->is_stored($db));
+debug( "Created ".scalar( @density_features )." density features on chr 20" );
+
+@stored_features = @{$dfa->fetch_all_by_Slice($chr_20_slice,'GeneDensityTest', 100, "interpolate" )};
+ok( scalar( @stored_features ) == 100 );
+debug( "Interpolated retreived ".scalar( @stored_features ));
+
+ok( abs( $stored_features[0]->density_value() - 15) < 0.0001 );
+debug( "Density value = ".$stored_features[0]->density_value() );
+
+
+
+
 $multi->restore('core', 'analysis');
 $multi->restore('core', 'density_type');
 $multi->restore('core', 'density_feature');
-- 
GitLab