diff --git a/modules/Bio/EnsEMBL/DBSQL/DensityFeatureAdaptor.pm b/modules/Bio/EnsEMBL/DBSQL/DensityFeatureAdaptor.pm index cb3bf471c9f99a721bd73f1ab4c7539736bfa11d..91d76b4a1590176885a1af19c9416747e017c2e2 100644 --- a/modules/Bio/EnsEMBL/DBSQL/DensityFeatureAdaptor.pm +++ b/modules/Bio/EnsEMBL/DBSQL/DensityFeatureAdaptor.pm @@ -164,7 +164,17 @@ sub fetch_all_by_Slice { my $density_type_large = undef; foreach my $dt (@dtypes) { - my $ratio = $wanted_block_size/$dt->block_size(); + + my $ratio; + if( $dt->block_size() > 0 ) { + $ratio = $wanted_block_size/$dt->block_size(); + } else { + # This is only valid if the requested seq_region is the one the + # features are stored on. Please use sensibly. Or find better implementation. + + my $block_size = $slice->seq_region_length() / $dt->region_features(); + $ratio = $wanted_block_size / $block_size; + } # we prefer to use a block size that's smaller than the required one # (better results on interpolation). remember larger block sizes though @@ -181,8 +191,9 @@ sub fetch_all_by_Slice { } } } - # fall back to larger block size - unless ($best_ratio) { + # fall back to larger block size if there is no smaller black size + # or it would require retrieving too many features + if( !$best_ratio || $best_ratio > 30 ) { $best_ratio = $best_ratio_large; $density_type = $density_type_large; } @@ -200,7 +211,7 @@ sub fetch_all_by_Slice { @{$self->fetch_all_by_Slice_constraint($slice,$constraint)}; #we don't want to interpolate if the ratio was very close - $interpolate = 0 if($best_ratio < 1.05); + $interpolate = 0 if($best_ratio < 1.05 && $best_ratio > 0.9 ); return \@features if(!$interpolate); @@ -527,7 +538,9 @@ sub store{ throw("DensityFeature must be an Ensembl DensityFeature, " . "not a [".ref($df)."]"); } - + + # we dont store 0 value density features + next if( $df->density_value == 0 ); if($df->is_stored($db)) { warning("DensityFeature [".$df->dbID."] is already stored" . " in this database."); diff --git a/modules/Bio/EnsEMBL/DBSQL/DensityTypeAdaptor.pm b/modules/Bio/EnsEMBL/DBSQL/DensityTypeAdaptor.pm index 20266cdb6abf74c8e2fb87f97097fa35ba336007..4b16d8cae0eb6acede61ca12acbdedc150a78ba9 100644 --- a/modules/Bio/EnsEMBL/DBSQL/DensityTypeAdaptor.pm +++ b/modules/Bio/EnsEMBL/DBSQL/DensityTypeAdaptor.pm @@ -82,13 +82,13 @@ sub fetch_all { my @out; my $sth = $self->prepare("SELECT density_type_id, analysis_id, block_size,". - " value_type " . + " value_type, region_features " . "FROM density_type"); $sth->execute(); - my($dbID, $analysis_id, $blk_size, $vtype); - $sth->bind_columns(\$dbID, \$analysis_id, \$blk_size, \$vtype); + my($dbID, $analysis_id, $blk_size, $vtype, $region_features ); + $sth->bind_columns(\$dbID, \$analysis_id, \$blk_size, \$vtype, \$region_features ); my $analysis_adaptor = $self->db->get_AnalysisAdaptor(); @@ -104,6 +104,7 @@ sub fetch_all { -DBID => $dbID, -ANALYSIS => $analysis, -BLOCK_SIZE => $blk_size, + -REGION_FEATURES => $region_features, -VALUE_TYPE => $vtype); $self->{'dbID_cache'}->{$dbID} = $dt; @@ -173,27 +174,23 @@ sub fetch_all_by_logic_name { return [] if(!$analysis); my $sth = $self->prepare("SELECT density_type_id, block_size,". - " value_type " . + " value_type, region_features " . "FROM density_type " . "WHERE analysis_id = ?"); $sth->execute($analysis->dbID()); - my($dbID, $blk_size, $vtype); - $sth->bind_columns(\$dbID, \$blk_size, \$vtype); + my($dbID, $blk_size, $vtype, $region_features ); + $sth->bind_columns(\$dbID, \$blk_size, \$vtype, \$region_features); my @out; while($sth->fetch()) { - if($blk_size < 1) { - warning("density_type table contains invalid block_size=$blk_size."); - $blk_size = 1; - } - my $dt = Bio::EnsEMBL::DensityType->new(-ADAPTOR => $self, -DBID => $dbID, -ANALYSIS => $analysis, -BLOCK_SIZE => $blk_size, + -REGION_FEATURES => $region_features, -VALUE_TYPE => $vtype); $self->{'dbID_cache'}->{$dbID} = $dt; @@ -228,8 +225,8 @@ sub store { my $sth = $self->prepare ("INSERT IGNORE INTO density_type (analysis_id,". - "block_size, value_type) ". - "VALUES (?,?,?)"); + "block_size, value_type, region_features ) ". + "VALUES (?, ?, ?, ?)"); my $db = $self->db(); my $analysis_adaptor = $db->get_AnalysisAdaptor(); @@ -253,8 +250,13 @@ sub store { $analysis_adaptor->store($dt->analysis()); } - my $inserted = $sth->execute($dt->analysis->dbID(), $dt->block_size(), - $dt->value_type()); + my $block_size = $dt->block_size(); + $block_size |= 0; + my $region_features = $dt->region_features(); + $region_features |= 0; + + my $inserted = $sth->execute($dt->analysis->dbID(), $block_size, + $dt->value_type(), $region_features); my $dbID; diff --git a/modules/Bio/EnsEMBL/DensityType.pm b/modules/Bio/EnsEMBL/DensityType.pm index e9ca13a4a063a9d668c1248a5112322225a85061..4fffb1f881c370788d8516b55fd26dff70d69556 100644 --- a/modules/Bio/EnsEMBL/DensityType.pm +++ b/modules/Bio/EnsEMBL/DensityType.pm @@ -71,8 +71,8 @@ sub new { my $self = $class->SUPER::new(@_); - my ($analysis, $block_size, $value_type) = - rearrange(['ANALYSIS','BLOCK_SIZE','VALUE_TYPE'],@_); + my ($analysis, $block_size, $value_type, $region_features) = + rearrange(['ANALYSIS','BLOCK_SIZE','VALUE_TYPE','REGION_FEATURES'],@_); if($analysis) { if(!ref($analysis) || !$analysis->isa('Bio::EnsEMBL::Analysis')) { @@ -86,13 +86,22 @@ sub new { $value_type."*"); } - if($block_size <=0 ){ - throw('-BLOCK_SIZE must be greater than 0'); + $block_size |= 0; + $region_features |= 0; + + if(! ($block_size xor $region_features )){ + throw('Set either -BLOCK_SIZE or -REGION_FEATURES, not both'); + } + + if( $block_size <0 or $region_features < 0 ) { + throw( 'No negative values for -BLOCK_SIZE or -REGION_FEATURES' ); } + $self->{'analysis'} = $analysis; $self->{'block_size'} = $block_size; $self->{'value_type'} = $value_type; + $self->{'region_features'} = $region_features; return $self; } @@ -160,4 +169,22 @@ sub block_size{ } +=head2 region_features + + Arg [1] : int $region_features + Example : The number of features per seq_region inside this density_type.. + Description: get/set for attribute region_features + Returntype : string + Exceptions : none + Caller : general + +=cut + +sub region_features { + my $self = shift; + $self->{'region_features'} = shift if( @_ ); + return $self->{'region_features'}; +} + + 1; diff --git a/modules/t/densityFeatureAdaptor.t b/modules/t/densityFeatureAdaptor.t index 2531c6938bfc6570bbe9cfd0c6907fa2e0ea4b84..6955ff24a01afd851a9eb7456559fea9217feeb8 100644 --- a/modules/t/densityFeatureAdaptor.t +++ b/modules/t/densityFeatureAdaptor.t @@ -20,7 +20,7 @@ BEGIN { $| = 1; -our $verbose = 0; +our $verbose = 1; verbose('WARNING'); my $multi = Bio::EnsEMBL::Test::MultiTestDB->new; @@ -98,20 +98,65 @@ ok(scalar( @density_features) == 10); ok(!$density_features[0]->is_stored($db)); $dfa->store(@density_features); -ok($density_features[0]->is_stored($db)); # # get back from database and check # +my @filtered_features = grep { $_->density_value() != 0 } @density_features; + my @stored_features = @{$dfa->fetch_all_by_Slice($slice,'GeneDensityTest', 10)}; -for (my $i=0; $i< scalar(@density_features); $i++){ - ok($density_features[0]->density_value() == $stored_features[0]->density_value()); +for (my $i=0; $i< scalar(@filtered_features); $i++){ + ok($filtered_features[0]->density_value() == $stored_features[0]->density_value()); } +# +# Now some density features with region_feature count set on density type +# Lets say we want 300 features on our seq_regions +# + + +debug( "Region Features enabled densities" ); + +$dt = Bio::EnsEMBL::DensityType->new(-analysis => $analysis, + -region_features => 300, + -value_type => 'sum'); + +ok(!$dt->is_stored($db)); +$dta->store($dt); +ok($dt->is_stored($db)); + +@density_features = (); +my $chr_20_slice = $slice_adaptor->fetch_by_region( "chromosome", 20 ); +my $step = $chr_20_slice->length() / 300; +$start = 1; +while( $start < $chr_20_slice->length() ) { + my $end = int( $start + $step ); + push @density_features, Bio::EnsEMBL::DensityFeature->new(-seq_region => $chr_20_slice, + -start => int($start), + -end => int( $start+$step), + -density_type => $dt, + -density_value => 5 ); + $start += $step + 1; +} + +$dfa->store( @density_features ); +ok($density_features[-1]->is_stored($db)); +debug( "Created ".scalar( @density_features )." density features on chr 20" ); + +@stored_features = @{$dfa->fetch_all_by_Slice($chr_20_slice,'GeneDensityTest', 100, "interpolate" )}; +ok( scalar( @stored_features ) == 100 ); +debug( "Interpolated retreived ".scalar( @stored_features )); + +ok( abs( $stored_features[0]->density_value() - 15) < 0.0001 ); +debug( "Density value = ".$stored_features[0]->density_value() ); + + + + $multi->restore('core', 'analysis'); $multi->restore('core', 'density_type'); $multi->restore('core', 'density_feature');