Commit ade22676 authored by Andreas Kusalananda Kähäri's avatar Andreas Kusalananda Kähäri
Browse files

Adding the feature collection code from branch-featurecollection-dev.

parent 219a8f2f
# $Id$
package Bio::EnsEMBL::Collection;
use strict;
use warnings;
use Bio::EnsEMBL::Utils::Argument ('rearrange');
use Bio::EnsEMBL::Utils::Exception ('throw');
use constant { FEATURE_DBID => 0,
FEATURE_SEQREGIONID => 1,
FEATURE_START => 2,
FEATURE_END => 3,
FEATURE_STRAND => 4 };
# ======================================================================
# The public interface added by Feature Collection
sub fetch_bins_by_Slice {
my $this = shift;
my ( $slice, $method, $nbins, $logic_name, $lightweight ) =
rearrange( [ 'SLICE', 'METHOD',
'NBINS', 'LOGIC_NAME',
'LIGHTWEIGHT'
],
@_ );
# Temporarily set the colleciton to be lightweight.
my $old_value = $this->_lightweight();
if ( defined($lightweight) ) { $this->_lightweight($lightweight) }
else { $this->_lightweight(1) }
my $bins =
$this->_bin_features( -slice => $slice,
-nbins => $nbins,
-method => $method,
-features =>
$this->fetch_all_by_Slice_constraint(
$slice, undef, $logic_name
) );
# Reset the lightweightness to whatever it was before.
$this->_lightweight($old_value);
return $bins;
}
# ======================================================================
# Specialization of methods in Bio::EnsEMBL::DBSQL::BaseFeatureAdaptor
sub _remap {
my ( $this, $features, $mapper, $slice ) = @_;
if ( scalar( @{$features} ) > 0 ) {
if ( $slice->get_seq_region_id() !=
$features->[0][FEATURE_SEQREGIONID] )
{
throw( '_remap() in Bio::EnsEMBL::Collection '
. 'was unexpectedly expected to be intelligent' );
}
}
return $features;
}
sub _create_feature {
my ( $this, $feature_type, $args ) = @_;
my ( $dbid, $start, $end, $strand, $slice ) =
rearrange( [ 'DBID', 'START', 'END', 'STRAND', 'SLICE' ],
%{$args} );
my $feature =
[ $dbid, $slice->get_seq_region_id(), $start, $end, $strand ];
return $feature;
}
sub _create_feature_fast {
my ( $this, $feature_type, $args ) = @_;
if ( !defined( $this->{'recent_slice'} )
|| $this->{'recent_slice'} ne $args->{'slice'} )
{
$this->{'recent_slice'} = $args->{'slice'};
$this->{'recent_slice_seq_region_id'} =
$args->{'slice'}->get_seq_region_id();
}
my $feature = [ $args->{'dbID'},
$this->{'recent_slice_seq_region_id'},
$args->{'start'},
$args->{'end'},
$args->{'strand'} ];
return $feature;
}
# ======================================================================
# Private methods
sub _lightweight {
my ( $this, $value ) = @_;
if ( defined($value) ) {
$this->{'lightweight'} = ( $value != 0 );
}
return $this->{'lightweight'} || 0;
}
our %VALID_BINNING_METHODS = (
'count' => 0,
'density' => 0, # Same as 'count'.
'indices' => 1,
'index' => 1, # Same as 'indices'.
'features' => 2,
'feature' => 2, # Same as 'features'.
'fractional_count' => 3,
'weight' => 3, # Same as 'fractional_count'.
'coverage' => 4 );
sub _bin_features {
my $this = shift;
my ( $slice, $nbins, $method_name, $features ) =
rearrange( [ 'SLICE', 'NBINS', 'METHOD', 'FEATURES' ], @_ );
if ( !defined($features) || !@{$features} ) { return [] }
if ( !defined($nbins) ) {
throw('Missing NBINS argument');
} elsif ( $nbins <= 0 ) {
throw('Negative or zero NBINS argument');
}
$method_name ||= 'count';
if ( !exists( $VALID_BINNING_METHODS{$method_name} ) ) {
throw(
sprintf(
"Invalid binning method '%s', valid methods are:\n\t%s\n",
$method_name,
join( "\n\t", sort( keys(%VALID_BINNING_METHODS) ) ) ) );
}
my $method = $VALID_BINNING_METHODS{$method_name};
my $slice_start = $slice->start();
my $bin_length = ( $slice->end() - $slice_start + 1 )/$nbins;
my @bins;
if ( $method == 0 || # 'count' or 'density'
$method == 3 || # 'fractional_count' or 'weight'
$method == 4 # 'coverage'
)
{
# For binning methods where each bin contain numerical values.
@bins = map( 0, 1 .. $nbins );
} else {
# For binning methods where each bin does not contain numerical
# values.
@bins = map( undef, 1 .. $nbins );
}
my $feature_index = 0;
my @bin_masks;
foreach my $feature ( @{$features} ) {
my $start_bin =
int( ( $feature->[FEATURE_START] - $slice_start )/$bin_length );
my $end_bin =
int( ( $feature->[FEATURE_END] - $slice_start )/$bin_length );
if ( $end_bin >= $nbins ) {
# This might happen for the very last entry.
$end_bin = $nbins - 1;
}
if ( $method == 0 ) {
# ----------------------------------------------------------------
# For 'count' and 'density'.
for ( my $bin_index = $start_bin ;
$bin_index <= $end_bin ;
++$bin_index )
{
++$bins[$bin_index];
}
} elsif ( $method == 1 ) {
# ----------------------------------------------------------------
# For 'indices' and 'index'
for ( my $bin_index = $start_bin ;
$bin_index <= $end_bin ;
++$bin_index )
{
push( @{ $bins[$bin_index] }, $feature_index );
}
++$feature_index;
} elsif ( $method == 2 ) {
# ----------------------------------------------------------------
# For 'features' and 'feature'.
for ( my $bin_index = $start_bin ;
$bin_index <= $end_bin ;
++$bin_index )
{
push( @{ $bins[$bin_index] }, $feature );
}
} elsif ( $method == 3 ) {
# ----------------------------------------------------------------
# For 'fractional_count' and 'weight'.
if ( $start_bin == $end_bin ) {
++$bins[$start_bin];
} else {
my $feature_length =
$feature->[FEATURE_END] - $feature->[FEATURE_START] + 1;
# The first bin...
$bins[$start_bin] +=
( ( $start_bin + 1 )*$bin_length -
( $feature->[FEATURE_START] - $slice_start ) )/
$feature_length;
# The intermediate bins (if there are any)...
for ( my $bin_index = $start_bin + 1 ;
$bin_index <= $end_bin - 1 ;
++$bin_index )
{
$bins[$bin_index] += $bin_length/$feature_length;
}
# The last bin...
$bins[$end_bin] +=
( ( $feature->[FEATURE_END] - $slice_start ) -
$end_bin*$bin_length +
1 )/$feature_length;
} ## end else [ if ( $start_bin == $end_bin)
} elsif ( $method == 4 ) {
# ----------------------------------------------------------------
# For 'coverage'.
my $feature_start = $feature->[FEATURE_START] - $slice_start;
my $feature_end = $feature->[FEATURE_END] - $slice_start;
if ( !defined( $bin_masks[$start_bin] )
|| ( defined( $bin_masks[$start_bin] )
&& $bin_masks[$start_bin] != 1 ) )
{
# Mask the $start_bin from the start of the feature to the end
# of the bin, or to the end of the feature (whichever occurs
# first).
my $bin_start = int( $start_bin*$bin_length );
my $bin_end = int( ( $start_bin + 1 )*$bin_length - 1 );
for ( my $pos = $feature_start ;
$pos <= $bin_end && $pos <= $feature_end ;
++$pos )
{
$bin_masks[$start_bin][ $pos - $bin_start ] = 1;
}
}
for ( my $bin_index = $start_bin + 1 ;
$bin_index <= $end_bin - 1 ;
++$bin_index )
{
# Mark the middle bins between $start_bin and $end_bin as fully
# masked out.
$bin_masks[$bin_index] = 1;
}
if ( $end_bin != $start_bin ) {
if ( !defined( $bin_masks[$end_bin] )
|| ( defined( $bin_masks[$end_bin] )
&& $bin_masks[$end_bin] != 1 ) )
{
# Mask the $end_bin from the start of the bin to the end of
# the feature, or to the end of the bin (whichever occurs
# first).
my $bin_start = int( $end_bin*$bin_length );
my $bin_end = int( ( $end_bin + 1 )*$bin_length - 1 );
for ( my $pos = $bin_start ;
$pos <= $feature_end && $pos <= $bin_end ;
++$pos )
{
$bin_masks[$end_bin][ $pos - $bin_start ] = 1;
}
}
}
} ## end elsif ( $method == 4 )
} ## end foreach my $feature ( @{$features...
if ( $method == 4 ) {
# ------------------------------------------------------------------
# For the 'coverage' method: Finish up by going through @bin_masks
# and sum up the arrays.
for ( my $bin_index = 0 ; $bin_index < $nbins ; ++$bin_index ) {
if ( defined( $bin_masks[$bin_index] ) ) {
if ( !ref( $bin_masks[$bin_index] ) ) {
$bins[$bin_index] = 1;
} else {
$bins[$bin_index] =
scalar( grep ( defined($_), @{ $bin_masks[$bin_index] } ) )/
$bin_length;
}
}
}
}
return \@bins;
} ## end sub _bin_features
1;
# $Id$
package Bio::EnsEMBL::Collection::Exon;
use strict;
use warnings;
use Bio::EnsEMBL::Utils::Argument ('rearrange');
use Bio::EnsEMBL::Utils::Exception ('throw');
use base( 'Bio::EnsEMBL::Collection',
'Bio::EnsEMBL::DBSQL::ExonAdaptor' );
sub _create_feature {
my ( $this, $feature_type, $args ) = @_;
my $feature = $this->SUPER::_create_feature( $feature_type, $args );
if ( !$this->_lightweight() ) {
my ( $analysis, $stable_id, $version ) =
rearrange( [ 'STABLE_ID', 'VERSION', ], %{$args} );
push( @{$feature}, $stable_id, $version );
}
return $feature;
}
sub _create_feature_fast {
my ( $this, $feature_type, $args ) = @_;
my $feature =
$this->SUPER::_create_feature_fast( $feature_type, $args );
return $feature;
}
sub _tables {
my ($this) = @_;
my @tables = $this->SUPER::_tables();
if ( $this->_lightweight() ) {
return ( $tables[0] );
}
return @tables;
}
sub _columns {
my ($this) = @_;
my @columns = $this->SUPER::_columns();
if ( $this->_lightweight() ) {
@columns[ 5 .. $#columns ] = map( 1, 5 .. $#columns );
}
return @columns;
}
sub _default_where_clause {
my ($this) = @_;
if ( $this->_lightweight() ) {
return '';
}
return $this->SUPER::_default_where_clause();
}
1;
# $Id$
package Bio::EnsEMBL::Collection::RepeatFeature;
use strict;
use warnings;
use Bio::EnsEMBL::Utils::Argument ('rearrange');
use Bio::EnsEMBL::Utils::Exception ('throw');
use base( 'Bio::EnsEMBL::Collection',
'Bio::EnsEMBL::DBSQL::RepeatFeatureAdaptor' );
sub _create_feature {
my ( $this, $feature_type, $args ) = @_;
my $feature = $this->SUPER::_create_feature( $feature_type, $args );
if ( !$this->_lightweight() ) {
my ( $hstart, $hend, $score, $repeat_consensus, $analysis ) =
rearrange( [ 'HSTART', 'HEND',
'SCORE', 'REPEAT_CONSENSUS',
'ANALYSIS'
],
@{$args} );
push( @{$feature},
$hstart, $hend, $score, $repeat_consensus->dbID(),
$analysis->dbID() );
}
return $feature;
}
sub _create_feature_fast {
my ( $this, $feature_type, $args ) = @_;
my $feature =
$this->SUPER::_create_feature_fast( $feature_type, $args );
if ( !$this->_lightweight() ) {
push( @{$feature},
$args->{'hstart'}, $args->{'hend'}, $args->{'score'},
$args->{'repeat_consensus'}->dbID(),
$args->{'analysis'}->dbID() );
}
return $feature;
}
sub _tables {
my ($this) = @_;
my @tables = $this->SUPER::_tables();
if ( $this->_lightweight() ) {
return ( $tables[0] );
}
return @tables;
}
sub _columns {
my ($this) = @_;
my @columns = $this->SUPER::_columns();
if ( $this->_lightweight() ) {
@columns[ 5 .. $#columns ] = map( 1, 5 .. $#columns );
}
return @columns;
}
sub _default_where_clause {
my ($this) = @_;
if ( $this->_lightweight() ) {
return '';
}
return $this->SUPER::_default_where_clause();
}
1;
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment