Skip to content
Snippets Groups Projects
Commit b9242a6e authored by Arne Stabenau's avatar Arne Stabenau
Browse files

use region features, removed small scale gc features to save time

parent e5d968a4
No related branches found
No related tags found
No related merge requests found
...@@ -37,7 +37,8 @@ my $db = new Bio::EnsEMBL::DBSQL::DBAdaptor(-host => $host, ...@@ -37,7 +37,8 @@ my $db = new Bio::EnsEMBL::DBSQL::DBAdaptor(-host => $host,
-dbname => $dbname); -dbname => $dbname);
my $small_blocksize = 500; my $bin_count = 150;
my $long_slice_count = 100;
# #
...@@ -62,13 +63,7 @@ my $dta = $db->get_DensityTypeAdaptor(); ...@@ -62,13 +63,7 @@ my $dta = $db->get_DensityTypeAdaptor();
my $aa = $db->get_AnalysisAdaptor(); my $aa = $db->get_AnalysisAdaptor();
my $slices = $slice_adaptor->fetch_all( "toplevel" ); my $slices = $slice_adaptor->fetch_all( "toplevel" );
my @sorted_slices = sort { $b->seq_region_length() <=> $a->seq_region_length()} @$slices;
my ( $large_blocksize, $genome_size );
for my $slice ( @$slices ) {
$genome_size += $slice->length();
}
$large_blocksize = int( $genome_size / 4000 );
# #
...@@ -88,54 +83,61 @@ $aa->store($analysis); ...@@ -88,54 +83,61 @@ $aa->store($analysis);
# Create new density type. # Create new density type.
# #
my $small_density_type = Bio::EnsEMBL::DensityType->new
(-analysis => $analysis,
-block_size => $small_blocksize,
-value_type => 'ratio');
my $large_density_type = Bio::EnsEMBL::DensityType->new my $density_type = Bio::EnsEMBL::DensityType->new
(-analysis => $analysis, (-analysis => $analysis,
-block_size => $large_blocksize, -region_features => $bin_count,
-value_type => 'ratio'); -value_type => 'ratio');
$dta->store($small_density_type); $dta->store($density_type);
$dta->store($large_density_type);
my ( $current_start, $current_end ); my ( $current_start, $current_end, $current );
my $slice_count = 0;
my $block_size;
foreach my $slice ( @$slices ) { foreach my $slice (@sorted_slices){
# $block_size = $slice->length() / $bin_count;
# do it for small and large blocks
#
for my $density_type ( $large_density_type, $small_density_type ) { my @density_features=();
my $blocksize = $density_type->block_size(); print "GC percentage for ".$slice->seq_region_name().
$current_start = 1; " with block size $block_size\n";
while($current_start <= $slice->end()) { $current_end = 0;
$current_end = $current_start+$blocksize-1; $current = 0;
if( $current_end > $slice->end() ) {
$current_end = $slice->end();
}
my $sub_slice = $slice->sub_Slice( $current_start, $current_end ); while ($current_end < $slice->end()) {
my $gc = $sub_slice->get_base_count()->{'%gc'}; $current += $block_size;
my $df = Bio::EnsEMBL::DensityFeature->new $current_start = $current_end+1;
(-seq_region => $slice, $current_end = int( $current + 1 );
-start => $current_start,
-end => $current_end,
-density_type => $density_type,
-density_value => $gc);
$dfa->store($df); if ( $current_end < $current_start ) {
$current_end = $current_start;
}
$current_start = $current_end+1; if ( $current_end > $slice->end() ) {
$current_end = $slice->end();
} }
my $sub_slice = $slice->sub_Slice( $current_start , $current_end );
my $gc = $sub_slice->get_base_count()->{'%gc'};
my $df = Bio::EnsEMBL::DensityFeature->new
(-seq_region => $slice,
-start => $current_start,
-end => $current_end,
-density_type => $density_type,
-density_value => $gc);
$dfa->store($df);
} }
last if( $slice_count++ > $long_slice_count );
} }
...@@ -144,6 +146,7 @@ foreach my $slice ( @$slices ) { ...@@ -144,6 +146,7 @@ foreach my $slice ( @$slices ) {
# #
# helper to draw an ascii representation of the density features # helper to draw an ascii representation of the density features
# #
......
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment