From b9242a6ecd871f93e7d45723bddcbb99caf97124 Mon Sep 17 00:00:00 2001 From: Arne Stabenau <stabenau@sanger.ac.uk> Date: Thu, 24 Nov 2005 14:28:03 +0000 Subject: [PATCH] use region features, removed small scale gc features to save time --- .../density_feature/percent_gc_calc.pl | 81 ++++++++++--------- 1 file changed, 42 insertions(+), 39 deletions(-) diff --git a/misc-scripts/density_feature/percent_gc_calc.pl b/misc-scripts/density_feature/percent_gc_calc.pl index cff31e409e..d2d968e29e 100644 --- a/misc-scripts/density_feature/percent_gc_calc.pl +++ b/misc-scripts/density_feature/percent_gc_calc.pl @@ -37,7 +37,8 @@ my $db = new Bio::EnsEMBL::DBSQL::DBAdaptor(-host => $host, -dbname => $dbname); -my $small_blocksize = 500; +my $bin_count = 150; +my $long_slice_count = 100; # @@ -62,13 +63,7 @@ my $dta = $db->get_DensityTypeAdaptor(); my $aa = $db->get_AnalysisAdaptor(); my $slices = $slice_adaptor->fetch_all( "toplevel" ); - -my ( $large_blocksize, $genome_size ); -for my $slice ( @$slices ) { - $genome_size += $slice->length(); -} - -$large_blocksize = int( $genome_size / 4000 ); +my @sorted_slices = sort { $b->seq_region_length() <=> $a->seq_region_length()} @$slices; # @@ -88,54 +83,61 @@ $aa->store($analysis); # Create new density type. # -my $small_density_type = Bio::EnsEMBL::DensityType->new - (-analysis => $analysis, - -block_size => $small_blocksize, - -value_type => 'ratio'); -my $large_density_type = Bio::EnsEMBL::DensityType->new +my $density_type = Bio::EnsEMBL::DensityType->new (-analysis => $analysis, - -block_size => $large_blocksize, + -region_features => $bin_count, -value_type => 'ratio'); -$dta->store($small_density_type); -$dta->store($large_density_type); +$dta->store($density_type); -my ( $current_start, $current_end ); +my ( $current_start, $current_end, $current ); +my $slice_count = 0; +my $block_size; -foreach my $slice ( @$slices ) { +foreach my $slice (@sorted_slices){ -# -# do it for small and large blocks -# + $block_size = $slice->length() / $bin_count; - for my $density_type ( $large_density_type, $small_density_type ) { + my @density_features=(); - my $blocksize = $density_type->block_size(); - $current_start = 1; + print "GC percentage for ".$slice->seq_region_name(). + " with block size $block_size\n"; - while($current_start <= $slice->end()) { - $current_end = $current_start+$blocksize-1; - if( $current_end > $slice->end() ) { - $current_end = $slice->end(); - } + $current_end = 0; + $current = 0; - my $sub_slice = $slice->sub_Slice( $current_start, $current_end ); + while ($current_end < $slice->end()) { - my $gc = $sub_slice->get_base_count()->{'%gc'}; - my $df = Bio::EnsEMBL::DensityFeature->new - (-seq_region => $slice, - -start => $current_start, - -end => $current_end, - -density_type => $density_type, - -density_value => $gc); + $current += $block_size; + $current_start = $current_end+1; + $current_end = int( $current + 1 ); - $dfa->store($df); + if ( $current_end < $current_start ) { + $current_end = $current_start; + } - $current_start = $current_end+1; + if ( $current_end > $slice->end() ) { + $current_end = $slice->end(); } + + + my $sub_slice = $slice->sub_Slice( $current_start , $current_end ); + + my $gc = $sub_slice->get_base_count()->{'%gc'}; + my $df = Bio::EnsEMBL::DensityFeature->new + (-seq_region => $slice, + -start => $current_start, + -end => $current_end, + -density_type => $density_type, + -density_value => $gc); + + $dfa->store($df); + } + + last if( $slice_count++ > $long_slice_count ); } @@ -144,6 +146,7 @@ foreach my $slice ( @$slices ) { + # # helper to draw an ascii representation of the density features # -- GitLab