Skip to content
Snippets Groups Projects
Commit 5633b94e authored by Arne Stabenau's avatar Arne Stabenau
Browse files

use region features

parent b9242a6e
No related branches found
No related tags found
No related merge requests found
......@@ -5,9 +5,12 @@
#
# It will only run on databases with genes ...
# boundary condition: on average there should be 2 genes per block
#
# I think the right thing here is too generate densities on the biggest 50-100 toplevel slices ...
# The website will be happy with about 150 bins I think.
use strict;
use Bio::EnsEMBL::DBSQL::DBAdaptor;
......@@ -15,6 +18,10 @@ use Bio::EnsEMBL::DensityType;
use Bio::EnsEMBL::DensityFeature;
use Getopt::Long;
my $bin_count = 150;
my $long_slice_count = 100;
my ( $host, $user, $pass, $port, $dbname );
$port = 3306 ;
......@@ -49,8 +56,6 @@ my ( $gene_count ) = $sth->fetchrow_array();
if( ! $gene_count ) {
print STDERR "No gene density for $dbname.\n";
exit();
} else {
$block_count = $gene_count >> 1;
}
#
......@@ -68,9 +73,9 @@ if( ! $seq_region_count ) {
print STDERR "No gene density for $dbname, no seq_regions.\n";
exit();
if( ($dbname =~ /_estgene_/ ) || ( $dbname =~ /_vega_/ )) {
if( ($dbname =~ /_est_/ ) || ( $dbname =~ /_vega_/ ) || ( $dbname =~ /_cdna_/ ) ) {
my $dna_db_name = $dbname;
$dna_db_name =~ s/(_estgene_|_vega_)/_core_/;
$dna_db_name =~ s/(_estgene_|_vega_|_cdna_)/_core_/;
my $dna_db = new Bio::EnsEMBL::DBSQL::DBAdaptor
(-host => $host,
-user => $user,
......@@ -99,11 +104,8 @@ my $slice_adaptor = $db->get_SliceAdaptor();
#
my $top_slices = $slice_adaptor->fetch_all('toplevel');
for my $slice ( @$top_slices ) {
$genome_size += $slice->length();
}
my @sorted_slices = sort { $b->seq_region_length() <=> $a->seq_region_length()} @$top_slices;
$block_size = int( $genome_size / $block_count );
my $analysis = new Bio::EnsEMBL::Analysis (-program => "gene_density_calc.pl",
-database => "ensembl",
......@@ -142,30 +144,40 @@ foreach my $known (1, 0) {
#
my $dt = Bio::EnsEMBL::DensityType->new(-analysis => $analysis,
-block_size => $block_size,
-region_features => $bin_count,
-value_type => 'sum');
$dta->store($dt);
my ( $current_start, $current_end );
foreach my $slice (@$top_slices){
my $slice_count = 0;
my ( $current, $current_start, $current_end );
foreach my $slice (@sorted_slices){
$current_start = 1;
$block_size = $slice->length() / $bin_count;
my @density_features=();
print "Gene densities for ".$slice->seq_region_name().
" with block size $block_size\n";
$current_end = 0;
$current = 0;
while($current_end < $slice->end()) {
$current += $block_size;
$current_start = $current_end+1;
$current_end = int( $current + 1 );
if( $current_end < $current_start ) {
$current_end = $current_start;
}
while($current_start <= $slice->end()) {
$current_end = $current_start+$block_size-1;
if( $current_end > $slice->end() ) {
$current_end = $slice->end();
}
my $sub_slice = $slice->sub_Slice( $current_start, $current_end );
my $sub_slice = $slice->sub_Slice( $current_start , $current_end );
my $count =0;
......@@ -185,13 +197,12 @@ foreach my $known (1, 0) {
-end => $current_end,
-density_type => $dt,
-density_value => $count);
$current_start = $current_end + 1;
# print STDERR ".";
}
$dfa->store(@density_features);
print "Created ", scalar @density_features, " gene density features.\n";
# print_features(\@density_features);
last if( $slice_count++ > $long_slice_count );
}
}
......
#
# script to calculate the snp density with help of
# attached lite database. Only works if argument database
# is a core database and lite can be found by substituting
# s/_core_/_lite_/
# calculates the variation density from given core database
# It finds Variation database by itself using naming convention s/core/variation/
#
# blocksize condition is 4_000 per genome?
#
use strict;
......@@ -18,6 +17,9 @@ use Getopt::Long;
use Data::Dumper;
$Data::Dumper::Maxdepth = 2;
my $bin_count = 150;
my $long_slice_count = 100;
my ( $host, $user, $pass, $port, $dbname );
my ( $block_count, $genome_size, $block_size );
......@@ -53,14 +55,9 @@ my $aa = $db->get_AnalysisAdaptor();
my $slice_adaptor = $db->get_SliceAdaptor();
my $top_slices = $slice_adaptor->fetch_all( "toplevel" );
my @sorted_slices = sort { $b->seq_region_length() <=> $a->seq_region_length()} @$top_slices;
my ( $block_size, $genome_size );
for my $slice ( @$top_slices ) {
$genome_size += $slice->length();
}
$block_size = int( $genome_size / 4000 );
my $analysis = new Bio::EnsEMBL::Analysis (-program => "variation_density.pl",
-database => "ensembl",
......@@ -75,7 +72,7 @@ $aa->store( $analysis );
# Create new density type.
#
my $dt = Bio::EnsEMBL::DensityType->new(-analysis => $analysis,
-block_size => $block_size,
-region_features => $bin_count,
-value_type => 'sum');
$dta->store($dt);
......@@ -86,17 +83,32 @@ $dta->store($dt);
my ( $current_start, $current_end );
my $slice_count = 0;
my ( $current, $current_start, $current_end );
foreach my $slice (@$top_slices){
foreach my $slice (@sorted_slices){
$block_size = $slice->length() / $bin_count;
$current_start = 1;
print "SNP densities for ".$slice->seq_region_name().
" with block size $block_size\n";
while($current_start <= $slice->end()) {
$current_end = $current_start+$block_size-1;
$current_end = 0;
$current = 0;
while($current_end < $slice->end()) {
$current += $block_size;
$current_start = $current_end+1;
$current_end = int( $current + 1 );
if( $current_end < $current_start ) {
$current_end = $current_start;
}
if( $current_end > $slice->end() ) {
$current_end = $slice->end();
}
......@@ -111,7 +123,7 @@ foreach my $slice (@$top_slices){
#
foreach my $varf (@{$sub_slice->get_all_VariationFeatures()}){
if( $varf->start >= 1 ) {
$count++
$count++
}
}
......@@ -122,10 +134,11 @@ foreach my $slice (@$top_slices){
-density_type => $dt,
-density_value => $count);
$current_start = $current_end + 1;
$dfa->store($df);
}
last if( $slice_count++ > $long_slice_count );
}
......@@ -142,7 +155,7 @@ sub variation_attach {
return 0;
}
#
# get a lost of all databases on that server
# get a list of all databases on that server
#
my $sth = $db->dbc->prepare( "show databases" );
$sth->execute();
......
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment