Newer
Older
Andreas Kusalananda Kähäri
committed
Glenn Proctor
committed
# Calculates the variation density from given core database
use strict;
use Bio::EnsEMBL::DBSQL::DBAdaptor;
use Bio::EnsEMBL::DensityType;
use Bio::EnsEMBL::DensityFeature;
use Bio::EnsEMBL::Variation::DBSQL::DBAdaptor;
use Getopt::Long;
use Data::Dumper;
$Data::Dumper::Maxdepth = 2;
Andreas Kusalananda Kähäri
committed
my $max_slices = 100;
Glenn Proctor
committed
my $bin_count = 150;
Glenn Proctor
committed
my ($host, $user, $pass, $port, $species);
Glenn Proctor
committed
my ($block_count, $genome_size, $block_size );
Glenn Proctor
committed
GetOptions( "host=s", \$host,
"user=s", \$user,
"pass=s", \$pass,
"port=i", \$port,
"species=s", \$species );
Glenn Proctor
committed
Bio::EnsEMBL::Registry->load_registry_from_db(-host => $host, -user => $user, -pass => $pass, -port => $port);
Glenn Proctor
committed
my $density_feature_adaptor = Bio::EnsEMBL::Registry->get_adaptor($species, "core", "DensityFeature") || die "Can't create density feature adaptor";
my $density_type_adaptor = Bio::EnsEMBL::Registry->get_adaptor($species, "core", "DensityType") || die "Can't create density type adaptor";
my $analysis_adaptor = Bio::EnsEMBL::Registry->get_adaptor($species, "core", "analysis") || die "Can't create analysis adaptor";
my $slice_adaptor = Bio::EnsEMBL::Registry->get_adaptor($species, "core", "slice") || die "Can't create slice adaptor";
Glenn Proctor
committed
my $variation_feature_adaptor = Bio::EnsEMBL::Registry->get_adaptor($species, "variation", "VariationFeature") || die "Can't create variation feature adaptor";
Glenn Proctor
committed
# TODO - variation from registry
Glenn Proctor
committed
# Clean up old features first. Also remove analysis and density type entry as these are recreated
my $sth = $slice_adaptor->dbc->prepare("DELETE df, dt, a, ad FROM analysis_description ad, density_feature df, density_type dt, analysis a WHERE ad.analysis_id = a.analysis_id AND a.analysis_id=dt.analysis_id AND dt.density_type_id=df.density_type_id AND a.logic_name='snpdensity'");
Glenn Proctor
committed
# Sort slices by coordinate system rank, then by length
Andreas Kusalananda Kähäri
committed
my @sorted_slices = sort( {
$a->coord_system()->rank() <=> $b->coord_system()->rank()
|| $b->seq_region_length() <=> $a->seq_region_length()
} @{ $slice_adaptor->fetch_all('toplevel') } );
my $analysis =
new Bio::EnsEMBL::Analysis(
-program => "variation_density.pl",
-database => "ensembl",
-gff_source => "variation_density.pl",
-gff_feature => "density",
-logic_name => "snpdensity",
-description => 'Density of Single Nucleotide Polymorphisms (SNPs) calculated by variation_density.pl (see scripts at the <a rel="external" href="http://cvs.sanger.ac.uk/cgi-bin/viewvc.cgi/?root=ensembl">Sanger Institute CVS</a> repository).',
-display_label => 'SNP Density',
-displayable => 1 );
Glenn Proctor
committed
$analysis_adaptor->store($analysis);
$analysis_adaptor->update($analysis);
Glenn Proctor
committed
# Create and store new density type
Andreas Kusalananda Kähäri
committed
Glenn Proctor
committed
my $dt = Bio::EnsEMBL::DensityType->new(-analysis => $analysis,
Glenn Proctor
committed
-value_type => 'sum');
$density_type_adaptor->store($dt);
# Now the actual feature calculation loop
Glenn Proctor
committed
my ($current, $current_start, $current_end);
Glenn Proctor
committed
# prepare statement outside of loop
$sth = $variation_feature_adaptor->prepare("SELECT COUNT(*) FROM variation_feature WHERE seq_region_id = ? AND seq_region_start < ? AND seq_region_end > ?");
foreach my $slice (@sorted_slices){
$block_size = $slice->length() / $bin_count;
Glenn Proctor
committed
print "Calculating SNP densities for ". $slice->seq_region_name() . " with block size $block_size\n";
Susan Fairley
committed
while($current_end < $slice->length) {
$current += $block_size;
$current_start = $current_end+1;
$current_end = int( $current + 1 );
Glenn Proctor
committed
if ($current_end < $current_start) {
Glenn Proctor
committed
if ($current_end > $slice->end()) {
$current_end = $slice->end();
}
Glenn Proctor
committed
# Get count of SNPs in this region
$sth->execute($slice->get_seq_region_id(), $current_end, $current_start);
my $count = ($sth->fetchrow_array())[0];
Glenn Proctor
committed
my $df = Bio::EnsEMBL::DensityFeature->new(-seq_region => $slice,
-start => $current_start,
-end => $current_end,
-density_type => $dt,
-density_value => $count);
$density_feature_adaptor->store($df);
Andreas Kusalananda Kähäri
committed
last if ( $slice_count++ > $max_slices );