Skip to content
Snippets Groups Projects
Commit 8b658086 authored by Monika Komorowska's avatar Monika Komorowska
Browse files

Don't delete the analysis and analysis description entries, update analysis creation date

parent 06430119
No related branches found
No related tags found
No related merge requests found
......@@ -17,6 +17,7 @@ use Bio::EnsEMBL::Analysis;
use Bio::EnsEMBL::DensityType;
use Bio::EnsEMBL::DensityFeature;
use Getopt::Long;
use Bio::EnsEMBL::Utils::ConversionSupport;
my ( $host, $user, $pass, $port, $dbname );
......@@ -52,14 +53,12 @@ if( ! $dna_count ) {
}
print STDOUT "Deleting old PercentGC features\n";
$sth = $db->dbc->prepare(
qq(
DELETE df, dt, a, ad
FROM density_feature df, density_type dt, analysis a, analysis_description ad
DELETE df, dt
FROM density_feature df, density_type dt, analysis a
WHERE a.analysis_id=dt.analysis_id
AND ad.analysis_id = a.analysis_id
AND dt.density_type_id=df.density_type_id
AND a.logic_name='percentgc') );
$sth->execute();
......@@ -79,29 +78,20 @@ my @sorted_slices =
|| $b->seq_region_length() <=> $a->seq_region_length()
} @{ $slice_adaptor->fetch_all('toplevel') } );
#
# Create new analysis object for density calculation.
# Update creation date of analysis.
#
my $analysis =
new Bio::EnsEMBL::Analysis(
-program => "percent_gc_calc.pl",
-database => "ensembl",
-gff_source => "percent_gc_calc.pl",
-gff_feature => "density",
-logic_name => "percentgc",
-description => 'Percentage of G/C bases in the sequence.',
-display_label => 'GC content',
-displayable => 1 );
$aa->store($analysis);
my $support = 'Bio::EnsEMBL::Utils::ConversionSupport';
my $analysis = $aa->fetch_by_logic_name('percentgc');
$analysis->created($support->date());
$aa->update($analysis);
#
# Create new density type.
#
my $density_type = Bio::EnsEMBL::DensityType->new
(-analysis => $analysis,
-region_features => $bin_count,
......@@ -210,7 +200,7 @@ from longest to shortest. Each slice is divided into 150 bins
calculated.
Input data: dna sequence, top level seq regions
Output tables: analysis (logic_name: percentgc), analysis description,
Output tables: analysis (creation date updated),
density_type, density_feature
......
......@@ -16,6 +16,7 @@ use Bio::EnsEMBL::DensityType;
use Bio::EnsEMBL::DensityFeature;
use Bio::EnsEMBL::Mapper::RangeRegistry;
use Bio::EnsEMBL::Utils::Exception qw(warning throw);
use Bio::EnsEMBL::Utils::ConversionSupport;
use POSIX;
......@@ -66,7 +67,7 @@ if( ! $repeat_count ) {
#
print STDOUT "Deleting old PercentageRepeat features\n";
$sth = $db->dbc->prepare("DELETE df, dt, a, ad FROM analysis_description ad, density_feature df, density_type dt, analysis a WHERE ad.analysis_id = a.analysis_id AND a.analysis_id=dt.analysis_id AND dt.density_type_id=df.density_type_id AND a.logic_name='rercentagerepeat'");
$sth = $db->dbc->prepare("DELETE df, dt FROM density_feature df, density_type dt, analysis a WHERE a.analysis_id=dt.analysis_id AND dt.density_type_id=df.density_type_id AND a.logic_name='percentagerepeat'");
$sth->execute();
......@@ -77,22 +78,11 @@ my $aa = $db->get_AnalysisAdaptor();
#
# Create new analysis object for density calculation.
# Update creation date of analysis.
#
my $analysis =
new Bio::EnsEMBL::Analysis(
-program => "repeat_coverage_calc.pl",
-database => "ensembl",
-gff_source => "repeat_coverage_calc.pl",
-gff_feature => "density",
-logic_name => "percentagerepeat",
-description =>
'Percentage of repetitive elements for top level sequences (such as chromosomes, scaffolds, etc.)',
-display_label => 'Repeats (percent)',
-displayable => 1 );
$aa->store($analysis);
my $support = 'Bio::EnsEMBL::Utils::ConversionSupport';
my $analysis = $aa->fetch_by_logic_name('percentagerepeat');
$analysis->created($support->date());
$aa->update($analysis);
my $slices = $slice_adaptor->fetch_all( "toplevel" );
......@@ -294,7 +284,7 @@ sub_slice within the 1 MB, calculate the %repeat for that sub_slice.
Variable repeats are only found for the 100 longest toplevel slices.
Input data: repeat features, top level seq regions
Output tables: analysis (logic_name: percentagerepeat), analysis description,
Output tables: analysis (creation date updated),
density_type (two entries, one for small_density type of
length 1 KB and one for variable_density_type of length 1MB),
density_feature
......
......@@ -4,10 +4,9 @@
use strict;
use Bio::EnsEMBL::Registry;
use Getopt::Long;
use Data::Dumper;
use Bio::EnsEMBL::Utils::ConversionSupport;
$Data::Dumper::Maxdepth = 2;
my $max_slices = 100;
......@@ -38,8 +37,6 @@ my $variation_feature_adaptor = $reg->get_adaptor($species, "variation", "Variat
# TODO - variation from registry
# Clean up old features first. Also remove the density type entry, as it is recreated.
# The analysis entry is kept; only its creation date is updated.
#my $sth = $slice_adaptor->dbc->prepare("DELETE df, dt, a, ad FROM analysis_description ad, density_feature df, density_type dt, analysis a WHERE ad.analysis_id = a.analysis_id AND a.analysis_id=dt.analysis_id AND dt.density_type_id=df.density_type_id AND a.logic_name='snpdensity'");
# release 63: do not delete analysis, as this is synchronized with production!
my $sth = $slice_adaptor->dbc->prepare("DELETE df, dt FROM analysis_description ad, density_feature df, density_type dt, analysis a WHERE ad.analysis_id = a.analysis_id AND a.analysis_id=dt.analysis_id AND dt.density_type_id=df.density_type_id AND a.logic_name='snpdensity'");
......@@ -56,17 +53,9 @@ my @sorted_slices = sort( {
my $analysis = $analysis_adaptor->fetch_by_logic_name('snpdensity');
# new Bio::EnsEMBL::Analysis(
# -program => "variation_density.pl",
# -database => "ensembl",
# -gff_source => "variation_density.pl",
# -gff_feature => "density",
# -logic_name => "snpdensity",
# -description => 'Density of Single Nucleotide Polymorphisms (SNPs) calculated by variation_density.pl (see scripts at the <a rel="external" href="http://cvs.sanger.ac.uk/cgi-bin/viewvc.cgi/?root=ensembl">Sanger Institute CVS</a> repository).',
# -display_label => 'SNP Density',
# -displayable => 1 );
#$analysis_adaptor->store($analysis);
#$analysis_adaptor->update($analysis);
my $support = 'Bio::EnsEMBL::Utils::ConversionSupport';
$analysis->created($support->date());
$analysis_adaptor->update($analysis);
# Create and store new density type
......@@ -150,7 +139,7 @@ Attach variation db if exists. All toplevel slices are fetched.
For each slice, count the number of SNPs.
Input data: top level seq regions, variation db
Output tables: analysis, analysis_description, density_feature, density_type
Output tables: analysis (creation date updated), density_feature, density_type
The script requires ensembl-variation in perl5lib.
......
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment