Skip to content
Snippets Groups Projects
Commit 2c59e354 authored by Steve Trevanion's avatar Steve Trevanion
Browse files

only study core, est, cdna and coreexpression databases, better checking of...

only study core, est, cdna and coreexpression databases, better checking of analysis.description file
parent 0a39642c
No related branches found
No related tags found
No related merge requests found
......@@ -48,7 +48,10 @@
if you want to update all databases for a type
perl load_analysis_descriptions.pl -dbhost my_host -dbuser user -dbpass *****
-pattern '%core_55_%' -file analysis.descriptions
-pattern '%_55_%' -file analysis.descriptions >& load_analysis_descriptions.log
to identify logic_names missing from the definition file:
grep 'MSG' load_analysis_descriptions.log
=cut
......@@ -56,6 +59,7 @@ use strict;
use Data::Dumper;
use Getopt::Long;
use DBI;
use Bio::EnsEMBL::Utils::Exception qw(warning throw);
use Bio::EnsEMBL::DBSQL::DBAdaptor;
use Bio::EnsEMBL::Gene;
......@@ -74,25 +78,26 @@ my $noupdate;
my $help = 0;
my $pattern;
&GetOptions (
'host|dbhost=s' => \$dbhost,
'dbname=s' => \$dbname,
'user|dbuser=s' => \$dbuser,
'pass|dbpass=s' => \$dbpass,
'port|dbport=s' => \$dbport,
'file|descriptions=s' => \@files,
'noupdate' => \$noupdate,
'pattern=s' => \$pattern,
'h|help!' => \$help
);
'host|dbhost=s' => \$dbhost,
'dbname=s' => \$dbname,
'user|dbuser=s' => \$dbuser,
'pass|dbpass=s' => \$dbpass,
'port|dbport=s' => \$dbport,
'file|descriptions=s' => \@files,
'noupdate' => \$noupdate,
'pattern=s' => \$pattern,
'h|help!' => \$help
);
if (!$dbhost){
print ("Need to pass a dbhost\n");
$help =1;
print ("Need to pass a dbhost\n");
$help =1;
}
if (!$dbname and !$pattern){
print("Need to enter either a database name in -dbname or a pattern in -pattern\n");
$help = 1;
print("Need to enter either a database name in -dbname or a pattern in -pattern\n");
$help = 1;
}
unless(@files){
......@@ -110,7 +115,7 @@ if($help){
#connect to database
$dsn = "DBI:mysql:host=" . $dbhost . ";port=" . $dbport;
eval{
eval{
$dbh = DBI->connect($dsn, $dbuser, $dbpass,
{'RaiseError' => 1,
'PrintError' => 0});
......@@ -124,92 +129,100 @@ $sth = $dbh->prepare($sql);
$sth->execute;
while (my ($dbname) = $sth->fetchrow_array){
print "Looking at ... $dbname\n";
my $db = new Bio::EnsEMBL::DBSQL::DBAdaptor(
-host => $dbhost,
-user => $dbuser,
-dbname => $dbname,
-pass => $dbpass,
-port => $dbport
);
# Pre-fetch all analyses in the database
my $aa = $db->get_AnalysisAdaptor();
my $analyses = $aa->fetch_all();
my (%hash,%reference);
foreach my $analysis(@$analyses){
$hash{lc($analysis->logic_name())} = $analysis;
}
# Parse the description files
foreach my $file( @files ){
open(FH, $file) or throw("Failed to open $file $@");
next unless $dbname =~ /core|cdna|otherfeatures/;
next if $dbname =~ /coreexpression/;
print "\n\nLooking at ... $dbname\n";
my $db = new Bio::EnsEMBL::DBSQL::DBAdaptor(
-host => $dbhost,
-user => $dbuser,
-dbname => $dbname,
-pass => $dbpass,
-port => $dbport
);
# Pre-fetch all analyses in the database
my $aa = $db->get_AnalysisAdaptor();
my $analyses = $aa->fetch_all();
my (%hash,%reference);
foreach my $analysis(@$analyses){
$hash{lc($analysis->logic_name())} = $analysis;
}
# Parse the description files and check formatting
foreach my $file( @files ){
open(FH, $file) or throw("Failed to open $file $@");
LINE: while(my $row = <FH>){
chomp($row);
next if ($row =~ /^\#/); # skip comments
next if ($row =~ /^$/); # and blank lines
next if ($row =~ /^\s+$/); # and whitespace-only lines
my ($nr, $logic_name, $description, $display_label, $displayable, $web_data) = split(/\t/, $row);
#print join("\t", $logic_name, $description, $display_label, $displayable, $web_data), "\n";
$reference{lc($logic_name)} = {
nr => $nr,
description => $description || '',
display_label => $display_label || '',
displayable => $displayable || '',
web_data => $web_data || '',
};
LINE: while(my $row = <FH>){
chomp($row);
next if ($row =~ /^\#/); # skip comments
next if ($row =~ /^$/); # and blank lines
next if ($row =~ /^\s+$/); # and whitespace-only lines
my ($nr, $logic_name, $description, $display_label, $displayable, $web_data) = split(/\t/, $row);
# print join("\t", $logic_name, $description, $display_label, $displayable, $web_data), "\n";
unless ($logic_name && defined($displayable)) {
throw("Please check description file entry for logic_name $logic_name (" . join("\n", $logic_name, $description, $display_label, $displayable, $web_data) . ")");
exit;
}
unless (defined $displayable){
throw("In the analysis_description file, logic name '$logic_name' should contain, at least, 5 columns: Number, logic_name, description, display_label and displayable. Fix it !!");
}
unless ($displayable =~ m/^[01]$/) {
throw("Displayable flag for analysis '$logic_name' has to be either 0 or 1, but not '$displayable'!");
}
$reference{lc($logic_name)} = {
nr => $nr,
description => $description || '',
display_label => $display_label || '',
displayable => $displayable || '',
web_data => $web_data || '',
};
$description =~ s/^\s+//;
$description =~ s/\s+$//;
$description =~ s/^\s+//;
$description =~ s/\s+$//;
next if not $description;
next if not $description;
if (exists $hash{lc($logic_name)}) {
if (exists $hash{lc($logic_name)}) {
my $analysis = $hash{lc($logic_name)};
my $analysis = $hash{lc($logic_name)};
$analysis->description($description);
$analysis->displayable($displayable);
$analysis->display_label($display_label);
$web_data ? $analysis->web_data($aa->get_dumped_data($web_data)) : $analysis->{_web_data} = undef;
$analysis->description($description);
$analysis->displayable($displayable);
$analysis->display_label($display_label);
$web_data ? $analysis->web_data($aa->get_dumped_data($web_data)) : $analysis->{_web_data} = undef;
# print Dumper $analysis->web_data();
unless ( $noupdate ) {
$aa->update($analysis) ;
}
unless ( $noupdate ) {
$aa->update($analysis) ;
}
delete $hash{lc($logic_name)};
}
delete $hash{lc($logic_name)};
}
close(FH) or throw("Failed to close $file $@");
}
close(FH) or throw("Failed to close $file $@");
}
if ( scalar(keys %hash)==0) {
unless ($noupdate) {
print STDERR "\nAll analysis descriptions have been updated, every analysis has a description now\n";
} else {
print STDERR "\nEvery analysis has a description in the file, all analysis descriptions can be updated.\n".
"To write analysis descriptions to the analysis_description table in your DB,\n".
"please run this script including the -update option on the commandline\n";
}
}
else {
foreach my $ln (keys %hash) {
warning ("Analysis '$ln' doesn't exist in reference file(s) '"
. join( "','", @files )
. "'! It needs to be added first")
unless (exists $reference{$ln});
warning "[$dbname] No description was found for logic_name '$ln':\n".
"\tref:\t display_label='".$reference{$ln}{display_label}."'; displayable=".$reference{$ln}{displayable}."; nr=".$reference{$ln}{nr}."\n".
"\tdb: \t display_label='".$hash{$ln}->display_label."'; displayable=".$hash{$ln}->displayable."; dbID=".$hash{$ln}->dbID."\n";
}
if ( scalar(keys %hash)==0) {
unless ($noupdate) {
print STDERR "\nAll analysis descriptions have been updated, every analysis has a description now\n";
} else {
print STDERR "\nEvery analysis has a description in the file, all analysis descriptions can be updated.\n".
"To write analysis descriptions to the analysis_description table in your DB,\n".
"please run this script excluding the -noupdate option on the commandline\n";
}
}
else {
foreach my $ln (keys %hash) {
unless (exists $reference{$ln}) {
warning ("[$dbname]: Analysis '$ln' doesn't exist in reference file(s) '"
. join( "','", @files )
. "'! It needs to be added first");
}
}
}
}
sub usage{
......
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment