diff --git a/misc-scripts/analysis_description/load_analysis_descriptions.pl b/misc-scripts/analysis_description/load_analysis_descriptions.pl index 56085ba63e0316b19b8e4b2156c8149cdfe7d81e..b0eb7a5f81aae2e0f63a7e2854a04b394e2e088e 100644 --- a/misc-scripts/analysis_description/load_analysis_descriptions.pl +++ b/misc-scripts/analysis_description/load_analysis_descriptions.pl @@ -48,7 +48,10 @@ if you want to update all databases for a type perl load_analysis_descriptions.pl -dbhost my_host -dbuser user -dbpass ***** - -pattern '%core_55_%' -file analysis.descriptions + -pattern '%_55_%' -file analysis.descriptions > & load_analysis_descriptions.log + +to identify logic_names missing from the definition file: +grep 'MSG' load_analysis_descriptions.log =cut @@ -56,6 +59,7 @@ use strict; use Data::Dumper; use Getopt::Long; use DBI; + use Bio::EnsEMBL::Utils::Exception qw(warning throw); use Bio::EnsEMBL::DBSQL::DBAdaptor; use Bio::EnsEMBL::Gene; @@ -74,25 +78,26 @@ my $noupdate; my $help = 0; my $pattern; + &GetOptions ( - 'host|dbhost=s' => \$dbhost, - 'dbname=s' => \$dbname, - 'user|dbuser=s' => \$dbuser, - 'pass|dbpass=s' => \$dbpass, - 'port|dbport=s' => \$dbport, - 'file|descriptions=s' => \@files, - 'noupdate' => \$noupdate, - 'pattern=s' => \$pattern, - 'h|help!' => \$help - ); + 'host|dbhost=s' => \$dbhost, + 'dbname=s' => \$dbname, + 'user|dbuser=s' => \$dbuser, + 'pass|dbpass=s' => \$dbpass, + 'port|dbport=s' => \$dbport, + 'file|descriptions=s' => \@files, + 'noupdate' => \$noupdate, + 'pattern=s' => \$pattern, + 'h|help!' 
=> \$help +); if (!$dbhost){ - print ("Need to pass a dbhost\n"); - $help =1; + print ("Need to pass a dbhost\n"); + $help =1; } if (!$dbname and !$pattern){ - print("Need to enter either a database name in -dbname or a pattern in -pattern\n"); - $help = 1; + print("Need to enter either a database name in -dbname or a pattern in -pattern\n"); + $help = 1; } unless(@files){ @@ -110,7 +115,7 @@ if($help){ #connect to database $dsn = "DBI:mysql:host=" . $dbhost . ";port=" . $dbport; -eval{ +eval{ $dbh = DBI->connect($dsn, $dbuser, $dbpass, {'RaiseError' => 1, 'PrintError' => 0}); @@ -124,92 +129,100 @@ $sth = $dbh->prepare($sql); $sth->execute; while (my ($dbname) = $sth->fetchrow_array){ - print "Looking at ... $dbname\n"; - my $db = new Bio::EnsEMBL::DBSQL::DBAdaptor( - -host => $dbhost, - -user => $dbuser, - -dbname => $dbname, - -pass => $dbpass, - -port => $dbport - ); - -# Pre-fetch all analyses in the database - my $aa = $db->get_AnalysisAdaptor(); - my $analyses = $aa->fetch_all(); - my (%hash,%reference); - foreach my $analysis(@$analyses){ - $hash{lc($analysis->logic_name())} = $analysis; - } - -# Parse the description files - foreach my $file( @files ){ - open(FH, $file) or throw("Failed to open $file $@"); + next unless $dbname =~ /core|cdna|otherfeatures/; + next if $dbname =~ /coreexpression/; + print "\n\nLooking at ... 
$dbname\n"; + my $db = new Bio::EnsEMBL::DBSQL::DBAdaptor( + -host => $dbhost, + -user => $dbuser, + -dbname => $dbname, + -pass => $dbpass, + -port => $dbport + ); + + # Pre-fetch all analyses in the database + my $aa = $db->get_AnalysisAdaptor(); + my $analyses = $aa->fetch_all(); + my (%hash,%reference); + foreach my $analysis(@$analyses){ + $hash{lc($analysis->logic_name())} = $analysis; + } + +# Parse the description files and check formatting + foreach my $file( @files ){ + open(FH, $file) or throw("Failed to open $file $@"); - LINE: while(my $row = <FH>){ - - chomp($row); - - next if ($row =~ /^\#/); # skip comments - next if ($row =~ /^$/); # and blank lines - next if ($row =~ /^\s+$/); # and whitespace-only lines - - my ($nr, $logic_name, $description, $display_label, $displayable, $web_data) = split(/\t/, $row); - #print join("\t", $logic_name, $description, $display_label, $displayable, $web_data), "\n"; - - $reference{lc($logic_name)} = { - nr => $nr, - description => $description || '', - display_label => $display_label || '', - displayable => $displayable || '', - web_data => $web_data || '', - }; + LINE: while(my $row = <FH>){ + chomp($row); + next if ($row =~ /^\#/); # skip comments + next if ($row =~ /^$/); # and blank lines + next if ($row =~ /^\s+$/); # and whitespace-only lines + + my ($nr, $logic_name, $description, $display_label, $displayable, $web_data) = split(/\t/, $row); +# print join("\t", $logic_name, $description, $display_label, $displayable, $web_data), "\n"; + + unless ($logic_name && defined($displayable)) { + throw("Please check description file entry for logic_name $logic_name (" . join("\n", $logic_name, $description, $display_label, $displayable, $web_data) . ")"); + exit; + } + unless (defined $displayable){ + throw("In the analysis_description file, logic name '$logic_name' should contain, at least, 5 columns: Number, logic_name, description, display_label and displayable. 
Fix it !!"); + } + unless ($displayable =~ m/^[01]$/) { + throw("Displayable flag for analysis '$logic_name' has to be either 0 or 1, but not '$displayable'!"); + } + + $reference{lc($logic_name)} = { + nr => $nr, + description => $description || '', + display_label => $display_label || '', + displayable => $displayable || '', + web_data => $web_data || '', + }; - $description =~ s/^\s+//; - $description =~ s/\s+$//; + $description =~ s/^\s+//; + $description =~ s/\s+$//; - next if not $description; + next if not $description; - if (exists $hash{lc($logic_name)}) { + if (exists $hash{lc($logic_name)}) { - my $analysis = $hash{lc($logic_name)}; + my $analysis = $hash{lc($logic_name)}; - $analysis->description($description); - $analysis->displayable($displayable); - $analysis->display_label($display_label); - $web_data ? $analysis->web_data($aa->get_dumped_data($web_data)) : $analysis->{_web_data} = undef; + $analysis->description($description); + $analysis->displayable($displayable); + $analysis->display_label($display_label); + $web_data ? $analysis->web_data($aa->get_dumped_data($web_data)) : $analysis->{_web_data} = undef; # print Dumper $analysis->web_data(); - unless ( $noupdate ) { - $aa->update($analysis) ; - } + unless ( $noupdate ) { + $aa->update($analysis) ; + } - delete $hash{lc($logic_name)}; - } + delete $hash{lc($logic_name)}; } - close(FH) or throw("Failed to close $file $@"); } + close(FH) or throw("Failed to close $file $@"); + } - if ( scalar(keys %hash)==0) { - unless ($noupdate) { - print STDERR "\nAll analysis descriptions have been updated, every analysis has a description now\n"; - } else { - print STDERR "\nEvery analysis has a description in the file, all analysis descriptions can be updated.\n". - "To write analysis descriptions to the analysis_description table in your DB,\n". 
- "please run this script including the -update option on the commandline\n"; - } - } - else { - foreach my $ln (keys %hash) { - warning ("Analysis '$ln' doesn't exist in reference file(s) '" - . join( "','", @files ) - . "'! It needs to be added first") - unless (exists $reference{$ln}); - warning "[$dbname] No description was found for logic_name '$ln':\n". - "\tref:\t display_label='".$reference{$ln}{display_label}."'; displayable=".$reference{$ln}{displayable}."; nr=".$reference{$ln}{nr}."\n". - "\tdb: \t display_label='".$hash{$ln}->display_label."'; displayable=".$hash{$ln}->displayable."; dbID=".$hash{$ln}->dbID."\n"; - - } + if ( scalar(keys %hash)==0) { + unless ($noupdate) { + print STDERR "\nAll analysis descriptions have been updated, every analysis has a description now\n"; + } else { + print STDERR "\nEvery analysis has a description in the file, all analysis descriptions can be updated.\n". + "To write analysis descriptions to the analysis_description table in your DB,\n". + "please run this script excluding the -noupdate option on the commandline\n"; } + } + else { + foreach my $ln (keys %hash) { + unless (exists $reference{$ln}) { + warning ("[$dbname]: Analysis '$ln' doesn't exist in reference file(s) '" + . join( "','", @files ) + . "'! It needs to be added first"); + } + } + } } sub usage{