From cc95264d0f713e30b5c81a3bdb97b16fe3529b49 Mon Sep 17 00:00:00 2001 From: Glenn Proctor <gp1@sanger.ac.uk> Date: Fri, 16 Nov 2007 09:24:43 +0000 Subject: [PATCH] Added lots of sanity checking to track down formatting errors. --- .../external_db/update_external_dbs.pl | 65 ++++++++++++++----- 1 file changed, 48 insertions(+), 17 deletions(-) diff --git a/misc-scripts/external_db/update_external_dbs.pl b/misc-scripts/external_db/update_external_dbs.pl index d05685208a..e06b0b9096 100644 --- a/misc-scripts/external_db/update_external_dbs.pl +++ b/misc-scripts/external_db/update_external_dbs.pl @@ -53,6 +53,9 @@ if($release_num) { @dbnames = grep {/^[a-zA-Z]+\_[a-zA-Z]+\_(core|est|estgene|vega|otherfeatures|cdna)\_${release_num}\_\d+[A-Za-z]?$/} @dbnames; } +my @field_names = qw(external_db_id db_name release status dbprimary_acc_linkable display_label_linkable priority db_display_name type); + +my @types = qw(ARRAY ALT_TRANS MISC LIT PRIMARY_DB_SYNONYM); # # make sure the user wishes to continue @@ -77,8 +80,9 @@ if ($input ne 'yes') { my $fh = IO::File->new(); $fh->open($file) or die("Could not open input file $file"); my @rows; -my $row; -while ($row = <$fh>) { +my %bad_lines; + +while (my $row = <$fh>) { chomp($row); next if ($row =~ /^#/); # skip comments next if ($row =~ /^$/); # and blank lines @@ -104,16 +108,45 @@ while ($row = <$fh>) { exit(1); } - if ( $a[1] =~ /^$/ || $a[1] =~ /^\s+$/ || $a[1] =~ /^\d+$/ ) { - print STDERR "Cannot parse the following line:\n" - . $row - . "\nIt probably has spaces separating the fields " - . "rather than tabs.\n"; - exit(1); + # do some formatting checks + my $blank; + for (my $i=0; $i < scalar(@a); $i++) { + if ($a[$i] eq '') { + $bad_lines{$row} = $field_names[$i] . " - field blank - check all tabs/spaces in line"; + } + } + + if ($a[1] =~ /\s/) { + $bad_lines{$row} = "db_name field appears to contain spaces"; } + if ($a[1] =~ /^$/) { + $bad_lines{$row} = "db_name field appears to be missing"; + } + if ($a[1] =~ /^\s+$/) { + $bad_lines{$row} = "db_name field appears to be blank"; + } + if ($a[1] =~ /^\d+$/) { + $bad_lines{$row} = "db_name field appears to be numeric - check formatting"; + } + + my $type_ok; + foreach my $type (@types) { + $type_ok = 1 if ($a[8] eq $type); + } + $bad_lines{$row} = "type field is " . $a[8] . ", not one of the recognised types" if (!$type_ok); + } $fh->close(); +if (%bad_lines) { + print STDERR "Cannot parse the following line(s) from $file; check that all fields are present and are separated by one tab (not spaces). \n"; + print STDERR "Name of problem field, and the error is printed in brackets first\n\n"; + foreach my $row (keys %bad_lines) { + print STDERR "[". $bad_lines{$row} . "]" . " $row\n"; + } + exit(1); +} + # Load into master database if(!$nonreleasemode){ load_database($db, $master, @rows); @@ -121,30 +154,28 @@ $fh->close(); # Check each other database in turn # Load if no extra rows in db that aren't in master # Warn and skip if there are - + foreach my $dbname (@dbnames) { - + print STDERR "Looking at $dbname ... \n"; if ($force || $nonreleasemode) { - + print STDERR "Forcing overwrite of external_db table in " . "$dbname from $file\n"; load_database( $db, $dbname, @rows ); - + } elsif (compare_external_db($db, $master, $dbname)) { - + print STDERR "$dbname has no additional rows. " . "Overwriting external_db table from $file\n"; load_database( $db, $dbname, @rows ); - + } else { - + print STDERR "$dbname has extra rows " . "that are not in $file, skipping\n"; } - - } -- GitLab