From 960fb72b761e4832573395b6bd62183b4cb33a3a Mon Sep 17 00:00:00 2001 From: Ian Longden <ianl@sanger.ac.uk> Date: Thu, 15 Sep 2011 09:29:05 +0000 Subject: [PATCH] Controls the overall process of the parsing. Makes calls for the creation of the database and then subsequent calls to fetch and parse the data files. --- .../xref_mapping/XrefParser/ProcessData.pm | 828 ++++++++++++++++++ 1 file changed, 828 insertions(+) create mode 100644 misc-scripts/xref_mapping/XrefParser/ProcessData.pm diff --git a/misc-scripts/xref_mapping/XrefParser/ProcessData.pm b/misc-scripts/xref_mapping/XrefParser/ProcessData.pm new file mode 100644 index 0000000000..a65664ea04 --- /dev/null +++ b/misc-scripts/xref_mapping/XrefParser/ProcessData.pm @@ -0,0 +1,828 @@ +package XrefParser::ProcessData; + +use strict; +use warnings; +use Carp; +use XrefParser::Database; + +use File::Basename; +use File::Spec::Functions; +use IO::File; +use Digest::MD5 qw(md5_hex); + +sub new +{ + my ($proto) = @_; + + my $class = ref $proto || $proto; + return bless {}, $class; +} + + +############################################################## +# Main routine (run). +# 1) creates xref database if new one needed +# 2) process the downloadable sources; +# a) fetch files if needed +# b) parse and store xrefs/object xrefs etc +# c) set checksum so that we know they have been processed +############################################################### +sub run { + my ($self, $ref_arg) = @_; + + my $base_dir = $ref_arg->{dl_path}; + my $release = $ref_arg->{release}; + my $verbose = $ref_arg->{verbose}; + my $unzip = $ref_arg->{unzip}; + my $stats = $ref_arg->{stats}; + my $cleanup = $ref_arg->{cleanup}; + my $rspecies = $ref_arg->{speciesr}; + my $sources = $ref_arg->{sourcesr}; + my $notsources = $ref_arg->{notsourcesr}; + + my $sql_dir = dirname($0); + + my $dbc = XrefParser::Database->new({ host => $ref_arg->{host}, + dbname => $ref_arg->{dbname}, + port => $ref_arg->{port}, + user => $ref_arg->{user}, + pass => $ref_arg->{pass}, + verbose => $ref_arg->{verbose} }); + + $self->database($dbc); + + if ($ref_arg->{create}) { + $dbc->create($sql_dir, + $ref_arg->{force}, + $ref_arg->{drop_db}); + } + + my $dbi = $dbc->dbi(); + $self->dbi($dbi); + + my $sth_c = $dbi->prepare("insert into process_status (status, date) values('xref_created',now())"); + $sth_c->execute; + + + # need to use name now and that download = 'Y' as well + + my $sql = (<<"DSS"); +SELECT s.name, s.source_id + FROM source s, dependent_source ds, source_url su + WHERE su.source_id = s.source_id AND + ds.dependent_name = s.name AND + ds.master_source_id = ? AND + su.species_id = ? AND + s.download = 'Y' AND + s.source_id != ? AND + su.checksum is null +DSS + my $dep_sth = $dbi->prepare($sql); + + # validate species names + my @species_ids = $self->validate_species($rspecies, $verbose); + + # validate source names + exit(1) if ( !$self->validate_sources(\@species_ids,$sources, $verbose) ); + exit(1) if ( !$self->validate_sources(\@species_ids,$notsources, $verbose) ); + + # build SQL + my $species_sql = ""; + if (@species_ids) { + $species_sql .= " AND su.species_id IN ("; + for ( my $i = 0 ; $i < @species_ids ; $i++ ) { + $species_sql .= "," if ( $i != 0 ); + $species_sql .= $species_ids[$i]; + } + $species_sql .= ") "; + } + + my $source_sql = ""; + if (defined $sources and @$sources) { + $source_sql .= " AND LOWER(s.name) IN ("; + for ( my $i = 0 ; $i < @$sources ; $i++ ) { + $source_sql .= "," if ( $i != 0 ); + $source_sql .= "\'" . lc( $$sources[$i] ) . "\'"; + } + $source_sql .= ") "; + } + + if (defined $notsources and @$notsources) { + $source_sql .= " AND LOWER(s.name) NOT IN ("; + for ( my $i = 0 ; $i < @$notsources ; $i++ ) { + $source_sql .= "," if ( $i != 0 ); + $source_sql .= "\'" . lc( $$notsources[$i] ) . "\'"; + } + $source_sql .= ") "; + } + my $sth = $dbi->prepare("insert into process_status (status, date) values('parsing_started',now())"); + $sth->execute; + + $sql = + "SELECT DISTINCT(s.source_id), su.source_url_id, s.name, su.url, " + . "su.release_url, su.checksum, su.parser, su.species_id " + . "FROM source s, source_url su, species sp " + . "WHERE s.download='Y' AND su.source_id=s.source_id " + . "AND su.species_id=sp.species_id " + . $source_sql + . $species_sql + . "ORDER BY s.ordered"; +# print {*STDERR} $sql . "\n"; + + $sth = $dbi->prepare($sql); + $sth->execute(); + + my ( $source_id, $source_url_id, $name, $url, $release_url, + $checksum, $parser, $species_id ); + + $sth->bind_columns( \$source_id, \$source_url_id, + \$name, \$url, + \$release_url, \$checksum, + \$parser, \$species_id ); + + my $dir; + my %summary = (); + + my %sum_xrefs; + my %sum_prim; + my %sum_dep; + my %sum_dir; + my %sum_coord; + my %sum_syn; + + # + # We may be continuing on form a previous run so find what xreefs we have already + # there so that when we calculate the number of new xrefs etc we start with the correct number + # + $self->get_stats({ xrefs => \%sum_xrefs, + primary => \%sum_prim, + depend => \%sum_dep, + direct => \%sum_dir, + coord => \%sum_coord, + synonym => \%sum_syn, + dbi => $dbi, + print => 0 }); + + + my $start_transaction_sth = $dbi->prepare("start transaction"); + + my $end_transaction_sth = $dbi->prepare("commit"); + + + while ( my @row = $sth->fetchrow_array() ) { + print '-' x 4, "{ $name }", '-' x ( 72 - length($name) ), "\n" if ($verbose); + + my $cs; + my $file_cs = ""; + my $parse = 0; + my $empty = 0; + my $type = $name; + my $dsn; + + my @files = split( /\s+/x, $url ); + my @files_to_parse = (); + + $dir = catdir( $base_dir, sanitise($type) ); + + # For summary purposes: If 0 is returned (in + # $summary{$name}->{$parser}) then it is successful. If 1 is + # returned then it failed. If undef/nothing is returned the we + # do not know. + $summary{$name}->{$parser} = 0; + + my $ff= XrefParser::FetchFiles->new(); + @files = $ff->fetch_files( {dest_dir => $dir, + user_uris => \@files, + del_down => $ref_arg->{deletedownloaded}, + chk_down => $ref_arg->{checkdownload}, + verbose => $verbose + }); + if ( !@files ) { + # Fetching failed. + ++$summary{$name}->{$parser}; + next; + } + if ( defined($release_url) ) { + my @rel=(); + push @rel , $release_url; + @rel = $ff->fetch_files( {dest_dir => $dir, + user_uris => \@rel, + del_down => $ref_arg->{deletedownloaded}, + chk_down => $ref_arg->{checkdownload}, + verbose => $verbose} + ); + $release_url = $rel[-1]; + } + $start_transaction_sth->execute(); + + foreach my $file (@files) { + + # check dependencies are loaded all ready + if(!($self->all_dependencies_loaded($source_id, $species_id, $name, $dep_sth))){ + ++$summary{$name}->{$parser}; + next; + } + # Database parsing + if ( $file =~ /^mysql:/ix ) { + $dsn = $file; + print "Parsing $dsn with $parser\n" if ($verbose); + my $eval_test = eval "require XrefParser::$parser"; + if($@ or $eval_test != 1) { + croak "Could not require XrefParser::$parser\ndollar=at=$@\neval_test = $eval_test\n"; + } + my $new = "XrefParser::$parser"->new($self->database, $verbose); + if ( + $new->run( $dsn, $source_id, $species_id, + $name, undef, $verbose ) ) + { + ++$summary{$name}->{$parser}; + } + next; + } + if ( $file =~ /^script:/ix ) { + if(!defined($checksum) || $checksum == 0){ + print "Parsing $file with $parser\n" if ($verbose); + my $eval_test = eval "require XrefParser::$parser"; + if($@ or $eval_test != 1) { + croak "Could not require XrefParser::$parser\ndollar=at=$@\neval_test = $eval_test\n"; + } + my $new = "XrefParser::$parser"->new($self->database, $verbose); + my $sqlu = + "UPDATE source_url SET checksum=1, upload_date=NOW() WHERE source_url_id=$source_url_id"; + + if ( + $new->run_script( {file => $file, + source_id => $source_id, + species_id => $species_id, + verbose => $verbose }) ) + { + ++$summary{$name}->{$parser}; + } + else{ + # set the checksum to 1 so that we know the script has been ran successfully + $dbi->prepare($sqlu)->execute() || croak( $dbi->errstr() ); + } + } + else{ + print "$file has already been ran with $parser and so will not be ran again\n" if($verbose); + } + next; + } + + + if ( $unzip && ( $file =~ /\. # anything + (gz|Z) # followed by gz or Z + $ # at the end + /x ) ) { + printf( "Uncompressing '%s' using 'gunzip'\n", $file ) if ($verbose); + system( "gunzip", "-f", $file ); + } + # remove the gz or Z at the end of the file name + if ($unzip) { $file =~ s/\.(gz|Z)$//x } + + # Compare checksums and parse/upload if necessary need to + # check file size as some .SPC files can be of zero length + + if ( !defined( $cs = md5sum($file) ) ) { + printf( "Download '%s'\n", $file ) if($verbose); + ++$summary{$name}->{$parser}; + } else { + $file_cs .= ':' . $cs; + if ( !defined $checksum + || index( $checksum, $file_cs ) == -1 ) + { + if ( -s $file ) { + $parse = 1; + print "Checksum for '$file' does not match, " + . "will parse...\n" if ($verbose); + + # Files from sources "Uniprot/SWISSPROT" and + # "Uniprot/SPTREMBL" are all parsed with the + # same parser + if ( $parser eq "Uniprot/SWISSPROT" + || $parser eq "Uniprot/SPTREMBL" ) + { + print STDERR "No idea why this is being done here??\n"; + print STDERR "parser was $parser now being set to UniProtParser\n"; + $parser = 'UniProtParser'; + } + } else { + $empty = 1; + printf( + "The file '%s' has zero length, skipping\n", + $file ) if ($verbose); + } + } + } ## end else [ if ( !defined( $cs = md5sum... + + # Push this file to the list of files to parsed. The files + # are *actually* parsed only if $parse == 1. + push @files_to_parse, $file; + + } ## end foreach my $file (@files) + + if ( $parse and @files_to_parse and defined $file_cs ) { + print "Parsing '" + . join( "', '", @files_to_parse ) + . "' with $parser\n" if ($verbose); + + eval "require XrefParser::$parser"; + $@ && carp( "[ERROR] Cannot require $parser: $@" ); + my $new = "XrefParser::$parser"->new($self->database, $verbose); + + if ( defined $release_url ) { + # Run with $release_url. + if ( + $new->run( { source_id => $source_id, + species_id => $species_id, + files => \@files_to_parse, + rel_file => $release_url, + verbose => $verbose } ) ) + { + ++$summary{$name}->{$parser}; + } + } else { + # Run without $release_url. + if ( + $new->run( { source_id => $source_id, + species_id => $species_id, + files => \@files_to_parse, + verbose => $verbose } )) + { + ++$summary{$name}->{$parser}; + } + } + + # update AFTER processing in case of crash. + $self->update_source( $source_url_id, + $file_cs, $files_to_parse[0] ); + + # Set release if specified + if ( defined $release ) { + $self->set_release( $source_id, $release ); + } + + } elsif ( !$dsn && !$empty && @files_to_parse ) { + print( "Ignoring '" + . join( "', '", @files_to_parse ) + . "' as checksums match\n" ) if ($verbose); + } + + if ($cleanup) { + foreach my $file (@files_to_parse) { + printf( "Deleting '%s'\n", $file ) if($verbose); + unlink($file); + } + } + + $end_transaction_sth->execute(); + if($stats){ + $self->get_stats({ xrefs => \%sum_xrefs, + primary => \%sum_prim, + depend => \%sum_dep, + direct => \%sum_dir, + coord => \%sum_coord, + synonym => \%sum_syn, + dbi => $dbi, + print => 1 }) + } + + } ## end while ( my @row = $sth->fetchrow_array... + + print "\n", '=' x 80, "\n"; + print "Summary of status\n"; + print '=' x 80, "\n"; + + + foreach my $source_name ( sort keys %summary ) { + foreach my $parser_name ( keys %{ $summary{$source_name} } ) { + printf( "%30s %-20s\t%s\n", + $source_name, + $parser_name, ( + defined $summary{$source_name}->{$parser_name} + && $summary{$source_name}->{$parser_name} + ? 'FAILED' + : 'OKAY' + ) ); + } + } + + if($stats){ + %sum_xrefs = (); # reset we now want total numbers + %sum_prim = (); + %sum_dep = (); + %sum_dir = (); + %sum_coord = (); + %sum_syn = (); + $self->get_stats({ xrefs => \%sum_xrefs, + primary => \%sum_prim, + depend => \%sum_dep, + direct => \%sum_dir, + coord => \%sum_coord, + synonym => \%sum_syn, + dbi => $dbi, + print => 1 }) + } + + $sth = $dbi->prepare("insert into process_status (status, date) values('parsing_finished',now())"); + $sth->execute; + + return 1; +} ## end sub run + + + +################################################################################# +# Print the statistics for numbers of xrefs, object xref etc that have been added +################################################################################# +sub get_stats { + my ($self, $ref_arg) = @_; + + my $sum_xrefs = $ref_arg->{xrefs}; + my $sum_prim = $ref_arg->{primary}; + my $sum_dep = $ref_arg->{depend}; + my $sum_dir = $ref_arg->{direct}; + my $sum_coord = $ref_arg->{coord}; + my $sum_syn = $ref_arg->{synonym}; + my $dbi = $ref_arg->{dbi}; + my $print = $ref_arg->{print}; + + # produce summary of what has been added + my %sum_line; + + # first the number of xrefs; + my $group_sql = "SELECT count(1), s.name from source s, xref x where s.source_id = x.source_id group by s.name"; + + my $sum_sth = $dbi->prepare($group_sql); + $sum_sth->execute(); + + my ($sum_count, $sum_name); + $sum_sth->bind_columns(\$sum_count, \$sum_name); + + while($sum_sth->fetch){ + if(defined($sum_xrefs->{$sum_name})){ + if($sum_count != $sum_xrefs->{$sum_name}){ + my $diff = ($sum_count - $sum_xrefs->{$sum_name}); + $sum_line{$sum_name} = [$diff, 0, 0, 0, 0, 0, 0, 0]; + } + } + else{ + $sum_line{$sum_name} = [$sum_count, 0, 0, 0, 0, 0, ,0 ,0]; + } + $sum_xrefs->{$sum_name} = $sum_count; + } + $sum_sth->finish; + + + # second the number of primary xrefs + $group_sql = "SELECT count(1), s.name from source s, primary_xref px, xref x where s.source_id = x.source_id and px.xref_id = x.xref_id group by s.name"; + + $sum_sth = $dbi->prepare($group_sql); + $sum_sth->execute(); + + $sum_sth->bind_columns(\$sum_count, \$sum_name); + + while($sum_sth->fetch){ + if ( defined($sum_prim->{$sum_name}) && ($sum_count != $sum_prim->{$sum_name}) ){ + my $diff = ($sum_count - $sum_prim->{$sum_name}); + $sum_line{$sum_name}[1] = $diff; + } + elsif(!defined($sum_prim->{$sum_name})){ + $sum_line{$sum_name}[1] = $sum_count; + } + $sum_prim->{$sum_name} = $sum_count; + } + $sum_sth->finish; + + + # third the number of dependent xrefs + $group_sql = "SELECT count(1), s.name from source s, dependent_xref dx, xref x where s.source_id = x.source_id and dx.dependent_xref_id = x.xref_id group by s.name"; + + $sum_sth = $dbi->prepare($group_sql); + $sum_sth->execute(); + + $sum_sth->bind_columns(\$sum_count, \$sum_name); + + while($sum_sth->fetch){ + if ( defined($sum_dep->{$sum_name}) && ($sum_count != $sum_dep->{$sum_name}) ){ + my $diff = ($sum_count - $sum_dep->{$sum_name}); + $sum_line{$sum_name}[2] = $diff; + } + elsif(!defined($sum_dep->{$sum_name})){ + $sum_line{$sum_name}[2] = $sum_count; + } + $sum_dep->{$sum_name} = $sum_count; + } + $sum_sth->finish; + + + + # fourth,fifth and sixth the number of direct xrefs + + my $type_count =0; + foreach my $type (qw (gene transcript translation)){ + + $group_sql = "SELECT count(1), s.name from source s, ".$type."_direct_xref dx, xref x where s.source_id = x.source_id and dx.general_xref_id = x.xref_id group by s.name"; + + $sum_sth = $dbi->prepare($group_sql); + $sum_sth->execute(); + + $sum_sth->bind_columns(\$sum_count, \$sum_name); + + while($sum_sth->fetch){ + $sum_name .= "_$type"; + if ( defined($sum_dir->{$sum_name}) && ($sum_count != $sum_dir->{$sum_name}) ){ + my $diff = ($sum_count - $sum_dir->{$sum_name}); + $sum_line{$sum_name}[3+$type_count] = $diff; + } + elsif(!defined($sum_dir->{$sum_name})){ + $sum_line{$sum_name}[3+$type_count] = $sum_count; + } + $sum_dir->{$sum_name} = $sum_count; + } + $sum_sth->finish; + $type_count++; + } + + # seventh the number of coordinate xrefs + $group_sql = "SELECT count(1), s.name from source s, coordinate_xref cx where s.source_id = cx.source_id group by s.name"; + + $sum_sth = $dbi->prepare($group_sql); + $sum_sth->execute(); + + $sum_sth->bind_columns(\$sum_count, \$sum_name); + + while($sum_sth->fetch){ + if ( defined($sum_coord->{$sum_name}) && ($sum_count != $sum_coord->{$sum_name}) ){ + my $diff = ($sum_count - $sum_coord->{$sum_name}); + $sum_line{$sum_name}[6] = $diff; + } + elsif(!defined($sum_coord->{$sum_name})){ + $sum_line{$sum_name}[6] = $sum_count; + } + $sum_coord->{$sum_name} = $sum_count; + } + + $sum_sth->finish; + + + # eigth the number of synonyms + $group_sql = "select count(1), s.name from source s, xref x, synonym o where s.source_id = x.source_id and x.xref_id = o.xref_id group by s.name"; + + $sum_sth = $dbi->prepare($group_sql); + $sum_sth->execute(); + + $sum_sth->bind_columns(\$sum_count, \$sum_name); + + while($sum_sth->fetch){ + if (defined($sum_syn->{$sum_name}) && ($sum_count != $sum_syn->{$sum_name}) ){ + my $diff = ($sum_count - $sum_syn->{$sum_name}); + $sum_line{$sum_name}[7] = $diff; + } + elsif(!defined($sum_syn->{$sum_name})) { + $sum_line{$sum_name}[7] = $sum_count; + } + $sum_syn->{$sum_name} = $sum_count; + } + $sum_sth->finish; + + if($print){ + ################### + # Print the header + ################### + my $max_name_length = 6; # (source) + foreach my $sum_name (keys %sum_line){ + if(length($sum_name) > $max_name_length){ + $max_name_length = length($sum_name); + } + } + + my $width = 8; + print "\nsource". " " x ($max_name_length - 3); #( 3 = length(source) - 3 spaces) + foreach my $val (qw(xrefs prim dep gdir tdir tdir coord synonyms)){ + print $val." " x ($width - length($val) ); + } + print "\n"; + + + ################### + # Print the numbers + ################### + $max_name_length += 3; # lets have 3 spaces after + foreach my $sum_name (keys %sum_line){ + $sum_name ||= 0; + print $sum_name. " " x ( $max_name_length - length($sum_name)); + foreach my $val (@{$sum_line{$sum_name}}){ + $val ||= 0; + print $val." " x ($width - length($val)); + } + print "\n"; + } + print "\n"; + } + return; +} + + +################################### +# Getter/Setter for database object +################################### +sub database { + my ($self, $arg) = @_; + + (defined $arg) && + ($self->{_database} = $arg ); + return $self->{_database}; +} + + +############################## +# Getter/Setter for dbi object +############################## +sub dbi { + my ($self, $arg) = @_; + + (defined $arg) && + ($self->{_dbi} = $arg ); + return $self->{_dbi}; +} + + +########################################################### +# Check if all the species name in a ref to list is valid +# If they all are; returns a list of species +# It not prints the values that are aloud and dies. +########################################################### +sub validate_species { + my ($self, $species, $verbose) = @_; + my @species_ids; + + my $dbi = $self->dbi(); + my $sth = $dbi->prepare("SELECT species_id, name FROM species WHERE LOWER(name)=? OR LOWER(aliases) REGEXP ?"); + my ($species_id, $species_name); + + foreach my $sp (@$species) { + + $sth->execute(lc($sp), "^".lc($sp).",|[ ]".lc($sp)."[,]|^".lc($sp)."\$|[,] ".lc($sp)."\$" ); + $sth->bind_columns(\$species_id, \$species_name); + if (my @row = $sth->fetchrow_array()) { + print "Species $sp is valid (name = " . $species_name . ", ID = " . $species_id . ")\n" if($verbose); + push @species_ids, $species_id; + } else { + print STDERR "Species $sp is not valid; valid species are:\n"; + show_valid_species(); + exit(1); + } + } + return @species_ids; +} + +############################################################ +# Return 1 if all dependent sources have already been loaded +# else return 0; +############################################################ +sub all_dependencies_loaded{ + my ($self, $source_id, $species_id, $s_name, $dep_sth) = @_; + my $okay = 1; + + $dep_sth->execute($source_id, $species_id, $source_id); + my ($id, $name); + $dep_sth->bind_columns(\$id, \$name); + while($dep_sth->fetch() ){ + print STDERR "dependent source $name ($id) not loaded so cannot process source $s_name\n"; + print "dependent source $name ($id) not loaded so cannot process source $s_name\n"; + $okay = 0; + } + return $okay; +} + + +######################################################################## +# Remove potentially problematic characters from string used as file or +# directory names. +######################################################################## +sub sanitise { + my $str = shift; + $str =~ tr[/:][]d; + return $str; +} + + +####################################################### +# Print to stanadrd error the list of species available +####################################################### +sub show_valid_species { + my ($self) =shift; + + my $dbi = $self->dbi(); + my $sth = $dbi->prepare("SELECT name, aliases FROM species"); + + $sth->execute(); + while (my @row = $sth->fetchrow_array()) { + print STDERR $row[0] . " (aliases: " . $row[1] . ")\n"; + } + return; +} + + +######################################################### +# Check if all the source names in a ref to list is valid +# If they all are; returns 1 +# It not prints the values that are aloud and returns 0. +######################################################### +sub validate_sources { + my ($self, $speciesref, $sources, $verbose) = @_; + + my $dbi = $self->dbi(); + my $sth = $dbi->prepare("SELECT * FROM source WHERE LOWER(name)=?"); + + foreach my $source (@$sources) { + + my $rv = $sth->execute(lc($source)); + if ( $rv > 0 ) { + print "Source $source is valid\n" if($verbose); + } else { + print "\nSource $source is not valid; valid sources are:\n"; + foreach my $sp (@{$speciesref}){ + show_valid_sources($sp); + } + return 0; + } + + } + + return 1; + +} + + +####################################################### +# Print to stanadrd error the list of sources available +####################################################### +sub show_valid_sources { + my ($self, $species) = @_; + + my $dbi = $self->dbi(); + my $sth = $dbi->prepare("SELECT distinct(name) FROM source s, source_url su WHERE s.download='Y' and s.source_id = su.source_id and su.species_id = $species"); + + $sth->execute(); + while (my @row = $sth->fetchrow_array()) { + print $row[0] . "\n"; + } + return; +} + + +#################################################################### +# Compute a checksum of a file. This checksum is not a straight MD5 +# hex digest, but instead the file size combined with the first six +# characters of the MD5 hex digest. This is to save space. +#################################################################### +sub md5sum +{ + my $file = shift; + + open my $FH, "<", $file or return; + binmode($FH); + + my $checksum = sprintf( "%s/%d", + substr( Digest::MD5->new()->addfile(*$FH)->hexdigest(), 0, 6 ), + [ stat $FH ]->[7] ); + + close($FH); + + return $checksum; +} + + +#################################### +# Set the checksum for a source file +#################################### +sub update_source { + my ($self, $source_url_id, $checksum, $file_name ) = @_; + + my $dbi = $self->dbi(); + my $file = IO::File->new($file_name) + or croak("Failed to open file '$file_name'"); + + my $file_date = + POSIX::strftime( '%Y%m%d%H%M%S', + localtime( [ $file->stat() ]->[9] ) ); + + $file->close(); + + my $sql = + "UPDATE source_url SET checksum='$checksum', " + . "file_modified_date='$file_date', " + . "upload_date=NOW() " + . "WHERE source_url_id=$source_url_id"; + + # The release is set by the individual parser by calling the + # inherited set_release() method. + + $dbi->prepare($sql)->execute() || croak( $dbi->errstr() ); + return; +} + + +1; -- GitLab