From 606bfd93698c726a8b4d2db0c0b1ce2040b897fc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andreas=20Kusalananda=20K=C3=A4h=C3=A4ri?= <ak4@sanger.ac.uk> Date: Wed, 22 Jul 2009 15:24:35 +0000 Subject: [PATCH] Script to add auxiliary mapping tables. The script adds one table by subset previously added to the database and maps all terms to the closest term within that subset. The tables are named "aux_XX_YY_map" where "XX" is the ontology ("GO" or "SO"), and "YY" is the subset name (e.g., "goslim_goa"). The schema for each of the subset map tables is simply as follows: +----------------+------------------+------+-----+---------+-------+ | Field | Type | Null | Key | Default | Extra | +----------------+------------------+------+-----+---------+-------+ | term_id | int(10) unsigned | NO | PRI | NULL | | | subset_term_id | int(10) unsigned | NO | PRI | NULL | | +----------------+------------------+------+-----+---------+-------+ ... where both fields refers into the 'term' table. --- .../ontology/scripts/add_subset_maps.pl | 186 ++++++++++++++++++ 1 file changed, 186 insertions(+) create mode 100755 misc-scripts/ontology/scripts/add_subset_maps.pl diff --git a/misc-scripts/ontology/scripts/add_subset_maps.pl b/misc-scripts/ontology/scripts/add_subset_maps.pl new file mode 100755 index 0000000000..029b18018b --- /dev/null +++ b/misc-scripts/ontology/scripts/add_subset_maps.pl @@ -0,0 +1,186 @@ +#!/usr/bin/perl -w + +use strict; +use warnings; + +use DBI qw( :sql_types ); +use Getopt::Long qw( :config no_ignore_case ); + +#----------------------------------------------------------------------- + +sub usage { + print("Usage:\n"); + printf( "\t%s\t-h dbhost [-P dbport] \\\n" + . "\t%s\t-u dbuser [-p dbpass] \\\n" + . "\t%2\$s\t-d dbname\n", + $0, ' ' x length($0) ); + print("\n"); + printf( "\t%s\t-?\n", $0 ); + print("\n"); + print("Arguments:\n"); + print("\t-h/--host dbhost\tDatabase server host name\n"); + print("\t-P/--port dbport\tDatabase server port (optional)\n"); + print("\t-u/--user dbuser\tDatabase user name\n"); + print("\t-p/--pass dbpass\tUser password (optional)\n"); + print("\t-d/--name dbname\tDatabase name\n"); + print("\t-?/--help\t\tDisplays this information\n"); +} + +#----------------------------------------------------------------------- + +my ( $dbhost, $dbport ); +my ( $dbuser, $dbpass ); +my $dbname; + +$dbport = '3306'; + +if ( + !GetOptions( + 'dbhost|host|h=s' => \$dbhost, + 'dbport|port|P=i' => \$dbport, + 'dbuser|user|u=s' => \$dbuser, + 'dbpass|pass|p=s' => \$dbpass, + 'dbname|name|d=s' => \$dbname, + 'help|?' => sub { usage(); exit } ) + || !defined($dbhost) + || !defined($dbuser) + || !defined($dbname) ) +{ + usage(); + exit; +} + +my $dsn = sprintf( "DBI:mysql:database=%s;host=%s;port=%s", + $dbname, $dbhost, $dbport ); + +my $dbh = + DBI->connect( $dsn, $dbuser, $dbpass, + { 'RaiseError' => 0, 'PrintError' => 0 } ); + +my %subsets; +{ + my $statement = q( +SELECT DISTINCT + ontology.name, + subset.name +FROM ontology, + term, + subset +WHERE ontology.ontology_id = term.ontology_id + AND FIND_IN_SET(subset.name, term.subsets) > 0 +); + + my $sth = $dbh->prepare($statement); + + $sth->execute(); + + my ( $ontology_name, $subset_name ); + + $sth->bind_columns( \( $ontology_name, $subset_name ) ); + + while ( $sth->fetch() ) { + push( @{ $subsets{$ontology_name} }, $subset_name ); + } + + $sth->finish(); +} + +{ + my $select_statement = q( +SELECT child_term.term_id, + parent_term.term_id, + closure.distance +FROM closure, + term child_term, + term parent_term, + ontology +WHERE closure.parent_term_id = parent_term.term_id + AND closure.child_term_id = child_term.term_id + AND FIND_IN_SET(?, parent_term.subsets) > 0 + AND parent_term.ontology_id = ontology.ontology_id + AND ontology.name = ? +ORDER BY child_term.accession, closure.distance; +); + + my $select_sth = $dbh->prepare($select_statement); + + foreach my $ontology_name ( keys(%subsets) ) { + foreach my $subset_name ( @{ $subsets{$ontology_name} } ) { + + my $aux_table_name = $dbh->quote_identifier( + sprintf( "aux_%s_%s_map", $ontology_name, $subset_name ) ); + + $dbh->do( + sprintf( + "CREATE TABLE %s ( " + . "term_id INT UNSIGNED NOT NULL, " + . "subset_term_id INT UNSIGNED NOT NULL, " + . "UNIQUE INDEX map_idx (term_id, subset_term_id) )", + $aux_table_name + ) ); + + if ( $dbh->err() ) { + printf( "MySQL error, \"%s\", skipping...\n", $dbh->errstr() ); + next; + } + + $select_sth->bind_param( 1, $subset_name, SQL_VARCHAR ); + $select_sth->bind_param( 2, $ontology_name, SQL_VARCHAR ); + + $select_sth->execute(); + + my ( $child_id, $parent_id, $distance ); + + $select_sth->bind_columns( + \( $child_id, $parent_id, $distance ) ); + + my $insert_statement = sprintf( + "INSERT IGNORE INTO %s " + . "(term_id, subset_term_id) " + . "VALUES (?, ?)", + $aux_table_name + ); + + $dbh->do( sprintf( "LOCK TABLE %s WRITE", $aux_table_name ) ); + + my $insert_sth = $dbh->prepare($insert_statement); + + printf( "%s...\n", $aux_table_name ); + + my $last_child_id; + my $the_distance; + + while ( $select_sth->fetch() ) { + if ( !defined($last_child_id) + || $child_id != $last_child_id ) + { + $last_child_id = $child_id; + $the_distance = $distance; + } + + if ( $child_id == $last_child_id + && $distance != $the_distance ) + { + next; + } + + if ( $child_id == $last_child_id + && $distance == $the_distance ) + { + $insert_sth->bind_param( 1, $child_id, SQL_INTEGER ); + $insert_sth->bind_param( 2, $parent_id, SQL_INTEGER ); + + $insert_sth->execute(); + } + } + + $select_sth->finish(); + + $dbh->do( sprintf( "OPTIMIZE TABLE %s", $aux_table_name ) ); + $dbh->do("UNLOCK TABLES"); + + } ## end foreach my $subset_name ( @...) + } ## end foreach my $ontology_name (...) +} + +# $Id$ -- GitLab