diff --git a/misc-scripts/probe_mapping/compare_mappings.pl b/misc-scripts/probe_mapping/compare_mappings.pl index 14d222a8baaad4135e92472f9b6e80b9372e6ff7..3acf66e043bf49120d8e79765142488161a31b21 100644 --- a/misc-scripts/probe_mapping/compare_mappings.pl +++ b/misc-scripts/probe_mapping/compare_mappings.pl @@ -6,7 +6,7 @@ use Bio::EnsEMBL::DBEntry; use Bio::EnsEMBL::DBSQL::DBAdaptor; use Bio::EnsEMBL::Mapper::RangeRegistry; -my ($s_host, $s_port, $s_user, $s_pass, $s_dbname, $t_host, $t_port, $t_user, $t_pass, $t_dbname, ); +my ($s_host, $s_port, $s_user, $s_pass, $s_dbname, $t_host, $t_port, $t_user, $t_pass, $t_dbname, $array); GetOptions('source_host=s' => \$s_host, 'source_user=s' => \$s_user, @@ -18,11 +18,16 @@ GetOptions('source_host=s' => \$s_host, 'target_port=i' => \$t_port, 'target_pass=s' => \$t_pass, 'target_dbname=s' => \$t_dbname, + 'array=s' => \$array, 'help' => sub { usage(); exit(0); }); $s_port ||= 3306; $t_port ||= 3306; -usage() if(!$s_user || !$s_dbname || !$s_host || !$t_user || !$t_dbname || !$t_host); +usage() if(!$s_user || !$s_dbname || !$s_host || !$t_dbname); + +$t_host = $s_host if (!$t_host); +$t_port = $s_port if (!$t_port); +$t_user = $s_user if (!$t_user); my $s_db = new Bio::EnsEMBL::DBSQL::DBAdaptor('-host' => $s_host, '-port' => $s_port, @@ -40,13 +45,17 @@ my $t_db = new Bio::EnsEMBL::DBSQL::DBAdaptor('-host' => $t_host, # TODO fix this when external db is sorted out my $restrict_sql = "x.external_db_id > 3000 AND x.external_db_id < 3210"; +$restrict_sql = " e.db_name='$array'" if ($array); run(); sub run { + print "Using only array $array\n" if ($array); + # compare total counts first - my $count_sql = "SELECT COUNT(*) FROM xref x, object_xref ox WHERE x.xref_id=ox.xref_id AND ox.ensembl_object_type='Transcript' AND $restrict_sql"; + my $count_sql = "SELECT COUNT(*) FROM xref x, object_xref ox, external_db e WHERE e.external_db_id=x.external_db_id AND x.xref_id=ox.xref_id AND ox.ensembl_object_type='Transcript' AND $restrict_sql"; + my $s_sth = $s_db->dbc()->prepare($count_sql); $s_sth->execute(); my $count = ($s_sth->fetchrow_array())[0]; @@ -55,7 +64,7 @@ sub run { my $t_sth = $t_db->dbc()->prepare($count_sql); $t_sth->execute(); $count = ($t_sth->fetchrow_array())[0]; - print "Total oligo_xrefs in $t_dbname: $count\n"; + print "Total oligo xrefs in $t_dbname: $count\n"; # cache all mappings from each database # key format: transcript_stable_id:array:probe @@ -103,7 +112,7 @@ sub run { sub cache_mappings { - my ($db) = @_; + my ($db, $print) = @_; my %mappings; @@ -112,10 +121,12 @@ sub cache_mappings { $sth->execute(); while (my @row = $sth->fetchrow_array()) { - #my $key = $row[0] . ":" . $row[1]. ":" . $row[2]; # TODO - add array back in - my $key = $row[0] . ":" . $row[2]; + my $key = $row[0] . "\t" . $row[1]. "\t" . $row[2]; + #my $key = $row[0] . "\t" . $row[2]; $mappings{$key} = $key; + print "$key\n" if ($print); + } $sth->finish(); @@ -151,10 +162,11 @@ sub usage { NEW XREFS: --target_host The database server to read the second set of xrefs from. + Defaults to source_host. - [--target_port] The port to use.. Defaults to 3306. + [--target_port] The port to use. Defaults to 3306. - --target_user Database username. + --target_user Database username. Defaults to source_user. --target_pass Password for target_user, if required. @@ -162,6 +174,8 @@ sub usage { MISCELLANEOUS: + [--array] Just compare results from this array. + [--help] This text.