From a8b928a9a8b353f0f23c2d795cf0e19a0d05df28 Mon Sep 17 00:00:00 2001
From: Glenn Proctor <gp1@sanger.ac.uk>
Date: Mon, 31 Jul 2006 10:52:42 +0000
Subject: [PATCH] Change statistic calculation SQL to make it more comparable
 with previous results. The change to the UNIQUE index on xref makes it look
 as though many more GO terms are being projected, but this is because the
 same GO term projected from multiple sources now gets multiple rows in the
 xref table, whereas before, because info_type/info_text weren't being
 considered, only one row would have been created.

---
 misc-scripts/xref_projection/project_display_xrefs.pl | 11 ++++++-----
 1 file changed, 6 insertions(+), 5 deletions(-)

diff --git a/misc-scripts/xref_projection/project_display_xrefs.pl b/misc-scripts/xref_projection/project_display_xrefs.pl
index 4316a4a594..ed7dfddae1 100644
--- a/misc-scripts/xref_projection/project_display_xrefs.pl
+++ b/misc-scripts/xref_projection/project_display_xrefs.pl
@@ -265,8 +265,7 @@ sub project_go_terms {
       #print $dbEntry->display_id() . " " . $et . " " . $projections_by_evidence_type{$et} . "\n";
     }
 
-    # Change linkage_type to IEA (in the absence of a specific one for projections)
-    $dbEntry->flush_linkage_types();
+    # Add IEA as an extra linkage_type for the projection (in the absence of a specific one for projections)
     $dbEntry->add_linkage_type("IEA");
 
     my $txt = "from $from_latin_species translation " . $from_translation->stable_id();
@@ -292,6 +291,8 @@ sub go_xref_exists {
 
   foreach my $xref (@{$to_go_xrefs}) {
 
+    next if (ref($dbEntry) ne "Bio::EnsEMBL::GoXref" || ref($xref) ne "Bio::EnsEMBL::GoXref");
+
     if ($xref->dbname() eq $dbEntry->dbname() &&
 	$xref->primary_id() eq $dbEntry->primary_id() &&
 	join("", @{$xref->get_all_linkage_types()}) eq join("", @{$dbEntry->get_all_linkage_types()})) {
@@ -333,10 +334,10 @@ sub print_stats {
   if ($go_terms) {
 
     print "GO xrefs: total ";
-    print &count_rows($to_ga, "SELECT COUNT(*) FROM xref x, external_db e WHERE e.external_db_id=x.external_db_id AND e.db_name='GO'");
+    print &count_rows($to_ga, "SELECT COUNT(DISTINCT(x.dbprimary_acc)) FROM xref x, external_db e WHERE e.external_db_id=x.external_db_id AND e.db_name='GO'");
 
     print " projected ";
-    print &count_rows($to_ga, "SELECT COUNT(*) FROM xref x, external_db e WHERE e.external_db_id=x.external_db_id AND e.db_name='GO' AND x.info_type='PROJECTION'");
+    print &count_rows($to_ga, "SELECT COUNT(DISTINCT(x.dbprimary_acc)) FROM xref x, external_db e WHERE e.external_db_id=x.external_db_id AND e.db_name='GO' AND x.info_type='PROJECTION'");
 
     print "\n";
 
@@ -355,7 +356,7 @@ sub count_rows {
   $sth->execute();
 
   return ($sth->fetchrow_array())[0];
-
+ 
 }
 
 # ----------------------------------------------------------------------
-- 
GitLab