From b3616580f12c6a9c07058ff300d7ea2ef452ffa4 Mon Sep 17 00:00:00 2001
From: Monika Komorowska <mk8@sanger.ac.uk>
Date: Tue, 7 Aug 2012 09:41:15 +0000
Subject: [PATCH] updated transcript_display_xref_sources and added
 gene_display_xref_sources

---
 .../XrefMapper/ornithorhynchus_anatinus.pm    | 61 +++++++++++--------
 .../XrefMapper/rattus_norvegicus.pm           | 54 ++++++++++------
 2 files changed, 68 insertions(+), 47 deletions(-)

diff --git a/misc-scripts/xref_mapping/XrefMapper/ornithorhynchus_anatinus.pm b/misc-scripts/xref_mapping/XrefMapper/ornithorhynchus_anatinus.pm
index 8cc0d18cfc..dba785e7e7 100644
--- a/misc-scripts/xref_mapping/XrefMapper/ornithorhynchus_anatinus.pm
+++ b/misc-scripts/xref_mapping/XrefMapper/ornithorhynchus_anatinus.pm
@@ -6,31 +6,22 @@ use vars '@ISA';
 
 @ISA = qw{ XrefMapper::BasicMapper };
 
-# Same as in BasicMapper but Genoscope order reversed.
 
-sub transcript_display_xref_sources {
-  my @list = qw(Platypus_olfactory_receptor
-		Oxford_FGU_Oa_tscript
-		Oxford_FGU_Oa_gene
-		RFAM
-		miRBase
-		IMGT/GENE_DB
-		HUGO
-		SGD
-		MGI
-		flybase_symbol
-		Anopheles_symbol
-		Genoscope_annotated_gene
-		Uniprot/SWISSPROT
-		Uniprot/Varsplic
-		RefSeq_peptide
-		RefSeq_dna
-		Uniprot/SPTREMBL
-		EntrezGene);
+sub gene_display_xref_sources {
+  # Returns [\@list, \%ignore]: gene display xref source names plus SQL selecting object_xrefs not to use.
+ my $self     = shift;
+  # Candidate source names; presumably consulted in this order - TODO confirm against BasicMapper.
+  my @list = qw(Oxford_FGU_Oa_gene
+                RFAM
+                miRBase
+                Uniprot_genename
+                EntrezGene);
 
   my %ignore;
 
-  $ignore{"EntrezGene"} =(<<'IEG');
+  # Don't use EntrezGene labels that are dependent on predicted RefSeqs (source name like "Refseq%predicted").
+
+$ignore{'EntrezGene'} =<<IEG;
 SELECT DISTINCT ox.object_xref_id
   FROM object_xref ox, dependent_xref dx, 
        xref xmas, xref xdep, 
@@ -42,15 +33,31 @@ SELECT DISTINCT ox.object_xref_id
           xdep.source_id = sdep.source_id AND
           smas.name like "Refseq%predicted" AND
           sdep.name like "EntrezGene" AND
-          ox.ox_status = "DUMP_OUT"
+          ox.ox_status = "DUMP_OUT" 	 
 IEG
 
-  $ignore{"Uniprot/SPTREMBL"} =(<<BIGN);
+  # Don't use labels of the form LOC<digits>: the REGEXP matches "LOC" plus one or more digits at the label start.
+
+$ignore{'LOC_prefix'} =<<LOCP;
 SELECT object_xref_id
-    FROM object_xref JOIN xref USING(xref_id) JOIN source USING(source_id)
-     WHERE ox_status = 'DUMP_OUT' AND name = 'Uniprot/SPTREMBL' 
-      AND priority_description = 'protein_evidence_gt_2'
-BIGN
+  FROM object_xref JOIN xref USING(xref_id) JOIN source USING(source_id)
+   WHERE ox_status = 'DUMP_OUT' AND label REGEXP '^LOC[[:digit:]]+'
+LOCP
+
+  return [\@list,\%ignore];
+} 
+
+# Returns [\@list, \%ignore] for transcript display xrefs; %ignore is left empty here.
+sub transcript_display_xref_sources {
+  my @list = qw(Platypus_olfactory_receptor
+		Oxford_FGU_Oa_tscript		
+		RFAM
+		miRBase
+		Uniprot/SWISSPROT
+		Uniprot/Varsplic
+		);
+
+  my %ignore;
 
   return [\@list,\%ignore];
 }
diff --git a/misc-scripts/xref_mapping/XrefMapper/rattus_norvegicus.pm b/misc-scripts/xref_mapping/XrefMapper/rattus_norvegicus.pm
index 68e0e32da5..0ab3ade136 100644
--- a/misc-scripts/xref_mapping/XrefMapper/rattus_norvegicus.pm
+++ b/misc-scripts/xref_mapping/XrefMapper/rattus_norvegicus.pm
@@ -7,25 +7,20 @@ use vars '@ISA';
 @ISA = qw{ XrefMapper::BasicMapper };
 
 
-sub transcript_display_xref_sources {
+sub gene_display_xref_sources {
   my $self     = shift;
-
+  # Returns [\@list, \%ignore]. RGD (also used for rat transcripts) replaces MGI, the mouse source - NOTE(review): confirm.
   my @list = qw(RFAM
-	      miRBase
-	      RGD
-	      MGI
-	      flybase_symbol
-	      Anopheles_symbol
-	      Genoscope_annotated_gene
-	      Uniprot/SWISSPROT
-	      Uniprot/Varsplic
-	      Uniprot/SPTREMBL
-	      EntrezGene);
-
+                miRBase
+                RGD
+                Uniprot_genename
+                EntrezGene);
 
   my %ignore;
 
-  $ignore{"EntrezGene"} =(<<'IEG');
+  # Don't use EntrezGene labels that are dependent on predicted RefSeqs
+  # (source name like "Refseq%predicted"); the query selects the object_xref_ids concerned.
+$ignore{'EntrezGene'} =<<IEG;
 SELECT DISTINCT ox.object_xref_id
   FROM object_xref ox, dependent_xref dx, 
        xref xmas, xref xdep, 
@@ -37,15 +32,34 @@ SELECT DISTINCT ox.object_xref_id
           xdep.source_id = sdep.source_id AND
           smas.name like "Refseq%predicted" AND
           sdep.name like "EntrezGene" AND
-          ox.ox_status = "DUMP_OUT"
+          ox.ox_status = "DUMP_OUT" 	 
 IEG
 
-  $ignore{"Uniprot/SPTREMBL"} =(<<BIGN);
+  # Don't use labels of the form LOC<digits>: the REGEXP matches "LOC" plus one or more digits at the label start.
+
+$ignore{'LOC_prefix'} =<<LOCP;
 SELECT object_xref_id
-    FROM object_xref JOIN xref USING(xref_id) JOIN source USING(source_id)
-     WHERE ox_status = 'DUMP_OUT' AND name = 'Uniprot/SPTREMBL' 
-      AND priority_description = 'protein_evidence_gt_2'
-BIGN
+  FROM object_xref JOIN xref USING(xref_id) JOIN source USING(source_id)
+   WHERE ox_status = 'DUMP_OUT' AND label REGEXP '^LOC[[:digit:]]+'
+LOCP
+
+  return [\@list,\%ignore];
+
+}
+
+# Returns [\@list, \%ignore] for transcript display xrefs; %ignore is left empty here.
+sub transcript_display_xref_sources {
+  my $self     = shift;
+
+  my @list = qw(RFAM
+	      miRBase
+	      RGD 
+	      Uniprot/SWISSPROT
+	      Uniprot/Varsplic
+);
+
+
+  my %ignore;
 
   return [\@list,\%ignore];
   
-- 
GitLab