From d269e43829c39b7db6211a216c60273c1f3eb93e Mon Sep 17 00:00:00 2001
From: Ian Longden <ianl@sanger.ac.uk>
Date: Wed, 12 Nov 2008 17:11:01 +0000
Subject: [PATCH] config changes

---
 misc-scripts/xref_mapping/xref_config.ini | 143 +++++++++++++++++-----
 1 file changed, 111 insertions(+), 32 deletions(-)

diff --git a/misc-scripts/xref_mapping/xref_config.ini b/misc-scripts/xref_mapping/xref_config.ini
index 15fec03be5..1228b9068c 100644
--- a/misc-scripts/xref_mapping/xref_config.ini
+++ b/misc-scripts/xref_mapping/xref_config.ini
@@ -245,7 +245,7 @@ priority        = 1
 prio_descr      =
 parser          = CCDSParser
 release_uri     =
-data_uri        = script:host=>genebuild4,dbname=>jb16_human_patch_52,
+data_uri        = script:host=>ens-research,dbname=>ianl_human_ccds_oct08,tran_name=>ENST,
 
 [source CCDS::mus_musculus]
 # Used by mus_musculus
@@ -256,7 +256,7 @@ priority        = 1
 prio_descr      =
 parser          = CCDSParser
 release_uri     =
-data_uri        = script:host=>genebuildx,dbname=>xxx,
+data_uri        = script:host=>ens-research,dbname=>ianl_mouse_ccds_oct08,tran_name=>ENSMUST,
 
 [source Celera_Pep::anopheles_gambiae]
 # Used by anopheles_gambiae
@@ -558,6 +558,7 @@ prio_descr      = main
 parser          = GOParser
 release_uri     = http://www.ebi.ac.uk/GOA/uniprot_release.html
 data_uri        = ftp://ftp.ebi.ac.uk/pub/databases/GO/goa/UNIPROT/gene_association.goa_uniprot.gz
+data_uri        = http://archive.geneontology.org/lastest-termdb/go_daily-termdb.obo-xml.gz
 
 [source InterproGO::MULTI]
 # Used by all
@@ -591,6 +592,7 @@ prio_descr      = main
 parser          = GOParser
 release_uri     =
 data_uri        = ftp://ftp.geneontology.org/pub/go/gene-associations/gene_association.zfin.gz
+data_uri        = http://archive.geneontology.org/lastest-termdb/go_daily-termdb.obo-xml.gz
 
 [source GO::homo_sapiens]
 # Used by homo_sapiens
@@ -602,6 +604,7 @@ prio_descr      = main
 parser          = GOParser
 release_uri     = http://www.ebi.ac.uk/GOA/HUMAN_release.html
 data_uri        = ftp://ftp.ebi.ac.uk/pub/databases/GO/goa/HUMAN/gene_association.goa_human.gz
+data_uri        = http://archive.geneontology.org/lastest-termdb/go_daily-termdb.obo-xml.gz
 
 [source GO::mus_musculus]
 # Used by mus_musculus
@@ -613,6 +616,7 @@ prio_descr      = main
 parser          = GOParser
 release_uri     = http://www.ebi.ac.uk/GOA/MOUSE_release.html
 data_uri        = ftp://ftp.ebi.ac.uk/pub/databases/GO/goa/MOUSE/gene_association.goa_mouse.gz
+data_uri        = http://archive.geneontology.org/lastest-termdb/go_daily-termdb.obo-xml.gz
 
 [source GO::mus_musculus#01]
  # Used by mus_musculus
@@ -624,6 +628,7 @@ data_uri        = ftp://ftp.ebi.ac.uk/pub/databases/GO/goa/MOUSE/gene_associatio
  parser          = GOParser
  release_uri     =
  data_uri        = http://www.geneontology.org/cgi-bin/downloadGOGA.pl/gene_association.mgi.gz
+ data_uri        = http://archive.geneontology.org/lastest-termdb/go_daily-termdb.obo-xml.gz
 
 [source GO::rattus_norvegicus]
 # Used by rattus_norvegicus
@@ -635,6 +640,7 @@ prio_descr      = main
 parser          = GOParser
 release_uri     = http://www.ebi.ac.uk/GOA/RAT_release.html
 data_uri        = ftp://ftp.ebi.ac.uk/pub/databases/GO/goa/RAT/gene_association.goa_rat.gz
+data_uri        = http://archive.geneontology.org/lastest-termdb/go_daily-termdb.obo-xml.gz
 
 
 [source HPA::homo_sapiens]
@@ -654,11 +660,11 @@ name            = HGNC
 download        = Y
 order           = 30
 #order 51
-priority        = 2
+priority        = 1
 prio_descr      = ccds
 parser          = HGNC_CCDSParser
 release_uri     =
-data_uri        = script:wget=>http://www.genenames.org/cgi-bin/hgnc_downloads.cgi?title=Genew+output+data&col=gd_hgnc_id&col=gd_pub_refseq_ids&status=Approved&status=Approved+Non-Human&status_opt=3&=on&where=&order_by=gd_hgnc_id&limit=&format=text&submit=submit&.cgifields=&.cgifields=status&.cgifields=chr,dbname=>steve_cdstrack_08may08,host=>genebuild7,
+data_uri        = script:wget=>http://www.genenames.org/cgi-bin/hgnc_downloads.cgi?title=HGNC+output+data&hgnc_dbtag=on&col=gd_hgnc_id&col=gd_status&col=gd_ccds_ids&status=Approved&status_opt=2&level=pri&=on&where=&order_by=gd_app_sym_sort&limit=&format=text&submit=submit&.cgifields=&.cgifields=level&.cgifields=chr&.cgifields=status&.cgifields=hgnc_dbtag,dbname=>ianl_human_ccds_oct08,host=>ens-research,
 
 
 [source HGNC::homo_sapiens#07]
@@ -666,11 +672,11 @@ data_uri        = script:wget=>http://www.genenames.org/cgi-bin/hgnc_downloads.c
 name            = HGNC
 download        = N
 order           = 30
-priority        = 1
+priority        = 2
 prio_descr      = ensembl_mapped
 parser          = HGNCParser
 release_uri     =
-data_uri        = file:HGNC/HGNC_TO_ENSG
+data_uri        = 
 
 [source HGNC::homo_sapiens]
 # Used by homo_sapiens
@@ -681,7 +687,7 @@ priority        = 3
 prio_descr      = havana
 parser          = HGNC_ENSTParser
 release_uri     =
-data_uri        = script:host=>ens-staging,port=>3306,dbname=>homo_sapiens_vega_51_36m,
+data_uri        = script:host=>ens-staging,port=>3306,dbname=>homo_sapiens_vega_52_36n,
 
 
 
@@ -694,7 +700,7 @@ priority        = 4
 prio_descr      = entrezgene_manual
 parser          = HGNCParser
 release_uri     =
-data_uri        = http://www.genenames.org/cgi-bin/hgnc_downloads.cgi?title=Genew+output+data&col=gd_hgnc_id&col=gd_app_sym&col=gd_app_name&col=gd_prev_sym&col=gd_aliases&col=gd_pub_eg_id&col=gd_pub_refseq_ids&col=md_eg_id&col=md_refseq_id&col=md_ensembl_id&status=Approved&status=Approved+Non-Human&status_opt=3&=on&where=&order_by=gd_hgnc_id&limit=&format=text&submit=submit&.cgifields=&.cgifields=status&.cgifields=chr
+data_uri        = http://www.genenames.org/cgi-bin/hgnc_downloads.cgi?title=Genew+output+data&col=gd_hgnc_id&col=gd_app_sym&col=gd_app_name&col=gd_prev_sym&col=gd_aliases&col=gd_pub_eg_id&col=gd_pub_refseq_ids&col=md_eg_id&col=md_refseq_id&col=gd_pub_ensembl_id&status=Approved&status=Approved+Non-Human&status_opt=3&=on&where=&order_by=gd_hgnc_id&limit=&format=text&submit=submit&.cgifields=&.cgifields=status&.cgifields=chr
 
 
 [source HGNC::homo_sapiens#03]
@@ -943,38 +949,81 @@ parser          = MIMParser
 release_uri     =
 data_uri        = ftp://ftp.ncbi.nih.gov/repository/OMIM/omim.txt.Z
 
-[source MGI::mus_musculus]
+[source MGI::mus_musculus#01]
 # Used by mus_musculus
 name            = MGI
 download        = N
 order           = 30
-priority        = 1
-prio_descr      =
-parser          = MGDParser
+priority        = 3
+prio_descr      = uniprot
+parser          = 
 release_uri     =
 data_uri        = taken from uniprot files
 
+[source MGI::mus_musculus#02]
+# Used by mus_musculus
+name            = MGI
+download        = Y
+order           = 45
+priority        = 2
+prio_descr      = vega
+parser          = MGI_Vega_Parser
+release_uri     =
+data_uri        = script:vhost=>ens-staging,vport=>3306,vdbname=>mus_musculus_vega_52_37e,cdbname=>mus_musculus_core_52_37e,chost=>ens-staging,
+
+[source MGI::mus_musculus#03]
+# Used by mus_musculus
+name            = MGI
+download        = Y
+order           = 1
+priority        = 10
+prio_descr      = descriptions
+parser          = MGI_Desc_Parser
+release_uri     =
+data_uri        = ftp://ftp.informatics.jax.org/pub/reports/MRK_List2.sql.rpt
+data_uri        = ftp://ftp.informatics.jax.org/pub/reports/MRK_Synonym.sql.rpt
+
+[source MGI::mus_musculus#04]
+# Used by mus_musculus
+name            = MGI
+download        = Y
+order           = 35
+priority        = 1
+prio_descr      = ccds
+parser          = MGI_CCDS_Parser
+release_uri     =
+data_uri        = script:wget=>ftp://ftp.ncbi.nlm.nih.gov/pub/CCDS/current_mouse/CCDS.20071128.txt,
+
+
+
 [source OTTT::homo_sapiens]
+#
+#  NOW taken when merging is done with havana
+#
 # Used by homo_sapiens
 name            = OTTT
-download        = Y
+download        = N
 order           = 50
 priority        = 1
 prio_descr      =
 parser          = OTTTParser
 release_uri     =
-data_uri        = script:host=>ens-staging,port=>3306,dbname=>homo_sapiens_vega_51_36m,
+data_uri        =
+#data_uri        = script:host=>ens-staging,port=>3306,dbname=>homo_sapiens_vega_52_36n,
 
 [source OTTT::mus_musculus]
+#
+# NOW taken when merging is done with havana
+#
 # Used by mus_musculus
 name            = OTTT
-download        = Y
+download        = N
 order           = 50
 priority        = 1
 prio_descr      =
 parser          = OTTTParser
 release_uri     =
-data_uri        = script:host=>ensdb-1-11,port=>5317,dbname=>mus_musculus_vega_51_37d,
+data_uri        = script:host=>ens-staging,port=>5317,dbname=>mus_musculus_vega_52_37e,
 
 [source RGD::rattus_norvegicus]
 # Used by rattus_norvegicus
@@ -1142,7 +1191,7 @@ parser          = RefSeqParser
 release_uri     = ftp://ftp.ncbi.nih.gov/refseq/release/release-notes/RefSeq-release*.txt
 data_uri        = ftp://ftp.ncbi.nih.gov/refseq/H_sapiens/mRNA_Prot/human.rna.fna.gz
 
-[source RefSeq_dna::mus_musculus]
+[source RefSeq_dna::mus_musculus#01]
 # Used by mus_musculus
 name            = RefSeq_dna
 download        = Y
@@ -1153,6 +1202,18 @@ parser          = RefSeqParser
 release_uri     = ftp://ftp.ncbi.nih.gov/refseq/release/release-notes/RefSeq-release*.txt
 data_uri        = ftp://ftp.ncbi.nih.gov/refseq/M_musculus/mRNA_Prot/mouse.rna.fna.gz
 
+[source RefSeq_dna::mus_musculus]
+# Used by mus_musculus
+name            = RefSeq_dna
+download        = Y
+order           = 25
+priority        = 1
+prio_descr      = ccds
+parser          = RefSeq_CCDSParser
+release_uri     =
+data_uri        = script:host=>genebuild7,dbname=>steve_cdstrack_08may08,
+
+
 [source RefSeq_dna::pan_troglodytes]
 # Used by pan_troglodytes
 name            = RefSeq_dna
@@ -1753,7 +1814,17 @@ priority        = 1
 prio_descr      =
 parser          = HGNC_curated_transcriptParser
 release_uri     = 
-data_uri        = script:host=>ens-staging,port=>3306,dbname=>homo_sapiens_vega_51_36m,
+data_uri        = script:host=>ens-staging,port=>3306,dbname=>homo_sapiens_vega_52_36n,
+
+[source MGI_curated_transcript::mus_musculus]
+name            = MGI_curated_transcript
+download        = Y
+order           = 49
+priority        = 1
+prio_descr      =
+parser          = MGI_curated_transcriptParser
+release_uri     = 
+data_uri        = script:cdbname=>mus_musculus_core_52_37e,chost=>ens-staging,vhost=>ens-staging,vdbname=>mus_musculus_vega_52_37e,
 
 
 [source Clone_based_vega_transcript::homo_sapiens]
@@ -1767,16 +1838,19 @@ release_uri     =
 data_uri        = loaded as part of HGNC_curated_transcript
 
 
-[source Vega_mouse_transcript::mus_musculus]
+#[source Vega_mouse_transcript::mus_musculus]
 # Used by mus_musculus
-name            = Vega_mouse_transcript
-download        = Y
-order           = 50
-priority        = 1
-prio_descr      =
-parser          = VegaParser
-release_uri     =
-data_uri        = ftp://ftp.sanger.ac.uk/pub/vega/mouse/cdna/Mus_musculus.VEGA.*.cdna.tot.fa.gz
+#
+# Vega_transcript now used and obtained during merge
+#
+#name            = Vega_mouse_transcript
+#download        = Y
+#order           = 50
+#priority        = 1
+#prio_descr      =
+#parser          = VegaParser
+#release_uri     =
+#data_uri        = ftp://ftp.sanger.ac.uk/pub/vega/mouse/cdna/Mus_musculus.VEGA.*.cdna.tot.fa.gz
 
 [source Xenopus_Jamboree::xenopus_tropicalis]
 # Used by xenopus_tropicalis
@@ -2596,9 +2670,9 @@ source          = Illumina_V1::homo_sapiens
 source          = Illumina_V2::homo_sapiens
 source          = Interpro::MULTI
 source          = MIM::homo_sapiens
-source          = OTTT::homo_sapiens
+#source          = OTTT::homo_sapiens  # got from merge
 source          = RefSeq_dna::homo_sapiens
-source          = RefSeq_dna::homo_sapiens#01
+#source          = RefSeq_dna::homo_sapiens#01
 source          = RefSeq_peptide::homo_sapiens
 source          = UniGene::homo_sapiens
 source          = Uniprot/SPTREMBL::MULTI
@@ -2660,15 +2734,20 @@ source          = IMGT/GENE_DB::mus_musculus
 source          = IPI::mus_musculus
 source          = Illumina_V1::mus_musculus
 source          = Illumina_V2::mus_musculus
+source          = MGI::mus_musculus#02
+source          = MGI::mus_musculus#03
+source          = MGI::mus_musculus#04
+source          = MGI_curated_transcript::mus_musculus
 source          = Interpro::MULTI
-source          = OTTT::mus_musculus
+#source          = OTTT::mus_musculus   # data obtained from merge
 source          = RefSeq_dna::mus_musculus
+#source          = RefSeq_dna::mus_musculus#01
 source          = RefSeq_peptide::mus_musculus
 source          = UniGene::mus_musculus
 source          = Uniprot/SPTREMBL::MULTI
 source          = Uniprot/SWISSPROT::MULTI
-source          = Uniprot/Varsplic::MULTI
-source          = Vega_mouse_transcript::mus_musculus
+#source          = Uniprot/Varsplic::MULTI
+#source          = Vega_mouse_transcript::mus_musculus # removed; use Vega_transcript
 source          = ncRNA::mus_musculus
 source          = UCSC::mus_musculus
 
-- 
GitLab