diff --git a/misc-scripts/xref_mapping/xref_config.ini b/misc-scripts/xref_mapping/xref_config.ini index 6a5a22a18846b46a1fddefd37ca7423dc33beda7..46beb344c92fa7c6ef5490715ddeda158355880f 100644 --- a/misc-scripts/xref_mapping/xref_config.ini +++ b/misc-scripts/xref_mapping/xref_config.ini @@ -382,7 +382,7 @@ priority = 1 prio_descr = parser = CCDSParser release_uri = -data_uri = script:host=>ens-livemirror,dbname=>ccds_human,tran_name=>ENST, +data_uri = script:host=>genebuild7,dbname=>db8_human_cdsonly_22sept2010,tran_name=>ENST, [source CCDS::mus_musculus] # Used by mus_musculus @@ -393,7 +393,7 @@ priority = 1 prio_descr = parser = CCDSParser release_uri = -data_uri = script:host=>ens-livemirror,dbname=>ccds_mouse,tran_name=>ENSMUST, +data_uri = script:host=>genebuild7,dbname=>db8_mouse_cdsonly_22sept2010,tran_name=>ENSMUST, [source Celera_Pep::anopheles_gambiae] # Used by anopheles_gambiae @@ -559,28 +559,6 @@ release_uri = data_uri = comes via EntrezGene -#[source Ens_Ag_gene::drosophila_melanogaster] -# Used by drosophila_melanogaster -#name = Ens_Ag_gene -#download = N -#order = 50 -#priority = 1 -#prio_descr = Ensembl mosquito gene predictions -#parser = Flybase_dmel_GFFv3_Parser -#release_uri = -#data_uri = ftp://ftp.flybase.net/genomes/Drosophila_melanogaster/current/gff/dmel-all-*.gff.gz - -#[source Ens_Am_gene::drosophila_melanogaster] -# Used by drosophila_melanogaster -#name = Ens_Am_gene -#download = N -#order = 50 -#priority = 1 -#prio_descr = Ensembl bee gene predictions -#parser = Flybase_dmel_GFFv3_Parser -#release_uri = -#data_uri = ftp://ftp.flybase.net/genomes/Drosophila_melanogaster/current/gff/dmel-all-*.gff.gz - [source Fantom::mus_musculus] # Used by mus_muscullus name = Fantom @@ -977,7 +955,8 @@ priority = 1 prio_descr = main parser = GOSlimParser release_uri = -data_uri = script:host=>ens-staging1,dbname=>ensembl_ontology_61, +data_uri = script: +#data_uri = script:host=>ens-staging1,dbname=>ensembl_ontology_61, [source goslim_goa::EG] # Used by Ensembl Genomes @@ -1059,7 +1038,7 @@ priority = 1 prio_descr = Human Protein Atlas (HPA) database parser = HPAParser release_uri = -data_uri = http://www.proteinatlas.org/data/xref.php +data_uri = http://www.proteinatlas.org/download/xref.php #[source IMGT_HLA::homo_sapiens] # Used by homo_sapiens @@ -1072,6 +1051,16 @@ data_uri = http://www.proteinatlas.org/data/xref.php #release_uri = #data_uri = http://hla.alleles.org/xrefs/embl.txt +[source LRG_HGNC_notransfer] +name = LRG_HGNC_notransfer +download = N +order = 30 +priority = 5 +prio_descr = +parser = HGNCParser +release_uri = +data_uri = + [source HGNC::homo_sapiens#01] # Used by homo_sapiens name = HGNC @@ -1082,7 +1071,7 @@ priority = 2 prio_descr = ccds parser = HGNC_CCDSParser release_uri = -data_uri = script:wget=>http://www.genenames.org/cgi-bin/hgnc_downloads.cgi?title=HGNC+output+data&hgnc_dbtag=on&col=gd_hgnc_id&col=gd_status&col=gd_ccds_ids&status=Approved&status_opt=2&level=pri&=on&where=&order_by=gd_app_sym_sort&limit=&format=text&submit=submit&.cgifields=&.cgifields=level&.cgifields=chr&.cgifields=status&.cgifields=hgnc_dbtag,dbname=>ccds_human,host=>ens-livemirror, +data_uri = script:wget=>http://www.genenames.org/cgi-bin/hgnc_downloads.cgi?title=HGNC+output+data&hgnc_dbtag=on&col=gd_hgnc_id&col=gd_status&col=gd_ccds_ids&status=Approved&status_opt=2&level=pri&=on&where=&order_by=gd_app_sym_sort&limit=&format=text&submit=submit&.cgifields=&.cgifields=level&.cgifields=chr&.cgifields=status&.cgifields=hgnc_dbtag,host=>genebuild7,dbname=>db8_human_cdsonly_22sept2010, [source HGNC::homo_sapiens#07] @@ -1090,7 +1079,7 @@ data_uri = script:wget=>http://www.genenames.org/cgi-bin/hgnc_downloads.c name = HGNC download = N order = 30 -priority = 3 +priority = 1 prio_descr = ensembl_manual parser = HGNCParser release_uri = @@ -1101,12 +1090,12 @@ data_uri = name = HGNC download = Y order = 30 -priority = 1 -prio_descr = havana +priority = 3 +prio_descr = vega parser = HGNC_ENSTParser release_uri = data_uri = script: -#data_uri = script:vhost=>ens-staging1,vdbname=>homo_sapiens_vega_58_37c,vport=>5306,vuser=>anonymous,chost=>ens-staging1,cport=>3306,cdbname=>homo_sapiens_core_58_37c, +#data_uri = script:vuser=>ensro,vhost=>ens-research,vdbname=>st3_homo_sapiens_vega_59_20100903,vport=>3306,cuser=>ensro,chost=>ens-staging1,cport=>3306,cdbname=>homo_sapiens_core_61_37f, [source HGNC::homo_sapiens#02] @@ -1198,8 +1187,8 @@ parser = IKMCParser release_uri = data_uri = script: -[source IKMC_No_products_available_yet::mus_musculus] -name = IKMC_No_products_available_yet +[source IKMCs_No_products_available_yet::mus_musculus] +name = IKMCs_No_products_available_yet download = N order = 40 priority = 1 @@ -1208,8 +1197,8 @@ parser = IKMCParser release_uri = data_uri = -[source IKMC_Vector_available::mus_musculus] -name = IKMC_Vector_available +[source IKMCs_Vector_available::mus_musculus] +name = IKMCs_Vector_available download = N order = 40 priority = 1 @@ -1218,8 +1207,8 @@ parser = IKMCParser release_uri = data_uri = -[source IKMC_ES_cells_available::mus_musculus] -name = IKMC_ES_cells_available +[source IKMCs_ES_cells_available::mus_musculus] +name = IKMCs_ES_cells_available download = N order = 40 priority = 1 @@ -1228,8 +1217,8 @@ parser = IKMCParser release_uri = data_uri = -[source IKMC_Mice_available::mus_musculus] -name = IKMC_Mice_available +[source IKMCs_Mice_available::mus_musculus] +name = IKMCs_Mice_available download = N order = 40 priority = 1 @@ -1408,30 +1397,41 @@ parser = Mim2GeneParser release_uri = data_uri = ftp://ftp.ncbi.nih.gov/gene/DATA/mim2gene -[source MGI::mus_musculus#01] +[source MGI::mus_musculus#04] # Used by mus_musculus name = MGI download = N order = 30 -priority = 3 +priority = 4 prio_descr = uniprot parser = release_uri = data_uri = taken from uniprot files +[source MGI::mus_musculus#01] +# Used by mus_musculus +name = MGI +download = Y +order = 30 +priority = 1 +prio_descr = official +parser = MGIParser +release_uri = +data_uri = ftp://ftp.informatics.jax.org/pub/reports/MRK_ENSEMBL.rpt + [source MGI::mus_musculus#02] # Used by mus_musculus name = MGI download = Y order = 45 -priority = 1 +priority = 2 prio_descr = vega parser = MGI_Vega_Parser release_uri = data_uri = script: -#data_uri = script:vhost=>ens-staging2,vport=>3306,vdbname=>mus_musculus_vega_58_37k,chost=>ens-staging2,cport=>3306,cdbname=>mus_musculus_core_58_37k, +#data_uri = script:vhost=>ens-research,vport=>3306,vdbname=>st3_mus_musculus_vega_61_20100917,chost=>ens-staging2,cport=>3306,cdbname=>mus_musculus_core_61_37n, -[source MGI::mus_musculus#03] +[source MGI::mus_musculus#05] # Used by mus_musculus name = MGI download = Y @@ -1443,12 +1443,12 @@ release_uri = data_uri = ftp://ftp.informatics.jax.org/pub/reports/MRK_List2.sql.rpt data_uri = ftp://ftp.informatics.jax.org/pub/reports/MRK_Synonym.sql.rpt -[source MGI::mus_musculus#04] +[source MGI::mus_musculus#03] # Used by mus_musculus name = MGI download = Y order = 35 -priority = 2 +priority = 3 prio_descr = ccds parser = MGI_CCDS_Parser release_uri = @@ -1660,7 +1660,7 @@ priority = 1 prio_descr = ccds parser = RefSeq_CCDSParser release_uri = -data_uri = script:host=>ens-livemirror,dbname=>ccds_human, +data_uri = script:host=>genebuild7,dbname=>db8_human_cdsonly_22sept2010, [source RefSeq_dna::homo_sapiens#01] # Used by homo_sapiens @@ -1693,7 +1693,7 @@ priority = 1 prio_descr = ccds parser = RefSeq_CCDSParser release_uri = -data_uri = script:host=>ens-livemirror,dbname=>ccds_mouse, +data_uri = script:host=>genebuild7,dbname=>db8_mouse_cdsonly_22sept2010, [source RefSeq_dna::pan_troglodytes] @@ -2428,19 +2428,19 @@ release_uri = [source HGNC_curated_transcript::homo_sapiens] -name = HGNC_curated_transcript +name = HGNC_curated_transcript_notransfer download = Y order = 49 priority = 1 -prio_descr = +prio_descr = curated with transcript number postfix parser = curated_transcriptParser release_uri = data_uri = script: -#data_uri = script:vhost=>ensembldb.ensembl.org,vport=>5306,vdbname=>homo_sapiens_vega_58_37c,vuser=>anonymous,vprot=>5306,chost=>ens-livemirror,cport=>3306,cdbname=>homo_sapiens_core_58_37c, +#data_uri = script:vhost=>ens-research,vdbname=>st3_homo_sapiens_vega_59_20100903,vuser=>ensro,vport=>3306,chost=>ens-staging,cport=>3306,cdbname=>homo_sapiens_core_61_37f, -[source HGNC_automatic_transcript::homo_sapiens] -name = HGNC_automatic_transcript +[source HGNC_transcript_name] +name = HGNC_transcript_name download = N order = 49 priority = 1 @@ -2448,8 +2448,8 @@ prio_descr = parser = done_in_official_naming release_uri = -[source HGNC_curated_gene::homo_sapiens] -name = HGNC_curated_gene +[source HGNC_gene_name] +name = HGNC_gene_name download = N order = 49 priority = 1 @@ -2457,17 +2457,9 @@ prio_descr = parser = done_in_official_naming release_uri = -[source HGNC_automatic_gene::homo_sapiens] -name = HGNC_automatic_gene -download = N -order = 49 -priority = 1 -prio_descr = -parser = done_in_official_naming -release_uri = [source MGI_curated_transcript::mus_musculus] -name = MGI_curated_transcript +name = MGI_curated_transcript_notransfer download = Y order = 49 priority = 1 @@ -2475,10 +2467,11 @@ prio_descr = parser = curated_transcriptParser release_uri = data_uri = script: -#data_uri = script:vhost=>ens-staging2,vport=>3306,vdbname=>mus_musculus_vega_58_37k,chost=>ens-staging2,cport=>3306,cdbname=>mus_musculus_core_58_37k, +#data_uri = script:vhost=>ens-research,vport=>3306,vdbname=>st3_mus_musculus_vega_61_20100917,chost=>ens-staging2,cport=>3306,cdbname=>mus_musculus_core_61_37n, -[source MGI_curated_gene::mus_musculus] -name = MGI_curated_gene + +[source MGI_automatic_transcript::mus_musculus] +name = MGI_automatic_transcript_notransfer download = N order = 49 priority = 1 @@ -2486,25 +2479,27 @@ prio_descr = parser = done_in_official_naming release_uri = -[source MGI_automatic_gene::mus_musculus] -name = MGI_automatic_gene +[source MGI_gene_name] +# Used homo_sapiens,mus_musculus +name = MGI_gene_name download = N -order = 49 +order = 70 priority = 1 prio_descr = -parser = done_in_official_naming -release_uri = - +parser = comes via official naming +release_uri = +data_uri = -[source MGI_automatic_transcript::mus_musculus] -name = MGI_automatic_transcript +[source MGI_transcript_name] +# Used homo_sapiens,mus_musculus +name = MGI_transcript_name download = N -order = 49 +order = 70 priority = 1 prio_descr = -parser = done_in_official_naming -release_uri = - +parser = comes via official naming +release_uri = +data_uri = [source Clone_based_vega_transcript::homo_sapiens] name = Clone_based_vega_transcript @@ -2557,13 +2552,13 @@ parser = XenopusJamboreeParser release_uri = data_uri = ftp://ftp.xenbase.org/pub/GenePageReports/GenePageEnsemblModelMapping.txt -[source ZFIN_ID::danio_rerio] +[source ZFIN_ID::danio_rerio#01] # Used by danio_rerio name = ZFIN_ID download = Y order = 30 -priority = 1 -prio_descr = +priority = 2 +prio_descr = uniprot/refseq parser = ZFINParser release_uri = data_uri = http://zfin.org/data_transfer/Downloads/refseq.txt @@ -2571,6 +2566,57 @@ data_uri = http://zfin.org/data_transfer/Downloads/uniprot.txt data_uri = http://zfin.org/data_transfer/Downloads/aliases.txt data_uri = http://zfin.org/data_transfer/Downloads/gene_seq.txt +[source ZFIN_ID::danio_rerio#02] +# Used by danio_rerio +name = ZFIN_ID +download = Y +order = 30 +priority = 1 +prio_descr = vega +parser = ZFIN_DARTParser +release_uri = +data_uri = script:vuser=>ensro,vhost=>ens-staging,vdbname=>danio_rerio_vega_61_9a,vport=>3306, + +[source ZFIN_ID::danio_rerio#03] +# Used by danio_rerio +name = ZFIN_ID +download = Y +order = 1 +priority = 10 +prio_descr = description_only +parser = ZFINDescParser +release_uri = +data_uri = http://zfin.org/data_transfer/Downloads/genetic_markers.txt + +[source ZFIN_ID_curated_transcript::danio_rerio] +name = ZFIN_ID_curated_transcript_notransfer +download = Y +order = 49 +priority = 1 +prio_descr = curated with transcript number postfix +parser = curated_transcriptParser +release_uri = +data_uri = script: + +[source ZFIN_ID_transcript_name] +name = ZFIN_ID_transcript_name +download = N +order = 49 +priority = 1 +prio_descr = +parser = done_in_official_naming +release_uri = + +[source ZFIN_ID_gene_name] +name = ZFIN_ID_gene_name +download = N +order = 49 +priority = 1 +prio_descr = +parser = done_in_official_naming +release_uri = + + [source cint_aniseed_jgi_v1::ciona_intestinalis] # Used by ciona_intestinalis name = cint_aniseed_jgi_v1 @@ -2638,6 +2684,29 @@ parser = comes via ncRNAParser release_uri = data_uri = +[source miRBase_gene_name] +# Used homo_sapiens,mus_musculus +name = miRBase_gene_name +download = N +order = 70 +priority = 1 +prio_descr = +parser = comes via official naming +release_uri = +data_uri = + +[source miRBase_transcript_name] +# Used homo_sapiens,mus_musculus +name = miRBase_transcript_name +download = N +order = 70 +priority = 1 +prio_descr = +parser = comes via official naming +release_uri = +data_uri = + + [source ncRNA::MULTI] name = ncRNA download = Y @@ -2646,7 +2715,7 @@ priority = 1 prio_descr = parser = ncRNA_DBParser release_uri = -data_uri = script:host=>genebuild7,port=>3306,dbname=>sw4_ncRNA_Xrefs +data_uri = script:host=>genebuild7,port=>3306,dbname=>sw4_ncRNA_Xrefs, [source ncRNA::culex_pipiens] name = ncRNA_Culex @@ -2658,6 +2727,29 @@ parser = ncRNAParser release_uri = data_uri = file:/lustre/work1/ensembl/kmegy/Culex/Xrefs/culex_ncRNAxref_sw4.txt +[source RFAM_gene_name] +# Used homo_sapiens,mus_musculus +name = RFAM_gene_name +download = N +order = 70 +priority = 1 +prio_descr = +parser = comes via official naming +release_uri = +data_uri = + +[source RFAM_transcript_name] +# Used homo_sapiens,mus_musculus +name = RFAM_transcript_name +download = N +order = 70 +priority = 1 +prio_descr = +parser = comes via official naming +release_uri = +data_uri = + + [source RNAMMER::MULTI-asp] # Used by aspergillus_clavatus, aspergillus_flavus, aspergillus_fumigatus, aspergillus_nidulans, aspergillus_niger, aspergillus_oryzae, aspergillus_terreus, neosartorya_fischeri name = RNAMMER @@ -3186,8 +3278,11 @@ source = RefSeq_peptide::danio_rerio source = UniGene::danio_rerio source = Uniprot/SPTREMBL::MULTI source = Uniprot/SWISSPROT::MULTI -source = ZFIN_ID::danio_rerio +source = ZFIN_ID::danio_rerio#01 +source = ZFIN_ID::danio_rerio#02 +source = ZFIN_ID::danio_rerio#03 source = ncRNA::MULTI +#source = ZFIN_ID_curated_transcript::danio_rerio [species dasypus_novemcinctus] taxonomy_id = 9361 @@ -3623,9 +3718,10 @@ source = InterproGO::MULTI source = IMGT/GENE_DB::mus_musculus source = IKMCs_KOs::mus_musculus source = IPI::mus_musculus +source = MGI::mus_musculus#01 source = MGI::mus_musculus#02 source = MGI::mus_musculus#03 -source = MGI::mus_musculus#04 +source = MGI::mus_musculus#05 source = MGI_curated_transcript::mus_musculus source = Interpro::MULTI source = RefSeq_dna::mus_musculus @@ -3639,6 +3735,7 @@ source = ncRNA::MULTI source = UCSC::mus_musculus source = Fantom::mus_musculus + [species myotis_lucifugus] taxonomy_id = 59463 aliases = little brown bat, microbat