diff --git a/misc-scripts/xref_mapping/xref_config.ini b/misc-scripts/xref_mapping/xref_config.ini index 1f75193adfcb803a3c3fcf58e2edbd191520b1a3..364abed9701626e23599f19a7d15d70dca6ece7b 100644 --- a/misc-scripts/xref_mapping/xref_config.ini +++ b/misc-scripts/xref_mapping/xref_config.ini @@ -1,23 +1,25 @@ # $Id$ -######################################################################## -# SOURCES # -# # -# Keys: # -# name - name of this source (required) # -# download - must be downloaded (Y or N, required) # -# order - parsing order for this source # -# priority - priority of these data files when more files belong # -# to the same source 'name' # -# prio_descr - label for the 'priority' # -# parser - the parser to be used (required) # -# release_uri - URI pointing to release information (optional) # -# data_uri - URI pointing to the data files (multiple, required) # -# dependent - These must be loaded first # -# Note is sepecies does not have this source then test # -# ignores these # -# # -######################################################################## +########################################################################## +# SOURCES # +# # +# Keys: # +# name - name of this source (required) # +# download - must be downloaded (Y or N, required) # +# order - parsing order for this source # +# priority - priority of these data files when more files belong # +# to the same source 'name' # +# prio_descr - label for the 'priority' # +# parser - the parser to be used (required) # +# release_uri - URI pointing to release information (optional) # +# data_uri - URI pointing to the data files (multiple, required) # +# dependent_on - Comma separated list of sources which must be loaded # +# first. 
# +# Note that if species does not have xrefs from a # +# master source specified in this list then the # +# dependency is ignored # +# # +########################################################################## [source EC_NUMBER::saccharomyces_cerevisiae] # Used by S.cerevisiae @@ -202,7 +204,7 @@ order = 50 priority = 1 prio_descr = parser = PHIbaseParser -dependent = Uniprot/SWISSPROT,Uniprot/SPTREMBL +dependent_on = Uniprot/SWISSPROT,Uniprot/SPTREMBL release_uri = data_uri = http://www.phi-base.org/PHI-export.php @@ -597,7 +599,7 @@ order = 100 priority = 1 prio_descr = parser = FantomParser -dependent = Uniprot/SPTREMBL,Uniprot/SWISSPROT +dependent_on = Uniprot/SPTREMBL,Uniprot/SWISSPROT release_uri = data_uri = ftp://fantom.gsc.riken.jp/FANTOM3/DDBJ/DDBJ_fantom3_HTC_accession.txt.gz @@ -620,7 +622,7 @@ order = 100 priority = 1 prio_descr = ID assigned by FlyBase parser = FlybaseParser -dependent = Uniprot/SPTREMBL,Uniprot/SWISSPROT,Interpro +dependent_on = Uniprot/SPTREMBL,Uniprot/SWISSPROT,Interpro release_uri = data_uri = ftp://ftp.flybase.net/genomes/Drosophila_melanogaster/dmel_r5.39_FB2011_07/gff/dmel-all-*.gff.gz @@ -632,7 +634,7 @@ order = 100 priority = 1 prio_descr = ID assigned by FlyBase parser = FlybaseParser -dependent = Uniprot/SPTREMBL,Uniprot/SWISSPROT,Interpro +dependent_on = Uniprot/SPTREMBL,Uniprot/SWISSPROT,Interpro release_uri = data_uri = ftp://ftp.flybase.net/genomes/Drosophila_pseudoobscura/current/gff/dpse-all-*.gff.gz @@ -644,7 +646,7 @@ order = 100 priority = 1 prio_descr = ID assigned by FlyBase parser = FlybaseParser -dependent = Uniprot/SPTREMBL,Uniprot/SWISSPROT,Interpro +dependent_on = Uniprot/SPTREMBL,Uniprot/SWISSPROT,Interpro release_uri = data_uri = ftp://ftp.flybase.org/releases/current/dgri_r1.3/gff/dgri-all-r1.3.gff.gz @@ -656,7 +658,7 @@ order = 100 priority = 1 prio_descr = ID assigned by FlyBase parser = FlybaseParser -dependent = Uniprot/SPTREMBL,Uniprot/SWISSPROT,Interpro +dependent_on = 
Uniprot/SPTREMBL,Uniprot/SWISSPROT,Interpro release_uri = data_uri = ftp://ftp.flybase.org/releases/current/dwil_r1.3/gff/dwil-all-r1.3.gff.gz @@ -668,7 +670,7 @@ order = 100 priority = 1 prio_descr = ID assigned by FlyBase parser = FlybaseParser -dependent = Uniprot/SPTREMBL,Uniprot/SWISSPROT,Interpro +dependent_on = Uniprot/SPTREMBL,Uniprot/SWISSPROT,Interpro release_uri = data_uri = ftp://ftp.flybase.org/releases/current/dana_r1.3/gff/dana-all-r1.3.gff.gz @@ -680,7 +682,7 @@ order = 100 priority = 1 prio_descr = ID assigned by FlyBase parser = FlybaseParser -dependent = Uniprot/SPTREMBL,Uniprot/SWISSPROT,Interpro +dependent_on = Uniprot/SPTREMBL,Uniprot/SWISSPROT,Interpro release_uri = data_uri = ftp://ftp.flybase.org/releases/current/dyak_r1.3/gff/dyak-all-r1.3.gff.gz @@ -692,7 +694,7 @@ order = 100 priority = 1 prio_descr = ID assigned by FlyBase parser = FlybaseParser -dependent = Uniprot/SPTREMBL,Uniprot/SWISSPROT,Interpro +dependent_on = Uniprot/SPTREMBL,Uniprot/SWISSPROT,Interpro release_uri = data_uri = ftp://ftp.flybase.org/releases/current/dsim_r1.3/gff/dsim-all-r1.3.gff.gz @@ -704,7 +706,7 @@ order = 100 priority = 1 prio_descr = ID assigned by FlyBase parser = FlybaseParser -dependent = Uniprot/SPTREMBL,Uniprot/SWISSPROT,Interpro +dependent_on = Uniprot/SPTREMBL,Uniprot/SWISSPROT,Interpro release_uri = data_uri = ftp://ftp.flybase.org/releases/current/dsec_r1.3/gff/dsec-all-r1.3.gff.gz @@ -717,7 +719,7 @@ order = 100 priority = 1 prio_descr = ID assigned by FlyBase parser = FlybaseParser -dependent = Uniprot/SPTREMBL,Uniprot/SWISSPROT,Interpro +dependent_on = Uniprot/SPTREMBL,Uniprot/SWISSPROT,Interpro release_uri = data_uri = ftp://ftp.flybase.org/releases/current/dere_r1.3/gff/dere-all-r1.3.gff.gz @@ -729,7 +731,7 @@ order = 100 priority = 1 prio_descr = ID assigned by FlyBase parser = FlybaseParser -dependent = Uniprot/SPTREMBL,Uniprot/SWISSPROT,Interpro +dependent_on = Uniprot/SPTREMBL,Uniprot/SWISSPROT,Interpro release_uri = data_uri = 
ftp://ftp.flybase.org/releases/current/dper_r1.3/gff/dper-all-r1.3.gff.gz @@ -741,7 +743,7 @@ order = 100 priority = 1 prio_descr = ID assigned by FlyBase parser = FlybaseParser -dependent = Uniprot/SPTREMBL,Uniprot/SWISSPROT,Interpro +dependent_on = Uniprot/SPTREMBL,Uniprot/SWISSPROT,Interpro release_uri = data_uri = ftp://ftp.flybase.org/releases/current/dmoj_r1.3/gff/dmoj-all-r1.3.gff.gz @@ -753,7 +755,7 @@ order = 100 priority = 1 prio_descr = ID assigned by FlyBase parser = FlybaseParser -dependent = Uniprot/SPTREMBL,Uniprot/SWISSPROT,Interpro +dependent_on = Uniprot/SPTREMBL,Uniprot/SWISSPROT,Interpro release_uri = data_uri = ftp://ftp.flybase.org/releases/current/dvir_r1.2/gff/dvir-all-r1.2.gff.gz @@ -952,7 +954,7 @@ order = 80 priority = 1 prio_descr = main parser = GOParser -dependent = Uniprot/SPTREMBL,Uniprot/SWISSPROT,RefSeq_dna,RefSeq_peptide,SGD +dependent_on = Uniprot/SPTREMBL,Uniprot/SWISSPROT,RefSeq_dna,RefSeq_peptide,SGD release_uri = http://www.ebi.ac.uk/GOA/uniprot_release.html data_uri = ftp://ftp.ebi.ac.uk/pub/databases/GO/goa/UNIPROT/gene_association.goa_uniprot.gz data_uri = http://archive.geneontology.org/latest-termdb/go_daily-termdb.obo-xml.gz @@ -965,7 +967,7 @@ order = 85 priority = 2 prio_descr = interpro parser = InterproGoParser -dependent = GO +dependent_on = GO release_uri = data_uri = http://www.geneontology.org/external2go/interpro2go @@ -977,7 +979,7 @@ order = 85 priority = 1 prio_descr = main parser = GOParser -dependent = Uniprot/SPTREMBL,Uniprot/SWISSPROT,RefSeq_dna,RefSeq_peptide,wormbase_all +dependent_on = Uniprot/SPTREMBL,Uniprot/SWISSPROT,RefSeq_dna,RefSeq_peptide,wormbase_all release_uri = data_uri = ftp://ftp.geneontology.org/pub/go/gene-associations/gene_association.wb.gz @@ -990,13 +992,11 @@ order = 85 priority = 1 prio_descr = main parser = Use by GOParser -dependent = +dependent_on = release_uri = data_uri = - - [source GO::danio_rerio] # Used by danio_rerio name = GO @@ -1005,7 +1005,7 @@ order = 85 priority = 1 
prio_descr = main parser = GOParser -dependent = Uniprot/SPTREMBL,Uniprot/SWISSPROT,RefSeq_dna,RefSeq_peptide +dependent_on = Uniprot/SPTREMBL,Uniprot/SWISSPROT,RefSeq_dna,RefSeq_peptide release_uri = data_uri = ftp://ftp.geneontology.org/pub/go/gene-associations/gene_association.zfin.gz data_uri = http://archive.geneontology.org/latest-termdb/go_daily-termdb.obo-xml.gz @@ -1018,7 +1018,7 @@ order = 120 priority = 1 prio_descr = main parser = GOSlimParser -dependent = GO +dependent_on = GO release_uri = #data_uri = script: data_uri = script:host=>ens-staging1,dbname=>ensembl_ontology_67, @@ -1031,7 +1031,7 @@ order = 120 priority = 1 prio_descr = main parser = GOSlimParser -dependent = GO +dependent_on = GO release_uri = data_uri = script:host=>mysql-eg-pan-1.ebi.ac.uk,port=>4276,dbname=>ensemblgenomes_ontology_14_67, @@ -1043,7 +1043,7 @@ order = 85 priority = 1 prio_descr = main parser = GOParser -dependent = Uniprot/SPTREMBL,Uniprot/SWISSPROT,RefSeq_dna,RefSeq_peptide +dependent_on = Uniprot/SPTREMBL,Uniprot/SWISSPROT,RefSeq_dna,RefSeq_peptide release_uri = http://www.ebi.ac.uk/GOA/HUMAN_release.html data_uri = ftp://ftp.ebi.ac.uk/pub/databases/GO/goa/HUMAN/gene_association.goa_human.gz data_uri = http://archive.geneontology.org/latest-termdb/go_daily-termdb.obo-xml.gz @@ -1056,7 +1056,7 @@ order = 85 priority = 1 prio_descr = main parser = GOParser -dependent = Uniprot/SPTREMBL,Uniprot/SWISSPROT,RefSeq_dna,RefSeq_peptide +dependent_on = Uniprot/SPTREMBL,Uniprot/SWISSPROT,RefSeq_dna,RefSeq_peptide release_uri = http://www.ebi.ac.uk/GOA/MOUSE_release.html data_uri = ftp://ftp.ebi.ac.uk/pub/databases/GO/goa/MOUSE/gene_association.goa_mouse.gz data_uri = http://archive.geneontology.org/latest-termdb/go_daily-termdb.obo-xml.gz @@ -1070,7 +1070,7 @@ data_uri = http://archive.geneontology.org/latest-termdb/go_daily-termdb. 
# priority = 1 # prio_descr = goga # parser = GOParser -# dependent = Uniprot/SPTREMBL,Uniprot/SWISSPROT,RefSeq_dna,RefSeq_peptide +# dependent_on = Uniprot/SPTREMBL,Uniprot/SWISSPROT,RefSeq_dna,RefSeq_peptide # release_uri = # data_uri = http://www.geneontology.org/cgi-bin/downloadGOGA.pl/gene_association.mgi.gz # data_uri = http://archive.geneontology.org/latest-termdb/go_daily-termdb.obo-xml.gz @@ -1083,7 +1083,7 @@ order = 85 priority = 1 prio_descr = main parser = GOParser -dependent = Uniprot/SPTREMBL,Uniprot/SWISSPROT,RefSeq_dna,RefSeq_peptide +dependent_on = Uniprot/SPTREMBL,Uniprot/SWISSPROT,RefSeq_dna,RefSeq_peptide release_uri = http://www.ebi.ac.uk/GOA/rat_release.html data_uri = ftp://ftp.ebi.ac.uk/pub/databases/GO/goa/RAT/gene_association.goa_rat.gz data_uri = http://archive.geneontology.org/latest-termdb/go_daily-termdb.obo-xml.gz @@ -1096,7 +1096,7 @@ order = 85 priority = 1 prio_descr = main parser = GOParser -dependent = Uniprot/SPTREMBL,Uniprot/SWISSPROT,RefSeq_dna,RefSeq_peptide,SGD +dependent_on = Uniprot/SPTREMBL,Uniprot/SWISSPROT,RefSeq_dna,RefSeq_peptide,SGD release_uri = data_uri = ftp://ftp.geneontology.org/pub/go/gene-associations/gene_association.sgd.gz data_uri = http://archive.geneontology.org/latest-termdb/go_daily-termdb.obo-xml.gz @@ -1121,7 +1121,7 @@ order = 85 priority = 2 prio_descr = goga parser = GOParser -dependent = Uniprot/SPTREMBL,Uniprot/SWISSPROT,RefSeq_dna,RefSeq_peptide +dependent_on = Uniprot/SPTREMBL,Uniprot/SWISSPROT,RefSeq_dna,RefSeq_peptide release_uri = data_uri = http://www.geneontology.org/cgi-bin/downloadGOGA.pl/gene_association.fb.gz data_uri = http://archive.geneontology.org/latest-termdb/go_daily-termdb.obo-xml.gz @@ -1201,7 +1201,7 @@ order = 29 priority = 4 prio_descr = entrezgene_manual parser = HGNCParser -dependent = EntrezGene,Uniprot/SWISSPROT,RefSeq_dna,RefSeq_peptide +dependent_on = EntrezGene,Uniprot/SWISSPROT,RefSeq_dna,RefSeq_peptide release_uri = data_uri = 
http://www.genenames.org/cgi-bin/hgnc_downloads.cgi?title=Genew+output+data&col=gd_hgnc_id&col=gd_app_sym&col=gd_app_name&col=gd_prev_sym&col=gd_aliases&col=gd_pub_eg_id&col=gd_pub_refseq_ids&col=md_eg_id&col=md_refseq_id&col=gd_pub_ensembl_id&col=md_prot_id&col=gd_lsdb_links&status=Approved&status=Approved+Non-Human&status_opt=3&=on&where=&order_by=gd_hgnc_id&limit=&format=text&submit=submit&.cgifields=&.cgifields=status&.cgifields=chr @@ -1490,7 +1490,7 @@ order = 60 priority = 1 prio_descr = parser = Mim2GeneParser -dependent = MIM,EntrezGene +dependent_on = MIM,EntrezGene release_uri = data_uri = ftp://grcf.jhmi.edu/OMIM/mim2gene.txt @@ -1559,7 +1559,7 @@ order = 50 priority = 1 prio_descr = parser = OrphanetParser -dependent = HGNC +dependent_on = HGNC release_uri = data_uri = http://www.orphadata.org/data/xml/en_product6.xml @@ -1600,7 +1600,7 @@ order = 30 priority = 1 prio_descr = parser = RGDParser -dependent = RefSeq_dna,RefSeq_peptide +dependent_on = RefSeq_dna,RefSeq_peptide release_uri = data_uri = ftp://rgd.mcw.edu/pub/data_release/GENES_RAT.txt @@ -1677,7 +1677,6 @@ order = 20 priority = 1 prio_descr = refseq parser = RefSeqParser -dependent = RefSeq_dna release_uri = [source RefSeq_mRNA::CCDS] @@ -1698,7 +1697,6 @@ order = 20 priority = 1 prio_descr = parser = RefSeqParser -dependent = RefSeq_dna release_uri = [source RefSeq_mRNA_predicted::MULTI] @@ -1709,7 +1707,6 @@ order = 20 priority = 1 prio_descr = refseq parser = RefSeqParser -dependent = RefSeq_dna release_uri = [source RefSeq_mRNA_predicted::CCDS] @@ -1730,7 +1727,6 @@ order = 20 priority = 1 prio_descr = parser = RefSeqParser -dependent = RefSeq_dna release_uri = [source RefSeq_dna::MULTI-vertebrate_mammalian] @@ -2748,7 +2744,7 @@ order = 20 priority = 1 prio_descr = parser = UniProtAltParser -dependent = MIM +dependent_on = MIM release_uri = ftp://ftp.ebi.ac.uk/pub/databases/uniprot/knowledgebase/reldate.txt data_uri = 
ftp://ftp.ebi.ac.uk/pub/databases/uniprot/knowledgebase/uniprot_trembl.dat.gz @@ -2783,7 +2779,7 @@ order = 20 priority = 2 prio_descr = sequence_mapped parser = UniProtAltParser -dependent = MIM +dependent_on = MIM release_uri = ftp://ftp.ebi.ac.uk/pub/databases/uniprot/knowledgebase/reldate.txt data_uri = ftp://ftp.ebi.ac.uk/pub/databases/uniprot/knowledgebase/uniprot_sprot.dat.gz @@ -2802,7 +2798,7 @@ order = 22 priority = 1 prio_descr = uniprot_mapped parser = UniProtDirectParser -dependent = Unprot/SWISSPROT +dependent_on = Unprot/SWISSPROT release_uri = data_uri = script:wget=>ftp://ftp.ebi.ac.uk/pub/contrib/xrefs/ens-sp.map,host=>ens-staging1,dbname=>ensembl_production, diff --git a/misc-scripts/xref_mapping/xref_config2sql.pl b/misc-scripts/xref_mapping/xref_config2sql.pl index 87ada76faac2979efa8c3015551f0931649358bf..3164bf37506e506f8fcebdc37a913ddbc162d7bc 100755 --- a/misc-scripts/xref_mapping/xref_config2sql.pl +++ b/misc-scripts/xref_mapping/xref_config2sql.pl @@ -113,7 +113,7 @@ foreach my $source_section ( sort( $config->GroupMembers('source') ) ) { print("\n"); my @dependents = - split( /\,/, $config->val( $source_section, 'dependent', '' ) ); + split( /\,/, $config->val( $source_section, 'dependent_on', '' ) ); foreach my $dep (@dependents){ print "# adding source dependency that $source_section needs $dep loaded first\n"; @@ -188,16 +188,7 @@ foreach my $species_section ( sort( $config->GroupMembers('species') ) ) print("\n"); - my @dependents = - split( /\,/, $config->val( $source_section, 'dependent', '' ) ); - - foreach my $dep (@dependents){ - print "# adding source dependency that $source_section needs $dep loaded first\n"; - print "INSERT IGNORE INTO dependent_source (master_source_id, dependent_name)\n"; - printf( "VALUES (%d, '%s');\n\n", $source_ids{$source_section}, $dep); - } - } ## end foreach my $source_name ( sort...) } ## end foreach my $species_section... -print "# FINISEHD SUCCESSFULLY\n" +print "# FINISHED SUCCESSFULLY\n"