From ac2ee6552ba6743af14cad358330802db8a913c2 Mon Sep 17 00:00:00 2001 From: Ian Longden <ianl@sanger.ac.uk> Date: Tue, 25 Oct 2011 13:02:39 +0000 Subject: [PATCH] example files from the xref pipeline --- misc-scripts/xref_mapping/docs/mapper1.out | 225 +++++++ misc-scripts/xref_mapping/docs/mapper2.out | 165 ++++++ misc-scripts/xref_mapping/docs/parse.out | 646 +++++++++++++++++++++ 3 files changed, 1036 insertions(+) create mode 100644 misc-scripts/xref_mapping/docs/mapper1.out create mode 100644 misc-scripts/xref_mapping/docs/mapper2.out create mode 100644 misc-scripts/xref_mapping/docs/parse.out diff --git a/misc-scripts/xref_mapping/docs/mapper1.out b/misc-scripts/xref_mapping/docs/mapper1.out new file mode 100644 index 0000000000..e6e556173e --- /dev/null +++ b/misc-scripts/xref_mapping/docs/mapper1.out @@ -0,0 +1,225 @@ +Options: -file xref_input +running in verbose mode +current status is parsing_finished +No alt_alleles found for this species. + +Dumping xref & Ensembl sequences +Dumping Xref fasta files +Dumping Ensembl Fasta files +53067 Transcripts dumped 41693 Transaltions dumped +Deleting out, err and map files from output dir: /workdir/release_65/zebrafish/ensembl +Deleting txt and sql files from output dir: /workdir/release_65/zebrafish/ensembl +LSF job ID for main mapping job: 887287, name ExonerateGappedBest1_1318933449 with 481 arrays elements) +LSF job ID for main mapping job: 887288, name ExonerateGappedBest1_1318933451 with 253 arrays elements) +LSF job ID for Depend job: 887289 (job array with 1 job) +already processed = 0, processed = 734, errors = 0, empty = 0 +Could not find stable id ENSDART00000126968 in table to get the internal id hence ignoring!!! (for RFAM) +Could not find stable id ENSDART00000121043 in table to get the internal id hence ignoring!!! 
(for RFAM) +The foillowing will be processed as priority xrefs + Uniprot/SPTREMBL + ZFIN_ID +Process Pairs +Starting at object_xref of 837705 + NEW 2733 +2733 new relationships added +Writing InterPro + +246386 already existed + + Wrote 0 interpro table entries + including 51399 object xrefs, + and 51399 go xrefs +ZFIN_ID is associated with both Transcript and Translation object types +Therefore moving all associations from Translation to Transcript +DBASS3 moved to Gene level. +DBASS3 moved to Gene level. +DBASS5 moved to Gene level. +DBASS5 moved to Gene level. +EntrezGene moved to Gene level. +EntrezGene moved to Gene level. +miRBase moved to Gene level. +miRBase moved to Gene level. +RFAM moved to Gene level. +RFAM moved to Gene level. +TRNASCAN_SE moved to Gene level. +TRNASCAN_SE moved to Gene level. +RNAMMER moved to Gene level. +RNAMMER moved to Gene level. +UniGene moved to Gene level. +UniGene moved to Gene level. +Uniprot_genename moved to Gene level. +Uniprot_genename moved to Gene level. +WikiGene moved to Gene level. +WikiGene moved to Gene level. +MIM_GENE moved to Gene level. +MIM_GENE moved to Gene level. +MIM_MORBID moved to Gene level. +MIM_MORBID moved to Gene level. +HGNC moved to Gene level. +HGNC moved to Gene level. +MOVE SQL +UPDATE IGNORE object_xref ox, xref x, source s + SET ox.ensembl_id = ? + WHERE x.source_id = s.source_id AND + ox.xref_id = x.xref_id AND + ox.ensembl_id = ? AND + ox.ensembl_object_type = 'Gene' AND + ox.ox_status = 'DUMP_OUT' AND + s.name in ( +'DBASS3', 'DBASS5', 'EntrezGene', 'miRBase', 'RFAM', 'TRNASCAN_SE', 'RNAMMER', 'UniGene', 'Uniprot_genename', 'WikiGene', 'MIM_GENE', 'MIM_MORBID', 'HGNC') +Number of rows:- moved = 0, identitys deleted = 0, object_xrefs deleted = 0 +Added 0 new mapping but ignored 0 +ZFIN_ID moved to Gene level. +ZFIN_ID moved to Gene level. 
+MAX xref_id = 620426 MAX object_xref_id = 985210, max_object_xref from identity_xref = 985210 +LIST to delete 23, 21, 135, 278, 22, 136, 279, 253 +_ins_xref sql is:- +insert into xref (xref_id, source_id, accession, label, version, species_id, info_type, info_text, description) values (?, ?, ?, ?, 0, 7955, 'MISC', ?, ? ) +For gene ENSDARG00000001014 we have mutiple ZFIN_ID's + Keeping the best one si:ch211-150d5.2 + removing myh9b from gene +For gene ENSDARG00000001470 we have mutiple ZFIN_ID's + Keeping the best one si:ch211-287j19.6 + removing zgc:162351 from gene +For gene ENSDARG00000001559 we have mutiple ZFIN_ID's + Keeping the best one si:ch211-46o5.1 + removing csmd2 from gene +For gene ENSDARG00000001733 we have mutiple ZFIN_ID's + Keeping the best one si:ch211-198b21.4 + removing gulp1 from gene +For gene ENSDARG00000001832 we have mutiple ZFIN_ID's + Keeping the best one si:ch1073-403i13.1 + removing zgc:113912 from gene + removing zgc:103599 from gene +For gene ENSDARG00000001879 we have mutiple ZFIN_ID's + Keeping the best one si:ch211-169k21.2 + removing im:7156396 from gene +For gene ENSDARG00000001889 we have mutiple ZFIN_ID's + Keeping the best one tuba1l2 + removing zgc:123298 from gene +For gene ENSDARG00000001890 we have mutiple ZFIN_ID's + Keeping the best one si:dkey-239i15.3 + removing stt3b from gene +For gene ENSDARG00000002084 we have mutiple ZFIN_ID's + Keeping the best one lamb2 + removing hm:zehs0001 from gene +Multiple best ZFIN_ID's using vega to find the most common for ENSDARG00000002670 + zgc:113944 (chosen as first) + tbpl2 (left as ZFIN_ID reference but not gene symbol) +For gene ENSDARG00000002937 we have mutiple ZFIN_ID's + Keeping the best one meis4.1a + removing meis4.1b from gene +For gene ENSDARG00000003635 we have mutiple ZFIN_ID's + Keeping the best one mogat3b + removing atp6v1e1a from gene +Multiple best ZFIN_ID's using vega to find the most common for ENSDARG00000087402 + tpm1 (chosen as first) + zgc:171719 (left as 
ZFIN_ID reference but not gene symbol) +Multiple best ZFIN_ID's using vega to find the most common for ENSDARG00000087472 +For gene ENSDARG00000087472 we have mutiple ZFIN_ID's + removing zgc:154164 from gene + removing zgc:163040 from gene + removing hist1h4l from gene + Keeping the best one wu:fe37d09 + Keeping the best one wu:fe38f03 + Keeping the best one zgc:165555 + wu:fe37d09 (chosen as first) + zgc:165555 (left as ZFIN_ID reference but not gene symbol) + wu:fe38f03 (left as ZFIN_ID reference but not gene symbol) +Multiple best ZFIN_ID's using vega to find the most common for ENSDARG00000087543 +For gene ENSDARG00000087543 we have mutiple ZFIN_ID's + removing zgc:154164 from gene + removing zgc:163040 from gene + removing hist1h4l from gene + Keeping the best one wu:fe37d09 + Keeping the best one wu:fe38f03 + removing zgc:165555 from gene + wu:fe37d09 (chosen as first) + wu:fe38f03 (left as ZFIN_ID reference but not gene symbol) +For gene ENSDARG00000087583 we have mutiple ZFIN_ID's + Keeping the best one si:ch211-226h8.13 + removing si:ch211-154a22.8 from gene +Multiple best ZFIN_ID's using vega to find the most common for ENSDARG00000087670 +For gene ENSDARG00000087670 we have mutiple ZFIN_ID's + removing zgc:154164 from gene + removing zgc:163040 from gene + removing hist1h4l from gene + Keeping the best one wu:fe37d09 + Keeping the best one wu:fe38f03 + Keeping the best one zgc:165555 + wu:fe37d09 (chosen as first) + zgc:165555 (left as ZFIN_ID reference but not gene symbol) + wu:fe38f03 (left as ZFIN_ID reference but not gene symbol) +Multiple best ZFIN_ID's using vega to find the most common for ENSDARG00000087694 +For gene ENSDARG00000087694 we have mutiple ZFIN_ID's + Keeping the best one zgc:112234 + Keeping the best one zgc:171759 + removing zgc:171937 from gene + Keeping the best one wu:fe11b02 + wu:fe11b02 (chosen as first) + zgc:171759 (left as ZFIN_ID reference but not gene symbol) + zgc:112234 (left as ZFIN_ID reference but not gene symbol) 
+For gene ENSDARG00000096097 we have mutiple ZFIN_ID's + Keeping the best one si:dkeyp-98a7.5 + removing zgc:172150 from gene +For gene ENSDARG00000096159 we have mutiple ZFIN_ID's + Keeping the best one si:dkeyp-98a7.4 + removing zgc:172150 from gene + +For gene.... Lots of these so cut them out to save time and space + +WARNING: Clone_based_ensembl_gene has decreased by -5 % was 7652 now 7194 +WARNING: Clone_based_ensembl_transcript has decreased by -8 % was 8260 now 7554 +WARNING: Clone_based_vega_gene has increased by 144% was 276 now 675 +WARNING: GO has increased by 56% was 87289 now 136827 +WARNING: goslim_goa has increased by 54% was 62738 now 96927 +WARNING: xrefs miRBase_gene_name are not in the new database but are in the old??? +WARNING: xrefs OTTG are not in the new database but are in the old??? +WARNING: xrefs OTTT are not in the new database but are in the old??? +WARNING: RefSeq_ncRNA has increased by 5% was 644 now 677 +WARNING: xrefs RFAM_gene_name are not in the new database but are in the old??? +WARNING: xrefs shares_CDS_and_UTR_with_OTTT are not in the new database but are in the old??? +WARNING: xrefs shares_CDS_with_ENST are not in the new database but are in the old??? +WARNING: xrefs shares_CDS_with_OTTT are not in the new database but are in the old??? +WARNING: xrefs Vega_transcript are not in the new database but are in the old??? +WARNING: xrefs Vega_translation are not in the new database but are in the old??? +WARNING: ZFIN_ID_curated_transcript_notransfer has 9748 xrefs in the new database but NONE in the old +xref_mapper.pl FINISHED NORMALLY + +------------------------------------------------------------ +Sender: LSF System <lsfadmin@bc-24-1-04> +Subject: Job 886769: <perl ~/src/ensembl/misc-scripts/xref_mapping/xref_mapper.pl -file xref_input> Done + +Job <perl ~/src/ensembl/misc-scripts/xref_mapping/xref_mapper.pl -file xref_input> was submitted from host <farm2-head4> by user <ianl> in cluster <farm2>. 
+Job was executed on host(s) <bc-24-1-04>, in queue <normal>, as user <ianl> in cluster <farm2>. +<~/> was used as the home directory. +</workdir/release_65/zebrafish> was used as the working directory. +Started at Tue Oct 18 11:01:18 2011 +Results reported at Tue Oct 18 12:17:34 2011 + +Your job looked like: + +------------------------------------------------------------ +# LSBATCH: User input +perl ~/src/ensembl/misc-scripts/xref_mapping/xref_mapper.pl -file xref_input +------------------------------------------------------------ + +Successfully completed. + +Resource usage summary: + + CPU time : 734.06 sec. + Max Memory : 173 MB + Max Swap : 204 MB + + Max Processes : 6 + Max Threads : 7 + +The output (if any) is above this job summary. + + + +PS: + +Read file <mapper.err> for stderr output of this job. + diff --git a/misc-scripts/xref_mapping/docs/mapper2.out b/misc-scripts/xref_mapping/docs/mapper2.out new file mode 100644 index 0000000000..3a7a9b28f5 --- /dev/null +++ b/misc-scripts/xref_mapping/docs/mapper2.out @@ -0,0 +1,165 @@ +Options: -file xref_input -upload +running in verbose mode +current status is tests_finished +Deleting data for Clone_based_ensembl_gene from core before updating from new xref database +Deleting data for Clone_based_ensembl_transcript from core before updating from new xref database +Deleting data for Clone_based_vega_gene from core before updating from new xref database +Deleting data for Clone_based_vega_transcript from core before updating from new xref database +Deleting data for EMBL from core before updating from new xref database +Deleting data for EntrezGene from core before updating from new xref database +Deleting data for GO from core before updating from new xref database +Deleting data for goslim_goa from core before updating from new xref database +Deleting data for IPI from core before updating from new xref database +Deleting data for MEROPS from core before updating from new xref database +Deleting data for 
miRBase from core before updating from new xref database +Deleting data for miRBase_transcript_name from core before updating from new xref database +Deleting data for PDB from core before updating from new xref database +Deleting data for protein_id from core before updating from new xref database +Deleting data for RefSeq_mRNA from core before updating from new xref database +Deleting data for RefSeq_mRNA_predicted from core before updating from new xref database +Deleting data for RefSeq_ncRNA from core before updating from new xref database +Deleting data for RefSeq_ncRNA_predicted from core before updating from new xref database +Deleting data for RefSeq_peptide from core before updating from new xref database +Deleting data for RefSeq_peptide_predicted from core before updating from new xref database +Deleting data for RFAM from core before updating from new xref database +Deleting data for RFAM_transcript_name from core before updating from new xref database +Deleting data for UniGene from core before updating from new xref database +Deleting data for Uniprot/SPTREMBL from core before updating from new xref database +Deleting data for Uniprot/SWISSPROT from core before updating from new xref database +Deleting data for Uniprot_genename from core before updating from new xref database +Deleting data for WikiGene from core before updating from new xref database +Deleting data for ZFIN_ID from core before updating from new xref database +Deleting data for ZFIN_ID_transcript_name from core before updating from new xref database +xref offset is 722445, object_xref offset is 170998 +updating (21) Clone_based_ensembl_gene in core (for MISC xrefs) +DIRECT 7194 +updating (22) Clone_based_ensembl_transcript in core (for MISC xrefs) +DIRECT 7554 +updating (23) Clone_based_vega_gene in core (for MISC xrefs) +DIRECT 675 +updating (24) Clone_based_vega_transcript in core (for MISC xrefs) +DIRECT 302 +updating (24) Clone_based_vega_transcript in core (for DIRECT xrefs) 
+DIRECT 17688 +updating (236) EMBL in core (for DEPENDENT xrefs) +DEP 42665 xrefs, 94223 object_xrefs +updating (39) EntrezGene in core (for DEPENDENT xrefs) +DEP 21473 xrefs, 23897 object_xrefs + added 30853 synonyms +updating (52) GO in core (for DEPENDENT xrefs) +GO 4535 +updating (274) goslim_goa in core (for DEPENDENT xrefs) +DEP 99 xrefs, 96927 object_xrefs +updating (91) IPI in core (for SEQUENCE_MATCH xrefs) +SEQ 35478 +updating (107) MEROPS in core (for DEPENDENT xrefs) +DEP 286 xrefs, 490 object_xrefs +updating (275) miRBase in core (for DIRECT xrefs) +DIRECT 354 +updating (279) miRBase_transcript_name in core (for MISC xrefs) +DIRECT 337 +updating (224) PDB in core (for DEPENDENT xrefs) +DEP 65 xrefs, 82 object_xrefs +updating (225) protein_id in core (for DEPENDENT xrefs) +DEP 35479 xrefs, 45695 object_xrefs +updating (163) RefSeq_mRNA in core (for SEQUENCE_MATCH xrefs) +SEQ 13272 +updating (163) RefSeq_mRNA in core (for INFERRED_PAIR xrefs) +DIRECT 598 +updating (165) RefSeq_mRNA_predicted in core (for SEQUENCE_MATCH xrefs) +SEQ 7546 +updating (165) RefSeq_mRNA_predicted in core (for INFERRED_PAIR xrefs) +DIRECT 1333 +updating (166) RefSeq_ncRNA in core (for SEQUENCE_MATCH xrefs) +SEQ 342 +updating (167) RefSeq_ncRNA_predicted in core (for SEQUENCE_MATCH xrefs) +SEQ 323 +updating (168) RefSeq_peptide in core (for SEQUENCE_MATCH xrefs) +SEQ 13705 +updating (168) RefSeq_peptide in core (for INFERRED_PAIR xrefs) +DIRECT 127 +updating (172) RefSeq_peptide_predicted in core (for SEQUENCE_MATCH xrefs) +SEQ 8283 +updating (172) RefSeq_peptide_predicted in core (for INFERRED_PAIR xrefs) +DIRECT 348 +updating (134) RFAM in core (for DIRECT xrefs) +DIRECT 146 +updating (136) RFAM_transcript_name in core (for MISC xrefs) +DIRECT 3667 +updating (198) UniGene in core (for SEQUENCE_MATCH xrefs) +SEQ 22897 +updating (227) Uniprot/SPTREMBL in core (for SEQUENCE_MATCH xrefs) +SEQ 22028 + added 139 synonyms +updating (228) Uniprot/SPTREMBL in core (for SEQUENCE_MATCH 
xrefs) +SEQ 28993 + added 98 synonyms +updating (232) Uniprot/SWISSPROT in core (for SEQUENCE_MATCH xrefs) +SEQ 2650 + added 1408 synonyms +updating (238) Uniprot_genename in core (for DEPENDENT xrefs) +DEP 30002 xrefs, 31056 object_xrefs + added 17256 synonyms +updating (246) WikiGene in core (for DEPENDENT xrefs) +DEP 21473 xrefs, 23897 object_xrefs +updating (248) ZFIN_ID in core (for DEPENDENT xrefs) +DEP 3804 xrefs, 8337 object_xrefs + added 4988 synonyms +updating (249) ZFIN_ID in core (for DIRECT xrefs) +DIRECT 16414 + added 25129 synonyms +updating (253) ZFIN_ID_transcript_name in core (for MISC xrefs) +DIRECT 40344 +Setting Transcript and Gene display_xrefs from xref database into core and setting the desc +Using xref_off set of 722445 +24488 gene descriptions added +Only setting those not already set +Presedence for Gene Descriptions + Uniprot/SPTREMBL 1 + RefSeq_dna 3 + RefSeq_peptide 4 + Uniprot/SWISSPROT 5 + IMGT/GENE_DB 6 + ZFIN_ID 7 + miRBase 8 + RFAM 9 +6437 gene descriptions added +xref_mapper.pl FINISHED NORMALLY + +------------------------------------------------------------ +Sender: LSF System <lsfadmin@bc-17-3-12> +Subject: Job 897678: <perl ~/src/ensembl/misc-scripts/xref_mapping/xref_mapper.pl -file xref_input -upload> Done + +Job <perl ~/src/ensembl/misc-scripts/xref_mapping/xref_mapper.pl -file xref_input -upload> was submitted from host <farm2-head3> by user <ianl> in cluster <farm2>. +Job was executed on host(s) <bc-17-3-12>, in queue <normal>, as user <ianl> in cluster <farm2>. +</nfs/users/nfs_i/ianl> was used as the home directory. +</workdir/release_65/zebrafish> was used as the working directory. 
+Started at Tue Oct 18 13:38:32 2011 +Results reported at Tue Oct 18 14:02:49 2011 + +Your job looked like: + +------------------------------------------------------------ +# LSBATCH: User input +perl ~/src/ensembl/misc-scripts/xref_mapping/xref_mapper.pl -file xref_input -upload +------------------------------------------------------------ + +Successfully completed. + +Resource usage summary: + + CPU time : 127.40 sec. + Max Memory : 40 MB + Max Swap : 71 MB + + Max Processes : 3 + Max Threads : 4 + +The output (if any) is above this job summary. + + + +PS: + +Read file <mapper2.err> for stderr output of this job. + diff --git a/misc-scripts/xref_mapping/docs/parse.out b/misc-scripts/xref_mapping/docs/parse.out new file mode 100644 index 0000000000..255f0e562c --- /dev/null +++ b/misc-scripts/xref_mapping/docs/parse.out @@ -0,0 +1,646 @@ +Options: -user rw -pass password -host ens-research -dbname ianl_human_xref_65 -species human -stats -create -force +host os ens-research +==> Done. +Creating ianl_human_xref_65 from ~/src/ensembl/misc-scripts/xref_mapping/sql/table.sql +Populating metadata in ianl_human_xref_65 from ~/src/ensembl/misc-scripts/xref_mapping/sql/populate_metadata.sql +Species human is valid (name = homo_sapiens, ID = 9606) +----{ CCDS }-------------------------------------------------------------------- +Parsing script:host=>ens-livemirror,dbname=>ccds_human_65,tran_name=>ENST, with CCDSParser +Parsed CCDS identifiers from script:host=>ens-livemirror,dbname=>ccds_human_65,tran_name=>ENST,, added 26451 xrefs and 33689 direct_xrefs + +source xrefs prim dep gdir tdir tdir coord synonyms +CCDS_transcript 0 0 0 0 33689 +CCDS 26451 0 0 0 0 0 0 0 + +----{ EntrezGene }-------------------------------------------------------------- +Connecting to FTP host 'ftp.ncbi.nlm.nih.gov' for file 'EntrezGene/gene_info.gz' +Fetching 'gene_info.gz' (size = 136652895) +Local file is 'EntrezGene/gene_info.gz' +'EntrezGene/gene_info.gz' passed (gzip -t) corruption test. 
+Checksum for 'EntrezGene/gene_info.gz' does not match, will parse... +Parsing 'EntrezGene/gene_info.gz' with EntrezGeneParser +Reading from 'EntrezGene/gene_info.gz'... +42029 EntrezGene Xrefs added with 74164 synonyms + +source xrefs prim dep gdir tdir tdir coord synonyms +WikiGene 42029 0 0 0 0 0 0 0 +EntrezGene 42029 0 0 0 0 0 0 74164 + +----{ MIM }--------------------------------------------------------------------- +Connecting to FTP host 'grcf.jhmi.edu' for file 'MIM/omim.txt.Z' +Creating directory 'MIM' +Fetching 'omim.txt.Z' (size = 68826997) +Local file is 'MIM/omim.txt.Z' +'MIM/omim.txt.Z' passed (gzip -t) corruption test. +Checksum for 'MIM/omim.txt.Z' does not match, will parse... +Parsing 'MIM/omim.txt.Z' with MIMParser +sources are:- 118, 119, 120 +Reading from 'MIM/omim.txt.Z'... +13876 genemap and 7209 phenotype MIM xrefs added +added 947 synonyms (defined by MOVED TO) + +source xrefs prim dep gdir tdir tdir coord synonyms +MIM_MORBID 7209 0 0 0 0 0 0 471 +MIM_GENE 13876 0 0 0 0 0 0 497 + +----{ RefSeq_dna }-------------------------------------------------------------- +Connecting to FTP host 'ftp.ncbi.nih.gov' for file 'RefSeq_dna/human.rna.fna.gz' +Creating directory 'RefSeq_dna' +Fetching 'human.rna.fna.gz' (size = 37698093) +Local file is 'RefSeq_dna/human.rna.fna.gz' +'RefSeq_dna/human.rna.fna.gz' passed (gzip -t) corruption test. +Connecting to FTP host 'ftp.ncbi.nih.gov' for file 'RefSeq_dna/RefSeq-release*.txt' +Fetching 'RefSeq-release49.txt' (size = 57655) +Local file is 'RefSeq_dna/RefSeq-release49.txt' +Checksum for 'RefSeq_dna/human.rna.fna.gz' does not match, will parse... 
+Parsing 'RefSeq_dna/human.rna.fna.gz' with RefSeqParser +RefSeq_peptide source ID = 168 +RefSeq_dna source ID = 139 +RefSeq_mRNA source ID = 163 +RefSeq_ncRNA source ID = 166 +RefSeq_peptide_predicted source ID = 172 +RefSeq_dna_predicted source ID = 143 +RefSeq_mRNA_predicted source ID = 165 +RefSeq_ncRNA_predicted source ID = 167 +Reading from 'RefSeq_dna/human.rna.fna.gz'... +Read 42733 xrefs from RefSeq_dna/human.rna.fna.gz +count = 42733 +Uploading xrefs +Reading from 'RefSeq_dna/RefSeq-release49.txt'... +RefSeq release: 'NCBI Reference Sequence (RefSeq) Database Release 49, September 7, 2011' +Setting release to 'NCBI Reference Sequence (RefSeq) Database Release 49, September 7, 2011' for source ID '155' +Setting release to 'NCBI Reference Sequence (RefSeq) Database Release 49, September 7, 2011' for source ID '168' +Setting release to 'NCBI Reference Sequence (RefSeq) Database Release 49, September 7, 2011' for source ID '139' +Setting release to 'NCBI Reference Sequence (RefSeq) Database Release 49, September 7, 2011' for source ID '163' +Setting release to 'NCBI Reference Sequence (RefSeq) Database Release 49, September 7, 2011' for source ID '166' +Setting release to 'NCBI Reference Sequence (RefSeq) Database Release 49, September 7, 2011' for source ID '172' +Setting release to 'NCBI Reference Sequence (RefSeq) Database Release 49, September 7, 2011' for source ID '143' +Setting release to 'NCBI Reference Sequence (RefSeq) Database Release 49, September 7, 2011' for source ID '165' +Setting release to 'NCBI Reference Sequence (RefSeq) Database Release 49, September 7, 2011' for source ID '167' + +source xrefs prim dep gdir tdir tdir coord synonyms +RefSeq_mRNA_predicted 1113 1113 0 0 0 0 0 0 +RefSeq_ncRNA_predicted 3742 3742 0 0 0 0 0 0 +RefSeq_ncRNA 5957 5957 0 0 0 0 0 0 +RefSeq_mRNA 31921 31921 0 0 0 0 0 0 + +----{ RefSeq_peptide }---------------------------------------------------------- +Connecting to FTP host 'ftp.ncbi.nih.gov' for file 
'RefSeq_peptide/human.protein.gpff.gz' +Creating directory 'RefSeq_peptide' +Fetching 'human.protein.gpff.gz' (size = 87557956) +Local file is 'RefSeq_peptide/human.protein.gpff.gz' +'RefSeq_peptide/human.protein.gpff.gz' passed (gzip -t) corruption test. +Connecting to FTP host 'ftp.ncbi.nih.gov' for file 'RefSeq_peptide/RefSeq-release*.txt' +Fetching 'RefSeq-release49.txt' (size = 57655) +Local file is 'RefSeq_peptide/RefSeq-release49.txt' +Checksum for 'RefSeq_peptide/human.protein.gpff.gz' does not match, will parse... +Parsing 'RefSeq_peptide/human.protein.gpff.gz' with RefSeqGPFFParser +RefSeq_peptide source ID = 168 +RefSeq_dna source ID = 139 +RefSeq_mRNA source ID = 163 +RefSeq_ncRNA source ID = 166 +RefSeq_peptide_predicted source ID = 172 +RefSeq_dna_predicted source ID = 143 +RefSeq_mRNA_predicted source ID = 165 +RefSeq_ncRNA_predicted source ID = 167 +Reading from 'RefSeq_peptide/human.protein.gpff.gz'... +Read 33047 xrefs from RefSeq_peptide/human.protein.gpff.gz +count = 33047 +Uploading xrefs +Reading from 'RefSeq_peptide/RefSeq-release49.txt'... 
+RefSeq release: 'NCBI Reference Sequence (RefSeq) Database Release 49, September 7, 2011' +Setting release to 'NCBI Reference Sequence (RefSeq) Database Release 49, September 7, 2011' for source ID '182' +Setting release to 'NCBI Reference Sequence (RefSeq) Database Release 49, September 7, 2011' for source ID '168' +Setting release to 'NCBI Reference Sequence (RefSeq) Database Release 49, September 7, 2011' for source ID '163' +Setting release to 'NCBI Reference Sequence (RefSeq) Database Release 49, September 7, 2011' for source ID '166' +Setting release to 'NCBI Reference Sequence (RefSeq) Database Release 49, September 7, 2011' for source ID '165' +Setting release to 'NCBI Reference Sequence (RefSeq) Database Release 49, September 7, 2011' for source ID '167' +Setting release to 'NCBI Reference Sequence (RefSeq) Database Release 49, September 7, 2011' for source ID '139' +Setting release to 'NCBI Reference Sequence (RefSeq) Database Release 49, September 7, 2011' for source ID '172' +Setting release to 'NCBI Reference Sequence (RefSeq) Database Release 49, September 7, 2011' for source ID '143' + +source xrefs prim dep gdir tdir tdir coord synonyms +WikiGene 18 0 33047 0 0 0 0 0 +RefSeq_peptide_predicted 1113 1113 0 0 0 0 0 0 +RefSeq_peptide 31934 31934 0 0 0 0 0 0 +EntrezGene 18 0 33047 0 0 0 0 0 + +----{ Uniprot/SPTREMBL }-------------------------------------------------------- +Connecting to FTP host 'ftp.ebi.ac.uk' for file 'UniprotSPTREMBL/uniprot_trembl.dat.gz' +Creating directory 'UniprotSPTREMBL' +Fetching 'uniprot_trembl.dat.gz' (size = 6928570594) +Local file is 'UniprotSPTREMBL/uniprot_trembl.dat.gz' +'UniprotSPTREMBL/uniprot_trembl.dat.gz' passed (gzip -t) corruption test. +Connecting to FTP host 'ftp.ebi.ac.uk' for file 'UniprotSPTREMBL/reldate.txt' +Fetching 'reldate.txt' (size = 151) +Local file is 'UniprotSPTREMBL/reldate.txt' +Checksum for 'UniprotSPTREMBL/uniprot_trembl.dat.gz' does not match, will parse... 
+Parsing 'UniprotSPTREMBL/uniprot_trembl.dat.gz' with UniProtAltParser +SwissProt source id for UniprotSPTREMBL/uniprot_trembl.dat.gz: 232 +SpTREMBL source id for UniprotSPTREMBL/uniprot_trembl.dat.gz: 227 +SpTREMBL protein_evidence > 3 source id for UniprotSPTREMBL/uniprot_trembl.dat.gz: 228 +Predicted SwissProt source id for UniprotSPTREMBL/uniprot_trembl.dat.gz: 233 +Predicted SpTREMBL source id for UniprotSPTREMBL/uniprot_trembl.dat.gz: 229 +Predicted EMBL source id for UniprotSPTREMBL/uniprot_trembl.dat.gz: 237 +Predicted protein_id source id for UniprotSPTREMBL/uniprot_trembl.dat.gz: 226 +Reading from 'UniprotSPTREMBL/uniprot_trembl.dat.gz'... +Read 0 SwissProt xrefs, 58146 SPTrEMBL xrefs with protein evidence codes 1-3, and 36008 SPTrEMBL xrefs with protein evidence codes > 3 from UniprotSPTREMBL/uniprot_trembl.dat.gz +Added the following dependent xrefs:- +MEROPS 226 +PDB 154 +EMBL 488892 +Uniprot_genename 72109 +count = 94154 +Uploading xrefs +Reading from 'UniprotSPTREMBL/reldate.txt'... 
+Swiss-Prot release is 'UniProtKB/Swiss-Prot Release 2011_09 of 21-Sep-2011' +SpTrEMBL release is 'UniProtKB/TrEMBL Release 2011_09 of 21-Sep-2011' +Setting release to 'UniProtKB/Swiss-Prot Release 2011_09 of 21-Sep-2011' for source ID '232' +Setting release to 'UniProtKB/TrEMBL Release 2011_09 of 21-Sep-2011' for source ID '227' +Setting release to 'UniProtKB/Swiss-Prot Release 2011_09 of 21-Sep-2011' for source ID '233' +Setting release to 'UniProtKB/TrEMBL Release 2011_09 of 21-Sep-2011' for source ID '229' +Setting release to 'UniProtKB/TrEMBL Release 2011_09 of 21-Sep-2011' for source ID '228' + +source xrefs prim dep gdir tdir tdir coord synonyms +MEROPS 81 0 226 0 0 0 0 0 +Uniprot/SPTREMBL 94154 94154 0 0 0 0 0 965 +protein_id 207835 0 207835 0 0 0 0 0 +PDB 138 0 154 0 0 0 0 0 +EMBL 99122 0 275393 0 0 0 0 0 +Uniprot_genename 18031 0 72109 0 0 0 0 1751 + +----{ Uniprot/SWISSPROT }------------------------------------------------------- +Connecting to FTP host 'ftp.ebi.ac.uk' for file 'UniprotSWISSPROT/uniprot_sprot.dat.gz' +Creating directory 'UniprotSWISSPROT' +Fetching 'uniprot_sprot.dat.gz' (size = 446105793) +Local file is 'UniprotSWISSPROT/uniprot_sprot.dat.gz' +'UniprotSWISSPROT/uniprot_sprot.dat.gz' passed (gzip -t) corruption test. +Connecting to FTP host 'ftp.ebi.ac.uk' for file 'UniprotSWISSPROT/reldate.txt' +Fetching 'reldate.txt' (size = 151) +Local file is 'UniprotSWISSPROT/reldate.txt' +Checksum for 'UniprotSWISSPROT/uniprot_sprot.dat.gz' does not match, will parse... 
+Parsing 'UniprotSWISSPROT/uniprot_sprot.dat.gz' with UniProtAltParser +SwissProt source id for UniprotSWISSPROT/uniprot_sprot.dat.gz: 232 +SpTREMBL source id for UniprotSWISSPROT/uniprot_sprot.dat.gz: 227 +SpTREMBL protein_evidence > 3 source id for UniprotSWISSPROT/uniprot_sprot.dat.gz: 228 +Predicted SwissProt source id for UniprotSWISSPROT/uniprot_sprot.dat.gz: 233 +Predicted SpTREMBL source id for UniprotSWISSPROT/uniprot_sprot.dat.gz: 229 +Predicted EMBL source id for UniprotSWISSPROT/uniprot_sprot.dat.gz: 237 +Predicted protein_id source id for UniprotSWISSPROT/uniprot_sprot.dat.gz: 226 +Reading from 'UniprotSWISSPROT/uniprot_sprot.dat.gz'... +Read 20248 SwissProt xrefs, 0 SPTrEMBL xrefs with protein evidence codes 1-3, and 0 SPTrEMBL xrefs with protein evidence codes > 3 from UniprotSWISSPROT/uniprot_sprot.dat.gz +Added the following dependent xrefs:- +MEROPS 823 +MIM_MORBID 3782 +MIM_GENE 13155 +HPA 14688 +PDB 21041 +EMBL 286861 +Uniprot_genename 19696 +count = 20248 +Uploading xrefs +Reading from 'UniprotSWISSPROT/reldate.txt'... 
+Swiss-Prot release is 'UniProtKB/Swiss-Prot Release 2011_09 of 21-Sep-2011' +SpTrEMBL release is 'UniProtKB/TrEMBL Release 2011_09 of 21-Sep-2011' +Setting release to 'UniProtKB/Swiss-Prot Release 2011_09 of 21-Sep-2011' for source ID '232' +Setting release to 'UniProtKB/TrEMBL Release 2011_09 of 21-Sep-2011' for source ID '227' +Setting release to 'UniProtKB/Swiss-Prot Release 2011_09 of 21-Sep-2011' for source ID '233' +Setting release to 'UniProtKB/TrEMBL Release 2011_09 of 21-Sep-2011' for source ID '229' +Setting release to 'UniProtKB/TrEMBL Release 2011_09 of 21-Sep-2011' for source ID '228' + +source xrefs prim dep gdir tdir tdir coord synonyms +MIM_MORBID 4 0 3782 0 0 0 0 0 +Uniprot/SWISSPROT 20248 20248 0 0 0 0 0 59372 +protein_id 125858 0 125931 0 0 0 0 0 +HPA 14438 0 14688 0 0 0 0 0 +EMBL 111268 0 140519 0 0 0 0 0 +Uniprot_genename 6435 0 19696 0 0 0 0 18979 +MEROPS 758 0 823 0 0 0 0 0 +MIM_GENE 1 0 13155 0 0 0 0 0 +PDB 18191 0 21041 0 0 0 0 0 + +----{ Uniprot/SWISSPROT }------------------------------------------------------- +Connecting to FTP host 'ftp.ebi.ac.uk' for file 'UniprotSWISSPROT/ens-sp.map' +Fetching 'ens-sp.map' (size = 2157658) +Local file is 'UniprotSWISSPROT/ens-sp.map' +Checksum for 'UniprotSWISSPROT/ens-sp.map' does not match, will parse... +Parsing 'UniprotSWISSPROT/ens-sp.map' with UniProtDirectParser +Reading from 'UniprotSWISSPROT/ens-sp.map'... 
+Source_id = 232 +19243 entrys added +0 not found + +source xrefs prim dep gdir tdir tdir coord synonyms +MIM_MORBID 0 0 5748 +protein_id 0 0 204529 +HPA 0 0 21717 +Uniprot/SWISSPROT 19243 0 0 0 0 0 0 58674 +EMBL 0 0 219245 +Uniprot_genename 0 0 27717 +MEROPS 0 0 1148 +MIM_GENE 0 0 19913 +Uniprot/SWISSPROT_translation 0 0 0 0 0 27953 +PDB 0 0 34851 + +----{ RefSeq_dna }-------------------------------------------------------------- +Parsing script:host=>ens-livemirror,dbname=>ccds_human_65, with RefSeq_CCDSParser +RefSeq_mRNA source ID = 162 +RefSeq_mRNA_predicted source ID = 164 +Parsed 29980 RefSeq_dna identifiers from script:host=>ens-livemirror,dbname=>ccds_human_65,, added 29536 xrefs and 43349 direct_xrefs from 29980 lines. + +source xrefs prim dep gdir tdir tdir coord synonyms +RefSeq_mRNA_predicted 2 0 0 0 0 0 0 0 +RefSeq_mRNA_predicted_transcript 0 0 0 0 2 +RefSeq_mRNA_transcript 0 0 0 0 43347 +RefSeq_mRNA 29534 0 0 0 0 0 0 0 + +----{ HGNC }-------------------------------------------------------------------- +Creating directory 'HGNC' +Connecting to HTTP host 'www.genenames.org' +Fetching '/cgi-bin/hgnc_downloads.cgi' +Local file is 'HGNC/hgnc_downloads.cgi' +Checksum for 'HGNC/hgnc_downloads.cgi' does not match, will parse... +Parsing 'HGNC/hgnc_downloads.cgi' with HGNCParser +Reading from 'HGNC/hgnc_downloads.cgi'... 
+Loaded a total of :- entrezgene_manual 18431 + ensembl_manual 21589 + entrezgene_mapped 18885 + refseq_manual 30665 + refseq_mapped 38787 + Locus Specific Databases 176 + swissprot_manual 38633 +6042 xrefs could not be associated via RefSeq, EntrezGene or ensembl + +source xrefs prim dep gdir tdir tdir coord synonyms +LRG_HGNC_notransfer 176 0 0 0 0 0 0 522 +LRG_HGNC_notransfer_gene 0 0 0 176 +HGNC_gene 0 0 0 21589 +HGNC 122651 0 145401 0 0 0 0 228088 + +----{ HGNC }-------------------------------------------------------------------- +Parsing script:host=>ens-staging1,source=>HGNC, with VegaOfficialNameParser +We have 221829/524288 vega to external source entries + We have 76942/131072 vega to external source entries +Parsed 115264 HGNC identifiers from script:host=>ens-staging1,source=>HGNC,, added 18597 xrefs and 115264 direct_xrefs +3 ignoreed due to numbers no identifiers being no longer valid :- 25711 (HGNC:25711 ) 18577 (HGNC:18577 ) 18577 (HGNC:18577 ) + +source xrefs prim dep gdir tdir tdir coord synonyms +HGNC 18597 0 0 0 0 0 0 253277 +HGNC_transcript 0 0 0 0 115264 + +----{ HGNC }-------------------------------------------------------------------- +Parsing script:wget=>http://www.genenames.org/cgi-bin/hgnc_downloads.cgi?title=HGNC+output+data&hgnc_dbtag=on&col=gd_hgnc_id&col=gd_status&col=gd_ccds_ids&status=Approved&status_opt=2&level=pri&=on&where=&order_by=gd_app_sym_sort&limit=&format=text&submit=submit&.cgifields=&.cgifields=level&.cgifields=chr&.cgifields=status&.cgifields=hgnc_dbtag,host=>ens-livemirror,dbname=>ccds_human_65, with HGNC_CCDSParser +75 missed as no hgnc for the ccds. 
Added 26376 HGNC xrefs via CCDS and 52752 direct xrefs + +source xrefs prim dep gdir tdir tdir coord synonyms +HGNC 18394 0 0 0 0 0 0 56859 +HGNC_transcript 0 0 0 0 52752 + +----{ IPI }--------------------------------------------------------------------- +Connecting to FTP host 'ftp.ebi.ac.uk' for file 'IPI/ipi.HUMAN.fasta.gz' +Creating directory 'IPI' +Fetching 'ipi.HUMAN.fasta.gz' (size = 25558620) +Local file is 'IPI/ipi.HUMAN.fasta.gz' +'IPI/ipi.HUMAN.fasta.gz' passed (gzip -t) corruption test. +Checksum for 'IPI/ipi.HUMAN.fasta.gz' does not match, will parse... +Parsing 'IPI/ipi.HUMAN.fasta.gz' with IPIParser +Reading from 'IPI/ipi.HUMAN.fasta.gz'... +count = 91464 +Uploading xrefs +91464 IPI xrefs succesfully parsed + +source xrefs prim dep gdir tdir tdir coord synonyms +IPI 91464 91464 0 0 0 0 0 0 + +----{ UniGene }----------------------------------------------------------------- +Connecting to FTP host 'ftp.ncbi.nih.gov' for file 'UniGene/Hs.seq.uniq.gz' +Creating directory 'UniGene' +Fetching 'Hs.seq.uniq.gz' (size = 55756319) +Local file is 'UniGene/Hs.seq.uniq.gz' +'UniGene/Hs.seq.uniq.gz' passed (gzip -t) corruption test. +Connecting to FTP host 'ftp.ncbi.nih.gov' for file 'UniGene/Hs.data.gz' +Fetching 'Hs.data.gz' (size = 167673440) +Local file is 'UniGene/Hs.data.gz' +'UniGene/Hs.data.gz' passed (gzip -t) corruption test. 
+Connecting to FTP host 'ftp.ncbi.nih.gov' for file 'UniGene/*.LOG' +Fetching '2003.LOG' (size = 38108) +Local file is 'UniGene/2003.LOG' +Fetching '2004.LOG' (size = 37143) +Local file is 'UniGene/2004.LOG' +Fetching '2005.LOG' (size = 26650) +Local file is 'UniGene/2005.LOG' +Fetching '2006.LOG' (size = 32236) +Local file is 'UniGene/2006.LOG' +Fetching '2007.LOG' (size = 48006) +Local file is 'UniGene/2007.LOG' +Fetching '2008.LOG' (size = 47076) +Local file is 'UniGene/2008.LOG' +Fetching '2009.LOG' (size = 12247) +Local file is 'UniGene/2009.LOG' +Fetching '2010.LOG' (size = 27499) +Local file is 'UniGene/2010.LOG' +Fetching '2011.LOG' (size = 12486) +Local file is 'UniGene/2011.LOG' +Checksum for 'UniGene/Hs.seq.uniq.gz' does not match, will parse... +Checksum for 'UniGene/Hs.data.gz' does not match, will parse... +Parsing 'UniGene/Hs.seq.uniq.gz', 'UniGene/Hs.data.gz' with UniGeneParser +UniGene source ID = 198. +Reading from 'UniGene/Hs.data.gz'... +Reading from 'UniGene/Hs.seq.uniq.gz'... +count = 122727 +Uploading xrefs +Reading from 'UniGene/2011.LOG'... 
+UniGene release: '31 May 2011, UniGene Build #230 Homo sapiens' +Setting release to '31 May 2011, UniGene Build #230 Homo sapiens' for source ID '198' + +source xrefs prim dep gdir tdir tdir coord synonyms +UniGene 122727 122727 0 0 0 0 0 0 + +----{ HGNC_curated_transcript_notransfer }-------------------------------------- +Parsing script: with curated_transcriptParser +source id is 77, curated_source_id is 77 +We have 221829/524288 ott to enst entries + 148114 direct xrefs succesfully parsed + +source xrefs prim dep gdir tdir tdir coord synonyms +HGNC_curated_transcript_notransfer_transcript 0 0 0 0 118840 +HGNC_curated_transcript_notransfer 112068 0 0 0 0 0 0 0 +Clone_based_vega_transcript_transcript 0 0 0 0 29274 +Clone_based_vega_transcript 29036 0 0 0 0 0 0 0 + +----{ DBASS3 }------------------------------------------------------------------ +Creating directory 'DBASS3' +Connecting to HTTP host 'www.som.soton.ac.uk' +Fetching '/research/geneticsdiv/dbass3/download.asp' +Local file is 'DBASS3/download.asp' +Checksum for 'DBASS3/download.asp' does not match, will parse... +Parsing 'DBASS3/download.asp' with DBASSParser +Reading from 'DBASS3/download.asp'... +160 direct xrefs succesfully parsed + +source xrefs prim dep gdir tdir tdir coord synonyms +DBASS3 160 0 0 0 0 0 0 5 +DBASS3_gene 0 0 0 160 + +----{ DBASS5 }------------------------------------------------------------------ +Creating directory 'DBASS5' +Connecting to HTTP host 'www.som.soton.ac.uk' +Fetching '/research/geneticsdiv/dbass5/download.aspx' +Local file is 'DBASS5/download.aspx' +Checksum for 'DBASS5/download.aspx' does not match, will parse... +Parsing 'DBASS5/download.aspx' with DBASSParser +Reading from 'DBASS5/download.aspx'... 
+246 direct xrefs succesfully parsed + +source xrefs prim dep gdir tdir tdir coord synonyms +DBASS5_gene 0 0 0 246 +DBASS5 246 0 0 0 0 0 0 8 + +----{ HPA }--------------------------------------------------------------------- +Creating directory 'HPA' +Connecting to HTTP host 'www.proteinatlas.org' +Fetching '/download/xref.php' +Local file is 'HPA/xref.php' +Checksum for 'HPA/xref.php' does not match, will parse... +Parsing 'HPA/xref.php' with HPAParser +Reading from 'HPA/xref.php'... +50502 direct xrefs succesfully parsed + +source xrefs prim dep gdir tdir tdir coord synonyms +HPA 14506 0 0 0 0 0 0 0 +HPA_translation 0 0 0 0 0 50502 + +----{ MIM2GENE }---------------------------------------------------------------- +Connecting to FTP host 'ftp.ncbi.nih.gov' for file 'MIM2GENE/mim2gene' +Creating directory 'MIM2GENE' +Fetching 'mim2gene' (size = 365646) +Local file is 'MIM2GENE/mim2gene' +Checksum for 'MIM2GENE/mim2gene' does not match, will parse... +Parsing 'MIM2GENE/mim2gene' with Mim2GeneParser +Reading from 'MIM2GENE/mim2gene'... +0 EntrezGene entries could not be found. +4 Omim entries could not be found. +1378 had different types out of 19461 Entries. + +source xrefs prim dep gdir tdir tdir coord synonyms +MIM_MORBID 0 0 5725 +MIM_GENE 0 0 13741 + +----{ Interpro }---------------------------------------------------------------- +Connecting to FTP host 'ftp.ebi.ac.uk' for file 'Interpro/interpro.xml.gz' +Creating directory 'Interpro' +Fetching 'interpro.xml.gz' (size = 16495205) +Local file is 'Interpro/interpro.xml.gz' +'Interpro/interpro.xml.gz' passed (gzip -t) corruption test. +Connecting to FTP host 'ftp.ebi.ac.uk' for file 'Interpro/release_notes.txt' +Fetching 'release_notes.txt' (size = 3575) +Local file is 'Interpro/release_notes.txt' +Checksum for 'Interpro/interpro.xml.gz' does not match, will parse... +Parsing 'Interpro/interpro.xml.gz' with InterproParser +Reading from 'Interpro/interpro.xml.gz'... + 2007 PRINTS loaded. + 12015 PFAM loaded. 
+ 22245 INTERPRO loaded. + 2926 PIRSF loaded. + 1292 PROSITE loaded. + 4003 TIGRFAMs loaded. + 882 SMART loaded. + 1203 SSF loaded. + 897 PROFILE loaded. +Reading from 'Interpro/release_notes.txt'... +Interpro release is 'Release 34.0, 23 September 2011' +Setting release to 'Release 34.0, 23 September 2011' for source ID '100' + +source xrefs prim dep gdir tdir tdir coord synonyms +Interpro 22245 0 0 0 0 0 0 0 + +----{ UCSC_hg19 }--------------------------------------------------------------- +Connecting to FTP host 'hgdownload.cse.ucsc.edu' for file 'UCSC_hg19/knownGene.txt.gz' +Creating directory 'UCSC_hg19' +Fetching 'knownGene.txt.gz' (size = 4108721) +Local file is 'UCSC_hg19/knownGene.txt.gz' +'UCSC_hg19/knownGene.txt.gz' passed (gzip -t) corruption test. +Connecting to FTP host 'hgdownload.cse.ucsc.edu' for file 'UCSC_hg19/README.txt' +Fetching 'README.txt' (size = 2624) +Local file is 'UCSC_hg19/README.txt' +Checksum for 'UCSC_hg19/knownGene.txt.gz' does not match, will parse... +Parsing 'UCSC_hg19/knownGene.txt.gz' with UCSCParser +Reading from 'UCSC_hg19/knownGene.txt.gz'... + +source xrefs prim dep gdir tdir tdir coord synonyms +UCSC 0 0 0 0 0 0 77614 + +----{ ncRNA }------------------------------------------------------------------- +Parsing script:host=>genebuild7,port=>3306,dbname=>sw4_ncRNA_Xrefs, with ncRNA_DBParser +Added 938 Xrefs for ncRNAs + +source xrefs prim dep gdir tdir tdir coord synonyms +RFAM 249 0 0 0 0 0 0 0 +miRBase_transcript 0 0 0 0 715 +RFAM_transcript 0 0 0 0 5199 +miRBase 689 0 0 0 0 0 0 0 + +----{ GO }---------------------------------------------------------------------- +Connecting to FTP host 'ftp.ebi.ac.uk' for file 'GO/gene_association.goa_human.gz' +Creating directory 'GO' +Fetching 'gene_association.goa_human.gz' (size = 13897660) +Local file is 'GO/gene_association.goa_human.gz' +'GO/gene_association.goa_human.gz' passed (gzip -t) corruption test. 
+Connecting to HTTP host 'archive.geneontology.org' +Fetching '/latest-termdb/go_daily-termdb.obo-xml.gz' +Local file is 'GO/go_daily-termdb.obo-xml.gz' +Connecting to HTTP host 'www.ebi.ac.uk' +Fetching '/GOA/HUMAN_release.html' +Local file is 'GO/HUMAN_release.html' +Checksum for 'GO/gene_association.goa_human.gz' does not match, will parse... +Checksum for 'GO/go_daily-termdb.obo-xml.gz' does not match, will parse... +Parsing 'GO/gene_association.goa_human.gz', 'GO/go_daily-termdb.obo-xml.gz' with GOParser +Reading from 'GO/go_daily-termdb.obo-xml.gz'... +description file for GO +Reading from 'GO/gene_association.goa_human.gz'... +processing for taxon: 9606 + 567692 GO dependent xrefs added 0 refseq not found and 0 Swissprot not found +Reading from 'GO/HUMAN_release.html'... +GO release: 'GOA Human (version 102), released on 20 September, 2011 and assembled using the publicly released data available in the source databases on 17 September, 2011. ' +Setting release to 'GOA Human (version 102), released on 20 September, 2011 and assembled using the publicly released data available in the source databases on 17 September, 2011. ' for source ID '52' + +source xrefs prim dep gdir tdir tdir coord synonyms +GO 11987 0 479945 0 0 0 0 0 + +----{ GO }---------------------------------------------------------------------- +Connecting to HTTP host 'www.geneontology.org' +Fetching '/external2go/interpro2go' +Local file is 'GO/interpro2go' +Checksum for 'GO/interpro2go' does not match, will parse... +Parsing 'GO/interpro2go' with InterproGoParser +Reading from 'GO/interpro2go'... 
+Parsed identifiers from GO/interpro2go + added 20056 GO xrefs dependent on InterPro + skipped 5119 GO terms due to missing InterPros + +source xrefs prim dep gdir tdir tdir coord synonyms +GO 932 0 20056 0 0 0 0 0 + +----{ goslim_goa }-------------------------------------------------------------- +Parsing script:host=>ens-staging1,dbname=>ensembl_ontology_65, with GOSlimParser +Parsed GOSlim Generic identifiers from script:host=>ens-staging1,dbname=>ensembl_ontology_65,, added 15130 dependent_xrefs + +source xrefs prim dep gdir tdir tdir coord synonyms +goslim_goa 104 0 15130 0 0 0 0 0 + + +================================================================================ +Summary of status +================================================================================ + CCDS CCDSParser OKAY + DBASS3 DBASSParser OKAY + DBASS5 DBASSParser OKAY + EntrezGene EntrezGeneParser OKAY + GO GOParser OKAY + GO InterproGoParser OKAY + HGNC VegaOfficialNameParser OKAY + HGNC HGNC_CCDSParser OKAY + HGNC HGNCParser OKAY +HGNC_curated_transcript_notransfer curated_transcriptParser OKAY + HPA HPAParser OKAY + IPI IPIParser OKAY + Interpro InterproParser OKAY + MIM MIMParser OKAY + MIM2GENE Mim2GeneParser OKAY + RefSeq_dna RefSeqParser OKAY + RefSeq_dna RefSeq_CCDSParser OKAY + RefSeq_peptide RefSeqGPFFParser OKAY + UCSC_hg19 UCSCParser OKAY + UniGene UniGeneParser OKAY + Uniprot/SPTREMBL UniProtAltParser OKAY + Uniprot/SWISSPROT UniProtAltParser OKAY + Uniprot/SWISSPROT UniProtDirectParser OKAY + goslim_goa GOSlimParser OKAY + ncRNA ncRNA_DBParser OKAY + +source xrefs prim dep gdir tdir tdir coord synonyms +CCDS_transcript 0 0 0 0 33689 +HGNC_curated_transcript_notransfer_transcript 0 0 0 0 118840 +RefSeq_mRNA_predicted_transcript 0 0 0 0 2 +Interpro 22245 0 0 0 0 0 0 0 +HPA 28944 0 36405 0 0 0 0 0 +RFAM_transcript 0 0 0 0 5199 +LRG_HGNC_notransfer_gene 0 0 0 176 +IPI 91464 91464 0 0 0 0 0 0 +HGNC_gene 0 0 0 21589 +MIM_GENE 13877 0 46809 0 0 0 0 497 +DBASS3 160 0 0 0 0 0 0 5 
+HGNC_transcript 0 0 0 0 168016 +RefSeq_peptide 31934 31934 0 0 0 0 0 0 +EntrezGene 42047 0 33047 0 0 0 0 74164 +Uniprot/SPTREMBL 94154 94154 0 0 0 0 0 965 +protein_id 333693 0 538295 0 0 0 0 0 +DBASS5 246 0 0 0 0 0 0 8 +EMBL 210390 0 635157 0 0 0 0 0 +Uniprot_genename 24466 0 119522 0 0 0 0 20730 +MEROPS 839 0 2197 0 0 0 0 0 +RefSeq_peptide_predicted 1113 1113 0 0 0 0 0 0 +Clone_based_vega_transcript 29036 0 0 0 0 0 0 0 +GO 12919 0 500001 0 0 0 0 0 +RefSeq_mRNA_transcript 0 0 0 0 43347 +RefSeq_mRNA 61455 31921 0 0 0 0 0 0 +HGNC 159642 0 145401 0 0 0 0 538224 +RFAM 249 0 0 0 0 0 0 0 +DBASS5_gene 0 0 0 246 +RefSeq_ncRNA 5957 5957 0 0 0 0 0 0 +HGNC_curated_transcript_notransfer 112068 0 0 0 0 0 0 0 +HPA_translation 0 0 0 0 0 50502 +CCDS 26451 0 0 0 0 0 0 0 +RefSeq_ncRNA_predicted 3742 3742 0 0 0 0 0 0 +miRBase 689 0 0 0 0 0 0 0 +DBASS3_gene 0 0 0 160 +RefSeq_mRNA_predicted 1115 1113 0 0 0 0 0 0 +miRBase_transcript 0 0 0 0 715 +LRG_HGNC_notransfer 176 0 0 0 0 0 0 522 +MIM_MORBID 7213 0 15255 0 0 0 0 471 +Uniprot/SWISSPROT 39491 20248 0 0 0 0 0 118046 +UCSC 0 0 0 0 0 0 77614 +WikiGene 42047 0 33047 0 0 0 0 0 +Clone_based_vega_transcript_transcript 0 0 0 0 29274 +goslim_goa 104 0 15130 0 0 0 0 0 +Uniprot/SWISSPROT_translation 0 0 0 0 0 27953 +PDB 18329 0 56046 0 0 0 0 0 +UniGene 122727 122727 0 0 0 0 0 0 + + +------------------------------------------------------------ +Sender: LSF System <lsfadmin@bc-17-3-13> +Subject: Job 546494: <perl ~/src/ensembl/misc-scripts/xref_mapping/xref_parser.pl -user rw -pass password -host ens-research -dbname ianl_human_xref_65 -species human -stats -create -force> Done + +Job <perl ~/src/ensembl/misc-scripts/xref_mapping/xref_parser.pl -user rw -pass password -host ens-research -dbname ianl_human_xref_65 -species human -stats -create -force> was submitted from host <farm2-head3> by user <ianl> in cluster <farm2>. +Job was executed on host(s) <bc-17-3-13>, in queue <normal>, as user <ianl> in cluster <farm2>.
+</lustre/scratch103/ensembl/ianl/release_65/human> was used as the working directory. +Started at Wed Oct 12 15:24:39 2011 +Results reported at Wed Oct 12 17:20:22 2011 + +Your job looked like: + +------------------------------------------------------------ +# LSBATCH: User input +perl ~/src/ensembl-live/ensembl/misc-scripts/xref_mapping/xref_parser.pl -user rw -pass password -host ens-research -dbname ianl_human_xref_65 -species human -stats -create -force +------------------------------------------------------------ + +Successfully completed. + +Resource usage summary: + + CPU time : 1775.25 sec. + Max Memory : 431 MB + Max Swap : 477 MB + + Max Processes : 5 + Max Threads : 6 + +The output (if any) is above this job summary. + + + +PS: + +Read file <parse.err> for stderr output of this job. + -- GitLab