From ac2ee6552ba6743af14cad358330802db8a913c2 Mon Sep 17 00:00:00 2001
From: Ian Longden <ianl@sanger.ac.uk>
Date: Tue, 25 Oct 2011 13:02:39 +0000
Subject: [PATCH] example files from the xref pipeline

---
 misc-scripts/xref_mapping/docs/mapper1.out | 225 +++++++
 misc-scripts/xref_mapping/docs/mapper2.out | 165 ++++++
 misc-scripts/xref_mapping/docs/parse.out   | 646 +++++++++++++++++++++
 3 files changed, 1036 insertions(+)
 create mode 100644 misc-scripts/xref_mapping/docs/mapper1.out
 create mode 100644 misc-scripts/xref_mapping/docs/mapper2.out
 create mode 100644 misc-scripts/xref_mapping/docs/parse.out

diff --git a/misc-scripts/xref_mapping/docs/mapper1.out b/misc-scripts/xref_mapping/docs/mapper1.out
new file mode 100644
index 0000000000..e6e556173e
--- /dev/null
+++ b/misc-scripts/xref_mapping/docs/mapper1.out
@@ -0,0 +1,225 @@
+Options: -file xref_input
+running in verbose mode
+current status is parsing_finished
+No alt_alleles found for this species.
+
+Dumping xref & Ensembl sequences
+Dumping Xref fasta files
+Dumping Ensembl Fasta files
+53067 Transcripts dumped 41693 Transaltions dumped
+Deleting out, err and map files from output dir: /workdir/release_65/zebrafish/ensembl
+Deleting txt and sql files from output dir: /workdir/release_65/zebrafish/ensembl
+LSF job ID for main mapping job: 887287, name ExonerateGappedBest1_1318933449 with 481 arrays elements)
+LSF job ID for main mapping job: 887288, name ExonerateGappedBest1_1318933451 with 253 arrays elements)
+LSF job ID for Depend job: 887289 (job array with 1 job)
+already processed = 0, processed = 734, errors = 0, empty = 0
+Could not find stable id ENSDART00000126968 in table to get the internal id hence ignoring!!! (for RFAM)
+Could not find stable id ENSDART00000121043 in table to get the internal id hence ignoring!!! (for RFAM)
+The foillowing will be processed as priority xrefs
+	Uniprot/SPTREMBL
+	ZFIN_ID
+Process Pairs
+Starting at object_xref of 837705
+	NEW	2733
+2733 new relationships added
+Writing InterPro
+
+246386 already existed
+
+  Wrote 0 interpro table entries
+    including 51399 object xrefs, 
+    and 51399 go xrefs
+ZFIN_ID is associated with both Transcript and Translation object types
+Therefore moving all associations from Translation to Transcript
+DBASS3 moved to Gene level.
+DBASS3 moved to Gene level.
+DBASS5 moved to Gene level.
+DBASS5 moved to Gene level.
+EntrezGene moved to Gene level.
+EntrezGene moved to Gene level.
+miRBase moved to Gene level.
+miRBase moved to Gene level.
+RFAM moved to Gene level.
+RFAM moved to Gene level.
+TRNASCAN_SE moved to Gene level.
+TRNASCAN_SE moved to Gene level.
+RNAMMER moved to Gene level.
+RNAMMER moved to Gene level.
+UniGene moved to Gene level.
+UniGene moved to Gene level.
+Uniprot_genename moved to Gene level.
+Uniprot_genename moved to Gene level.
+WikiGene moved to Gene level.
+WikiGene moved to Gene level.
+MIM_GENE moved to Gene level.
+MIM_GENE moved to Gene level.
+MIM_MORBID moved to Gene level.
+MIM_MORBID moved to Gene level.
+HGNC moved to Gene level.
+HGNC moved to Gene level.
+MOVE SQL
+UPDATE IGNORE object_xref ox, xref x, source s 
+  SET ox.ensembl_id = ? 
+    WHERE x.source_id = s.source_id AND 
+          ox.xref_id = x.xref_id AND
+          ox.ensembl_id = ? AND
+          ox.ensembl_object_type = 'Gene' AND
+          ox.ox_status = 'DUMP_OUT' AND 
+          s.name in (
+'DBASS3', 'DBASS5', 'EntrezGene', 'miRBase', 'RFAM', 'TRNASCAN_SE', 'RNAMMER', 'UniGene', 'Uniprot_genename', 'WikiGene', 'MIM_GENE', 'MIM_MORBID', 'HGNC')
+Number of rows:- moved = 0, identitys deleted = 0, object_xrefs deleted = 0
+Added 0 new mapping but ignored 0
+ZFIN_ID moved to Gene level.
+ZFIN_ID moved to Gene level.
+MAX xref_id = 620426 MAX object_xref_id = 985210, max_object_xref from identity_xref = 985210
+LIST to delete 23, 21, 135, 278, 22, 136, 279, 253
+_ins_xref sql is:-
+insert into xref (xref_id, source_id, accession, label, version, species_id, info_type, info_text, description) values (?, ?, ?, ?,  0, 7955, 'MISC', ?, ? )
+For gene ENSDARG00000001014 we have mutiple ZFIN_ID's
+	Keeping the best one si:ch211-150d5.2
+	removing myh9b from gene
+For gene ENSDARG00000001470 we have mutiple ZFIN_ID's
+	Keeping the best one si:ch211-287j19.6
+	removing zgc:162351 from gene
+For gene ENSDARG00000001559 we have mutiple ZFIN_ID's
+	Keeping the best one si:ch211-46o5.1
+	removing csmd2 from gene
+For gene ENSDARG00000001733 we have mutiple ZFIN_ID's
+	Keeping the best one si:ch211-198b21.4
+	removing gulp1 from gene
+For gene ENSDARG00000001832 we have mutiple ZFIN_ID's
+	Keeping the best one si:ch1073-403i13.1
+	removing zgc:113912 from gene
+	removing zgc:103599 from gene
+For gene ENSDARG00000001879 we have mutiple ZFIN_ID's
+	Keeping the best one si:ch211-169k21.2
+	removing im:7156396 from gene
+For gene ENSDARG00000001889 we have mutiple ZFIN_ID's
+	Keeping the best one tuba1l2
+	removing zgc:123298 from gene
+For gene ENSDARG00000001890 we have mutiple ZFIN_ID's
+	Keeping the best one si:dkey-239i15.3
+	removing stt3b from gene
+For gene ENSDARG00000002084 we have mutiple ZFIN_ID's
+	Keeping the best one lamb2
+	removing hm:zehs0001 from gene
+Multiple best ZFIN_ID's using vega to find the most common for ENSDARG00000002670
+	zgc:113944  (chosen as first)
+	tbpl2  (left as ZFIN_ID reference but not gene symbol)
+For gene ENSDARG00000002937 we have mutiple ZFIN_ID's
+	Keeping the best one meis4.1a
+	removing meis4.1b from gene
+For gene ENSDARG00000003635 we have mutiple ZFIN_ID's
+	Keeping the best one mogat3b
+	removing atp6v1e1a from gene
+Multiple best ZFIN_ID's using vega to find the most common for ENSDARG00000087402
+	tpm1  (chosen as first)
+	zgc:171719  (left as ZFIN_ID reference but not gene symbol)
+Multiple best ZFIN_ID's using vega to find the most common for ENSDARG00000087472
+For gene ENSDARG00000087472 we have mutiple ZFIN_ID's
+	removing zgc:154164 from gene
+	removing zgc:163040 from gene
+	removing hist1h4l from gene
+	Keeping the best one wu:fe37d09
+	Keeping the best one wu:fe38f03
+	Keeping the best one zgc:165555
+	wu:fe37d09  (chosen as first)
+	zgc:165555  (left as ZFIN_ID reference but not gene symbol)
+	wu:fe38f03  (left as ZFIN_ID reference but not gene symbol)
+Multiple best ZFIN_ID's using vega to find the most common for ENSDARG00000087543
+For gene ENSDARG00000087543 we have mutiple ZFIN_ID's
+	removing zgc:154164 from gene
+	removing zgc:163040 from gene
+	removing hist1h4l from gene
+	Keeping the best one wu:fe37d09
+	Keeping the best one wu:fe38f03
+	removing zgc:165555 from gene
+	wu:fe37d09  (chosen as first)
+	wu:fe38f03  (left as ZFIN_ID reference but not gene symbol)
+For gene ENSDARG00000087583 we have mutiple ZFIN_ID's
+	Keeping the best one si:ch211-226h8.13
+	removing si:ch211-154a22.8 from gene
+Multiple best ZFIN_ID's using vega to find the most common for ENSDARG00000087670
+For gene ENSDARG00000087670 we have mutiple ZFIN_ID's
+	removing zgc:154164 from gene
+	removing zgc:163040 from gene
+	removing hist1h4l from gene
+	Keeping the best one wu:fe37d09
+	Keeping the best one wu:fe38f03
+	Keeping the best one zgc:165555
+	wu:fe37d09  (chosen as first)
+	zgc:165555  (left as ZFIN_ID reference but not gene symbol)
+	wu:fe38f03  (left as ZFIN_ID reference but not gene symbol)
+Multiple best ZFIN_ID's using vega to find the most common for ENSDARG00000087694
+For gene ENSDARG00000087694 we have mutiple ZFIN_ID's
+	Keeping the best one zgc:112234
+	Keeping the best one zgc:171759
+	removing zgc:171937 from gene
+	Keeping the best one wu:fe11b02
+	wu:fe11b02  (chosen as first)
+	zgc:171759  (left as ZFIN_ID reference but not gene symbol)
+	zgc:112234  (left as ZFIN_ID reference but not gene symbol)
+For gene ENSDARG00000096097 we have mutiple ZFIN_ID's
+	Keeping the best one si:dkeyp-98a7.5
+	removing zgc:172150 from gene
+For gene ENSDARG00000096159 we have mutiple ZFIN_ID's
+	Keeping the best one si:dkeyp-98a7.4
+	removing zgc:172150 from gene
+
+For gene.... Lots of these so cut them out to save time and space
+
+WARNING: Clone_based_ensembl_gene has decreased by -5 % was 7652 now 7194
+WARNING: Clone_based_ensembl_transcript has decreased by -8 % was 8260 now 7554
+WARNING: Clone_based_vega_gene has increased by 144% was 276 now 675
+WARNING: GO has increased by 56% was 87289 now 136827
+WARNING: goslim_goa has increased by 54% was 62738 now 96927
+WARNING: xrefs miRBase_gene_name are not in the new database but are in the old???
+WARNING: xrefs OTTG are not in the new database but are in the old???
+WARNING: xrefs OTTT are not in the new database but are in the old???
+WARNING: RefSeq_ncRNA has increased by 5% was 644 now 677
+WARNING: xrefs RFAM_gene_name are not in the new database but are in the old???
+WARNING: xrefs shares_CDS_and_UTR_with_OTTT are not in the new database but are in the old???
+WARNING: xrefs shares_CDS_with_ENST are not in the new database but are in the old???
+WARNING: xrefs shares_CDS_with_OTTT are not in the new database but are in the old???
+WARNING: xrefs Vega_transcript are not in the new database but are in the old???
+WARNING: xrefs Vega_translation are not in the new database but are in the old???
+WARNING: ZFIN_ID_curated_transcript_notransfer has 9748 xrefs in the new database but NONE in the old
+xref_mapper.pl FINISHED NORMALLY
+
+------------------------------------------------------------
+Sender: LSF System <lsfadmin@bc-24-1-04>
+Subject: Job 886769: <perl ~/src/ensembl/misc-scripts/xref_mapping/xref_mapper.pl -file xref_input> Done
+
+Job <perl ~/src/ensembl/misc-scripts/xref_mapping/xref_mapper.pl -file xref_input> was submitted from host <farm2-head4> by user <ianl> in cluster <farm2>.
+Job was executed on host(s) <bc-24-1-04>, in queue <normal>, as user <ianl> in cluster <farm2>.
+<~/> was used as the home directory.
+</workdir/release_65/zebrafish> was used as the working directory.
+Started at Tue Oct 18 11:01:18 2011
+Results reported at Tue Oct 18 12:17:34 2011
+
+Your job looked like:
+
+------------------------------------------------------------
+# LSBATCH: User input
+perl ~/src/ensembl/misc-scripts/xref_mapping/xref_mapper.pl -file xref_input
+------------------------------------------------------------
+
+Successfully completed.
+
+Resource usage summary:
+
+    CPU time   :    734.06 sec.
+    Max Memory :       173 MB
+    Max Swap   :       204 MB
+
+    Max Processes  :         6
+    Max Threads    :         7
+
+The output (if any) is above this job summary.
+
+
+
+PS:
+
+Read file <mapper.err> for stderr output of this job.
+
diff --git a/misc-scripts/xref_mapping/docs/mapper2.out b/misc-scripts/xref_mapping/docs/mapper2.out
new file mode 100644
index 0000000000..3a7a9b28f5
--- /dev/null
+++ b/misc-scripts/xref_mapping/docs/mapper2.out
@@ -0,0 +1,165 @@
+Options: -file xref_input -upload
+running in verbose mode
+current status is tests_finished
+Deleting data for Clone_based_ensembl_gene from core before updating from new xref database
+Deleting data for Clone_based_ensembl_transcript from core before updating from new xref database
+Deleting data for Clone_based_vega_gene from core before updating from new xref database
+Deleting data for Clone_based_vega_transcript from core before updating from new xref database
+Deleting data for EMBL from core before updating from new xref database
+Deleting data for EntrezGene from core before updating from new xref database
+Deleting data for GO from core before updating from new xref database
+Deleting data for goslim_goa from core before updating from new xref database
+Deleting data for IPI from core before updating from new xref database
+Deleting data for MEROPS from core before updating from new xref database
+Deleting data for miRBase from core before updating from new xref database
+Deleting data for miRBase_transcript_name from core before updating from new xref database
+Deleting data for PDB from core before updating from new xref database
+Deleting data for protein_id from core before updating from new xref database
+Deleting data for RefSeq_mRNA from core before updating from new xref database
+Deleting data for RefSeq_mRNA_predicted from core before updating from new xref database
+Deleting data for RefSeq_ncRNA from core before updating from new xref database
+Deleting data for RefSeq_ncRNA_predicted from core before updating from new xref database
+Deleting data for RefSeq_peptide from core before updating from new xref database
+Deleting data for RefSeq_peptide_predicted from core before updating from new xref database
+Deleting data for RFAM from core before updating from new xref database
+Deleting data for RFAM_transcript_name from core before updating from new xref database
+Deleting data for UniGene from core before updating from new xref database
+Deleting data for Uniprot/SPTREMBL from core before updating from new xref database
+Deleting data for Uniprot/SWISSPROT from core before updating from new xref database
+Deleting data for Uniprot_genename from core before updating from new xref database
+Deleting data for WikiGene from core before updating from new xref database
+Deleting data for ZFIN_ID from core before updating from new xref database
+Deleting data for ZFIN_ID_transcript_name from core before updating from new xref database
+xref offset is 722445, object_xref offset is 170998
+updating (21) Clone_based_ensembl_gene in core (for MISC xrefs)
+DIRECT 7194
+updating (22) Clone_based_ensembl_transcript in core (for MISC xrefs)
+DIRECT 7554
+updating (23) Clone_based_vega_gene in core (for MISC xrefs)
+DIRECT 675
+updating (24) Clone_based_vega_transcript in core (for MISC xrefs)
+DIRECT 302
+updating (24) Clone_based_vega_transcript in core (for DIRECT xrefs)
+DIRECT 17688
+updating (236) EMBL in core (for DEPENDENT xrefs)
+DEP 42665 xrefs, 94223 object_xrefs
+updating (39) EntrezGene in core (for DEPENDENT xrefs)
+DEP 21473 xrefs, 23897 object_xrefs
+	added 30853 synonyms
+updating (52) GO in core (for DEPENDENT xrefs)
+GO 4535
+updating (274) goslim_goa in core (for DEPENDENT xrefs)
+DEP 99 xrefs, 96927 object_xrefs
+updating (91) IPI in core (for SEQUENCE_MATCH xrefs)
+SEQ 35478
+updating (107) MEROPS in core (for DEPENDENT xrefs)
+DEP 286 xrefs, 490 object_xrefs
+updating (275) miRBase in core (for DIRECT xrefs)
+DIRECT 354
+updating (279) miRBase_transcript_name in core (for MISC xrefs)
+DIRECT 337
+updating (224) PDB in core (for DEPENDENT xrefs)
+DEP 65 xrefs, 82 object_xrefs
+updating (225) protein_id in core (for DEPENDENT xrefs)
+DEP 35479 xrefs, 45695 object_xrefs
+updating (163) RefSeq_mRNA in core (for SEQUENCE_MATCH xrefs)
+SEQ 13272
+updating (163) RefSeq_mRNA in core (for INFERRED_PAIR xrefs)
+DIRECT 598
+updating (165) RefSeq_mRNA_predicted in core (for SEQUENCE_MATCH xrefs)
+SEQ 7546
+updating (165) RefSeq_mRNA_predicted in core (for INFERRED_PAIR xrefs)
+DIRECT 1333
+updating (166) RefSeq_ncRNA in core (for SEQUENCE_MATCH xrefs)
+SEQ 342
+updating (167) RefSeq_ncRNA_predicted in core (for SEQUENCE_MATCH xrefs)
+SEQ 323
+updating (168) RefSeq_peptide in core (for SEQUENCE_MATCH xrefs)
+SEQ 13705
+updating (168) RefSeq_peptide in core (for INFERRED_PAIR xrefs)
+DIRECT 127
+updating (172) RefSeq_peptide_predicted in core (for SEQUENCE_MATCH xrefs)
+SEQ 8283
+updating (172) RefSeq_peptide_predicted in core (for INFERRED_PAIR xrefs)
+DIRECT 348
+updating (134) RFAM in core (for DIRECT xrefs)
+DIRECT 146
+updating (136) RFAM_transcript_name in core (for MISC xrefs)
+DIRECT 3667
+updating (198) UniGene in core (for SEQUENCE_MATCH xrefs)
+SEQ 22897
+updating (227) Uniprot/SPTREMBL in core (for SEQUENCE_MATCH xrefs)
+SEQ 22028
+	added 139 synonyms
+updating (228) Uniprot/SPTREMBL in core (for SEQUENCE_MATCH xrefs)
+SEQ 28993
+	added 98 synonyms
+updating (232) Uniprot/SWISSPROT in core (for SEQUENCE_MATCH xrefs)
+SEQ 2650
+	added 1408 synonyms
+updating (238) Uniprot_genename in core (for DEPENDENT xrefs)
+DEP 30002 xrefs, 31056 object_xrefs
+	added 17256 synonyms
+updating (246) WikiGene in core (for DEPENDENT xrefs)
+DEP 21473 xrefs, 23897 object_xrefs
+updating (248) ZFIN_ID in core (for DEPENDENT xrefs)
+DEP 3804 xrefs, 8337 object_xrefs
+	added 4988 synonyms
+updating (249) ZFIN_ID in core (for DIRECT xrefs)
+DIRECT 16414
+	added 25129 synonyms
+updating (253) ZFIN_ID_transcript_name in core (for MISC xrefs)
+DIRECT 40344
+Setting Transcript and Gene display_xrefs from xref database into core and setting the desc
+Using xref_off set of 722445
+24488 gene descriptions added
+Only setting those not already set
+Presedence for Gene Descriptions
+	Uniprot/SPTREMBL	1
+	RefSeq_dna	3
+	RefSeq_peptide	4
+	Uniprot/SWISSPROT	5
+	IMGT/GENE_DB	6
+	ZFIN_ID	7
+	miRBase	8
+	RFAM	9
+6437 gene descriptions added
+xref_mapper.pl FINISHED NORMALLY
+
+------------------------------------------------------------
+Sender: LSF System <lsfadmin@bc-17-3-12>
+Subject: Job 897678: <perl ~/src/ensembl/misc-scripts/xref_mapping/xref_mapper.pl -file xref_input -upload> Done
+
+Job <perl ~/src/ensembl/misc-scripts/xref_mapping/xref_mapper.pl -file xref_input -upload> was submitted from host <farm2-head3> by user <ianl> in cluster <farm2>.
+Job was executed on host(s) <bc-17-3-12>, in queue <normal>, as user <ianl> in cluster <farm2>.
+</nfs/users/nfs_i/ianl> was used as the home directory.
+</workdir/release_65/zebrafish> was used as the working directory.
+Started at Tue Oct 18 13:38:32 2011
+Results reported at Tue Oct 18 14:02:49 2011
+
+Your job looked like:
+
+------------------------------------------------------------
+# LSBATCH: User input
+perl ~/src/ensembl/misc-scripts/xref_mapping/xref_mapper.pl -file xref_input -upload
+------------------------------------------------------------
+
+Successfully completed.
+
+Resource usage summary:
+
+    CPU time   :    127.40 sec.
+    Max Memory :        40 MB
+    Max Swap   :        71 MB
+
+    Max Processes  :         3
+    Max Threads    :         4
+
+The output (if any) is above this job summary.
+
+
+
+PS:
+
+Read file <mapper2.err> for stderr output of this job.
+
diff --git a/misc-scripts/xref_mapping/docs/parse.out b/misc-scripts/xref_mapping/docs/parse.out
new file mode 100644
index 0000000000..255f0e562c
--- /dev/null
+++ b/misc-scripts/xref_mapping/docs/parse.out
@@ -0,0 +1,646 @@
+Options: -user rw -pass password -host ens-research -dbname ianl_human_xref_65 -species human -stats -create -force
+host os ens-research
+==> Done.
+Creating ianl_human_xref_65 from ~/src/ensembl/misc-scripts/xref_mapping/sql/table.sql
+Populating metadata in ianl_human_xref_65 from ~/src/ensembl/misc-scripts/xref_mapping/sql/populate_metadata.sql
+Species human is valid (name = homo_sapiens, ID = 9606)
+----{ CCDS }--------------------------------------------------------------------
+Parsing script:host=>ens-livemirror,dbname=>ccds_human_65,tran_name=>ENST, with CCDSParser
+Parsed CCDS identifiers from script:host=>ens-livemirror,dbname=>ccds_human_65,tran_name=>ENST,, added 26451 xrefs and 33689 direct_xrefs
+
+source            xrefs   prim    dep     gdir    tdir    tdir    coord   synonyms
+CCDS_transcript   0       0       0       0       33689   
+CCDS              26451   0       0       0       0       0       0       0       
+
+----{ EntrezGene }--------------------------------------------------------------
+Connecting to FTP host 'ftp.ncbi.nlm.nih.gov' for file 'EntrezGene/gene_info.gz' 
+Fetching 'gene_info.gz' (size = 136652895)
+Local file is 'EntrezGene/gene_info.gz'
+'EntrezGene/gene_info.gz' passed (gzip -t) corruption test.
+Checksum for 'EntrezGene/gene_info.gz' does not match, will parse...
+Parsing 'EntrezGene/gene_info.gz' with EntrezGeneParser
+Reading from 'EntrezGene/gene_info.gz'...
+42029 EntrezGene Xrefs added with 74164 synonyms
+
+source       xrefs   prim    dep     gdir    tdir    tdir    coord   synonyms
+WikiGene     42029   0       0       0       0       0       0       0       
+EntrezGene   42029   0       0       0       0       0       0       74164   
+
+----{ MIM }---------------------------------------------------------------------
+Connecting to FTP host 'grcf.jhmi.edu' for file 'MIM/omim.txt.Z' 
+Creating directory 'MIM'
+Fetching 'omim.txt.Z' (size = 68826997)
+Local file is 'MIM/omim.txt.Z'
+'MIM/omim.txt.Z' passed (gzip -t) corruption test.
+Checksum for 'MIM/omim.txt.Z' does not match, will parse...
+Parsing 'MIM/omim.txt.Z' with MIMParser
+sources are:- 118, 119, 120
+Reading from 'MIM/omim.txt.Z'...
+13876 genemap and 7209 phenotype MIM xrefs added
+added 947 synonyms (defined by MOVED TO)
+
+source       xrefs   prim    dep     gdir    tdir    tdir    coord   synonyms
+MIM_MORBID   7209    0       0       0       0       0       0       471     
+MIM_GENE     13876   0       0       0       0       0       0       497     
+
+----{ RefSeq_dna }--------------------------------------------------------------
+Connecting to FTP host 'ftp.ncbi.nih.gov' for file 'RefSeq_dna/human.rna.fna.gz' 
+Creating directory 'RefSeq_dna'
+Fetching 'human.rna.fna.gz' (size = 37698093)
+Local file is 'RefSeq_dna/human.rna.fna.gz'
+'RefSeq_dna/human.rna.fna.gz' passed (gzip -t) corruption test.
+Connecting to FTP host 'ftp.ncbi.nih.gov' for file 'RefSeq_dna/RefSeq-release*.txt' 
+Fetching 'RefSeq-release49.txt' (size = 57655)
+Local file is 'RefSeq_dna/RefSeq-release49.txt'
+Checksum for 'RefSeq_dna/human.rna.fna.gz' does not match, will parse...
+Parsing 'RefSeq_dna/human.rna.fna.gz' with RefSeqParser
+RefSeq_peptide source ID = 168
+RefSeq_dna source ID = 139
+RefSeq_mRNA source ID = 163
+RefSeq_ncRNA source ID = 166
+RefSeq_peptide_predicted source ID = 172
+RefSeq_dna_predicted source ID = 143
+RefSeq_mRNA_predicted source ID = 165
+RefSeq_ncRNA_predicted source ID = 167
+Reading from 'RefSeq_dna/human.rna.fna.gz'...
+Read 42733 xrefs from RefSeq_dna/human.rna.fna.gz
+count = 42733
+Uploading xrefs
+Reading from 'RefSeq_dna/RefSeq-release49.txt'...
+RefSeq release: 'NCBI Reference Sequence (RefSeq) Database Release 49, September 7, 2011'
+Setting release to 'NCBI Reference Sequence (RefSeq) Database Release 49, September 7, 2011' for source ID '155'
+Setting release to 'NCBI Reference Sequence (RefSeq) Database Release 49, September 7, 2011' for source ID '168'
+Setting release to 'NCBI Reference Sequence (RefSeq) Database Release 49, September 7, 2011' for source ID '139'
+Setting release to 'NCBI Reference Sequence (RefSeq) Database Release 49, September 7, 2011' for source ID '163'
+Setting release to 'NCBI Reference Sequence (RefSeq) Database Release 49, September 7, 2011' for source ID '166'
+Setting release to 'NCBI Reference Sequence (RefSeq) Database Release 49, September 7, 2011' for source ID '172'
+Setting release to 'NCBI Reference Sequence (RefSeq) Database Release 49, September 7, 2011' for source ID '143'
+Setting release to 'NCBI Reference Sequence (RefSeq) Database Release 49, September 7, 2011' for source ID '165'
+Setting release to 'NCBI Reference Sequence (RefSeq) Database Release 49, September 7, 2011' for source ID '167'
+
+source                   xrefs   prim    dep     gdir    tdir    tdir    coord   synonyms
+RefSeq_mRNA_predicted    1113    1113    0       0       0       0       0       0       
+RefSeq_ncRNA_predicted   3742    3742    0       0       0       0       0       0       
+RefSeq_ncRNA             5957    5957    0       0       0       0       0       0       
+RefSeq_mRNA              31921   31921   0       0       0       0       0       0       
+
+----{ RefSeq_peptide }----------------------------------------------------------
+Connecting to FTP host 'ftp.ncbi.nih.gov' for file 'RefSeq_peptide/human.protein.gpff.gz' 
+Creating directory 'RefSeq_peptide'
+Fetching 'human.protein.gpff.gz' (size = 87557956)
+Local file is 'RefSeq_peptide/human.protein.gpff.gz'
+'RefSeq_peptide/human.protein.gpff.gz' passed (gzip -t) corruption test.
+Connecting to FTP host 'ftp.ncbi.nih.gov' for file 'RefSeq_peptide/RefSeq-release*.txt' 
+Fetching 'RefSeq-release49.txt' (size = 57655)
+Local file is 'RefSeq_peptide/RefSeq-release49.txt'
+Checksum for 'RefSeq_peptide/human.protein.gpff.gz' does not match, will parse...
+Parsing 'RefSeq_peptide/human.protein.gpff.gz' with RefSeqGPFFParser
+RefSeq_peptide source ID = 168
+RefSeq_dna source ID = 139
+RefSeq_mRNA source ID = 163
+RefSeq_ncRNA source ID = 166
+RefSeq_peptide_predicted source ID = 172
+RefSeq_dna_predicted source ID = 143
+RefSeq_mRNA_predicted source ID = 165
+RefSeq_ncRNA_predicted source ID = 167
+Reading from 'RefSeq_peptide/human.protein.gpff.gz'...
+Read 33047 xrefs from RefSeq_peptide/human.protein.gpff.gz
+count = 33047
+Uploading xrefs
+Reading from 'RefSeq_peptide/RefSeq-release49.txt'...
+RefSeq release: 'NCBI Reference Sequence (RefSeq) Database Release 49, September 7, 2011'
+Setting release to 'NCBI Reference Sequence (RefSeq) Database Release 49, September 7, 2011' for source ID '182'
+Setting release to 'NCBI Reference Sequence (RefSeq) Database Release 49, September 7, 2011' for source ID '168'
+Setting release to 'NCBI Reference Sequence (RefSeq) Database Release 49, September 7, 2011' for source ID '163'
+Setting release to 'NCBI Reference Sequence (RefSeq) Database Release 49, September 7, 2011' for source ID '166'
+Setting release to 'NCBI Reference Sequence (RefSeq) Database Release 49, September 7, 2011' for source ID '165'
+Setting release to 'NCBI Reference Sequence (RefSeq) Database Release 49, September 7, 2011' for source ID '167'
+Setting release to 'NCBI Reference Sequence (RefSeq) Database Release 49, September 7, 2011' for source ID '139'
+Setting release to 'NCBI Reference Sequence (RefSeq) Database Release 49, September 7, 2011' for source ID '172'
+Setting release to 'NCBI Reference Sequence (RefSeq) Database Release 49, September 7, 2011' for source ID '143'
+
+source                     xrefs   prim    dep     gdir    tdir    tdir    coord   synonyms
+WikiGene                   18      0       33047   0       0       0       0       0       
+RefSeq_peptide_predicted   1113    1113    0       0       0       0       0       0       
+RefSeq_peptide             31934   31934   0       0       0       0       0       0       
+EntrezGene                 18      0       33047   0       0       0       0       0       
+
+----{ Uniprot/SPTREMBL }--------------------------------------------------------
+Connecting to FTP host 'ftp.ebi.ac.uk' for file 'UniprotSPTREMBL/uniprot_trembl.dat.gz' 
+Creating directory 'UniprotSPTREMBL'
+Fetching 'uniprot_trembl.dat.gz' (size = 6928570594)
+Local file is 'UniprotSPTREMBL/uniprot_trembl.dat.gz'
+'UniprotSPTREMBL/uniprot_trembl.dat.gz' passed (gzip -t) corruption test.
+Connecting to FTP host 'ftp.ebi.ac.uk' for file 'UniprotSPTREMBL/reldate.txt' 
+Fetching 'reldate.txt' (size = 151)
+Local file is 'UniprotSPTREMBL/reldate.txt'
+Checksum for 'UniprotSPTREMBL/uniprot_trembl.dat.gz' does not match, will parse...
+Parsing 'UniprotSPTREMBL/uniprot_trembl.dat.gz' with UniProtAltParser
+SwissProt source id for UniprotSPTREMBL/uniprot_trembl.dat.gz: 232
+SpTREMBL source id for UniprotSPTREMBL/uniprot_trembl.dat.gz: 227
+SpTREMBL protein_evidence > 3 source id for UniprotSPTREMBL/uniprot_trembl.dat.gz: 228
+Predicted SwissProt source id for UniprotSPTREMBL/uniprot_trembl.dat.gz: 233
+Predicted SpTREMBL source id for UniprotSPTREMBL/uniprot_trembl.dat.gz: 229
+Predicted EMBL source id for UniprotSPTREMBL/uniprot_trembl.dat.gz: 237
+Predicted protein_id source id for UniprotSPTREMBL/uniprot_trembl.dat.gz: 226
+Reading from 'UniprotSPTREMBL/uniprot_trembl.dat.gz'...
+Read 0 SwissProt xrefs, 58146 SPTrEMBL xrefs with protein evidence codes 1-3, and 36008 SPTrEMBL xrefs with protein evidence codes > 3 from UniprotSPTREMBL/uniprot_trembl.dat.gz
+Added the following dependent xrefs:-
+MEROPS	226
+PDB	154
+EMBL	488892
+Uniprot_genename	72109
+count = 94154
+Uploading xrefs
+Reading from 'UniprotSPTREMBL/reldate.txt'...
+Swiss-Prot release is 'UniProtKB/Swiss-Prot Release 2011_09 of 21-Sep-2011'
+SpTrEMBL release is 'UniProtKB/TrEMBL Release 2011_09 of 21-Sep-2011'
+Setting release to 'UniProtKB/Swiss-Prot Release 2011_09 of 21-Sep-2011' for source ID '232'
+Setting release to 'UniProtKB/TrEMBL Release 2011_09 of 21-Sep-2011' for source ID '227'
+Setting release to 'UniProtKB/Swiss-Prot Release 2011_09 of 21-Sep-2011' for source ID '233'
+Setting release to 'UniProtKB/TrEMBL Release 2011_09 of 21-Sep-2011' for source ID '229'
+Setting release to 'UniProtKB/TrEMBL Release 2011_09 of 21-Sep-2011' for source ID '228'
+
+source             xrefs   prim    dep     gdir    tdir    tdir    coord   synonyms
+MEROPS             81      0       226     0       0       0       0       0       
+Uniprot/SPTREMBL   94154   94154   0       0       0       0       0       965     
+protein_id         207835  0       207835  0       0       0       0       0       
+PDB                138     0       154     0       0       0       0       0       
+EMBL               99122   0       275393  0       0       0       0       0       
+Uniprot_genename   18031   0       72109   0       0       0       0       1751    
+
+----{ Uniprot/SWISSPROT }-------------------------------------------------------
+Connecting to FTP host 'ftp.ebi.ac.uk' for file 'UniprotSWISSPROT/uniprot_sprot.dat.gz' 
+Creating directory 'UniprotSWISSPROT'
+Fetching 'uniprot_sprot.dat.gz' (size = 446105793)
+Local file is 'UniprotSWISSPROT/uniprot_sprot.dat.gz'
+'UniprotSWISSPROT/uniprot_sprot.dat.gz' passed (gzip -t) corruption test.
+Connecting to FTP host 'ftp.ebi.ac.uk' for file 'UniprotSWISSPROT/reldate.txt' 
+Fetching 'reldate.txt' (size = 151)
+Local file is 'UniprotSWISSPROT/reldate.txt'
+Checksum for 'UniprotSWISSPROT/uniprot_sprot.dat.gz' does not match, will parse...
+Parsing 'UniprotSWISSPROT/uniprot_sprot.dat.gz' with UniProtAltParser
+SwissProt source id for UniprotSWISSPROT/uniprot_sprot.dat.gz: 232
+SpTREMBL source id for UniprotSWISSPROT/uniprot_sprot.dat.gz: 227
+SpTREMBL protein_evidence > 3 source id for UniprotSWISSPROT/uniprot_sprot.dat.gz: 228
+Predicted SwissProt source id for UniprotSWISSPROT/uniprot_sprot.dat.gz: 233
+Predicted SpTREMBL source id for UniprotSWISSPROT/uniprot_sprot.dat.gz: 229
+Predicted EMBL source id for UniprotSWISSPROT/uniprot_sprot.dat.gz: 237
+Predicted protein_id source id for UniprotSWISSPROT/uniprot_sprot.dat.gz: 226
+Reading from 'UniprotSWISSPROT/uniprot_sprot.dat.gz'...
+Read 20248 SwissProt xrefs, 0 SPTrEMBL xrefs with protein evidence codes 1-3, and 0 SPTrEMBL xrefs with protein evidence codes > 3 from UniprotSWISSPROT/uniprot_sprot.dat.gz
+Added the following dependent xrefs:-
+MEROPS	823
+MIM_MORBID	3782
+MIM_GENE	13155
+HPA	14688
+PDB	21041
+EMBL	286861
+Uniprot_genename	19696
+count = 20248
+Uploading xrefs
+Reading from 'UniprotSWISSPROT/reldate.txt'...
+Swiss-Prot release is 'UniProtKB/Swiss-Prot Release 2011_09 of 21-Sep-2011'
+SpTrEMBL release is 'UniProtKB/TrEMBL Release 2011_09 of 21-Sep-2011'
+Setting release to 'UniProtKB/Swiss-Prot Release 2011_09 of 21-Sep-2011' for source ID '232'
+Setting release to 'UniProtKB/TrEMBL Release 2011_09 of 21-Sep-2011' for source ID '227'
+Setting release to 'UniProtKB/Swiss-Prot Release 2011_09 of 21-Sep-2011' for source ID '233'
+Setting release to 'UniProtKB/TrEMBL Release 2011_09 of 21-Sep-2011' for source ID '229'
+Setting release to 'UniProtKB/TrEMBL Release 2011_09 of 21-Sep-2011' for source ID '228'
+
+source              xrefs   prim    dep     gdir    tdir    tdir    coord   synonyms
+MIM_MORBID          4       0       3782    0       0       0       0       0       
+Uniprot/SWISSPROT   20248   20248   0       0       0       0       0       59372   
+protein_id          125858  0       125931  0       0       0       0       0       
+HPA                 14438   0       14688   0       0       0       0       0       
+EMBL                111268  0       140519  0       0       0       0       0       
+Uniprot_genename    6435    0       19696   0       0       0       0       18979   
+MEROPS              758     0       823     0       0       0       0       0       
+MIM_GENE            1       0       13155   0       0       0       0       0       
+PDB                 18191   0       21041   0       0       0       0       0       
+
+----{ Uniprot/SWISSPROT }-------------------------------------------------------
+Connecting to FTP host 'ftp.ebi.ac.uk' for file 'UniprotSWISSPROT/ens-sp.map' 
+Fetching 'ens-sp.map' (size = 2157658)
+Local file is 'UniprotSWISSPROT/ens-sp.map'
+Checksum for 'UniprotSWISSPROT/ens-sp.map' does not match, will parse...
+Parsing 'UniprotSWISSPROT/ens-sp.map' with UniProtDirectParser
+Reading from 'UniprotSWISSPROT/ens-sp.map'...
+Source_id = 232
+19243 entrys added
+0 not found
+
+source                          xrefs   prim    dep     gdir    tdir    tdir    coord   synonyms
+MIM_MORBID                      0       0       5748    
+protein_id                      0       0       204529  
+HPA                             0       0       21717   
+Uniprot/SWISSPROT               19243   0       0       0       0       0       0       58674   
+EMBL                            0       0       219245  
+Uniprot_genename                0       0       27717   
+MEROPS                          0       0       1148    
+MIM_GENE                        0       0       19913   
+Uniprot/SWISSPROT_translation   0       0       0       0       0       27953   
+PDB                             0       0       34851   
+
+----{ RefSeq_dna }--------------------------------------------------------------
+Parsing script:host=>ens-livemirror,dbname=>ccds_human_65, with RefSeq_CCDSParser
+RefSeq_mRNA source ID = 162
+RefSeq_mRNA_predicted source ID = 164
+Parsed 29980 RefSeq_dna identifiers from script:host=>ens-livemirror,dbname=>ccds_human_65,, added 29536 xrefs and 43349 direct_xrefs  from 29980 lines.
+
+source                             xrefs   prim    dep     gdir    tdir    tdir    coord   synonyms
+RefSeq_mRNA_predicted              2       0       0       0       0       0       0       0       
+RefSeq_mRNA_predicted_transcript   0       0       0       0       2       
+RefSeq_mRNA_transcript             0       0       0       0       43347   
+RefSeq_mRNA                        29534   0       0       0       0       0       0       0       
+
+----{ HGNC }--------------------------------------------------------------------
+Creating directory 'HGNC'
+Connecting to HTTP host 'www.genenames.org'
+Fetching '/cgi-bin/hgnc_downloads.cgi'
+Local file is 'HGNC/hgnc_downloads.cgi'
+Checksum for 'HGNC/hgnc_downloads.cgi' does not match, will parse...
+Parsing 'HGNC/hgnc_downloads.cgi' with HGNCParser
+Reading from 'HGNC/hgnc_downloads.cgi'...
+Loaded a total of :-	entrezgene_manual	18431
+	ensembl_manual	21589
+	entrezgene_mapped	18885
+	refseq_manual	30665
+	refseq_mapped	38787
+	Locus Specific Databases	176
+	swissprot_manual	38633
+6042 xrefs could not be associated via RefSeq, EntrezGene or ensembl
+
+source                     xrefs   prim    dep     gdir    tdir    tdir    coord   synonyms
+LRG_HGNC_notransfer        176     0       0       0       0       0       0       522     
+LRG_HGNC_notransfer_gene   0       0       0       176     
+HGNC_gene                  0       0       0       21589   
+HGNC                       122651  0       145401  0       0       0       0       228088  
+
+----{ HGNC }--------------------------------------------------------------------
+Parsing script:host=>ens-staging1,source=>HGNC, with VegaOfficialNameParser
+We have 221829/524288 vega to external source entries
+ We have 76942/131072 vega to external source entries
+Parsed 115264 HGNC identifiers from script:host=>ens-staging1,source=>HGNC,, added 18597 xrefs and 115264 direct_xrefs
+3 ignoreed due to numbers no identifiers being no longer valid :-  25711 (HGNC:25711 ) 18577 (HGNC:18577 ) 18577 (HGNC:18577 )
+
+source            xrefs   prim    dep     gdir    tdir    tdir    coord   synonyms
+HGNC              18597   0       0       0       0       0       0       253277  
+HGNC_transcript   0       0       0       0       115264  
+
+----{ HGNC }--------------------------------------------------------------------
+Parsing script:wget=>http://www.genenames.org/cgi-bin/hgnc_downloads.cgi?title=HGNC+output+data&hgnc_dbtag=on&col=gd_hgnc_id&col=gd_status&col=gd_ccds_ids&status=Approved&status_opt=2&level=pri&=on&where=&order_by=gd_app_sym_sort&limit=&format=text&submit=submit&.cgifields=&.cgifields=level&.cgifields=chr&.cgifields=status&.cgifields=hgnc_dbtag,host=>ens-livemirror,dbname=>ccds_human_65, with HGNC_CCDSParser
+75 missed as no hgnc for the ccds. Added 26376 HGNC xrefs via CCDS and 52752 direct xrefs
+
+source            xrefs   prim    dep     gdir    tdir    tdir    coord   synonyms
+HGNC              18394   0       0       0       0       0       0       56859   
+HGNC_transcript   0       0       0       0       52752   
+
+----{ IPI }---------------------------------------------------------------------
+Connecting to FTP host 'ftp.ebi.ac.uk' for file 'IPI/ipi.HUMAN.fasta.gz' 
+Creating directory 'IPI'
+Fetching 'ipi.HUMAN.fasta.gz' (size = 25558620)
+Local file is 'IPI/ipi.HUMAN.fasta.gz'
+'IPI/ipi.HUMAN.fasta.gz' passed (gzip -t) corruption test.
+Checksum for 'IPI/ipi.HUMAN.fasta.gz' does not match, will parse...
+Parsing 'IPI/ipi.HUMAN.fasta.gz' with IPIParser
+Reading from 'IPI/ipi.HUMAN.fasta.gz'...
+count = 91464
+Uploading xrefs
+91464 IPI xrefs succesfully parsed
+
+source   xrefs   prim    dep     gdir    tdir    tdir    coord   synonyms
+IPI      91464   91464   0       0       0       0       0       0       
+
+----{ UniGene }-----------------------------------------------------------------
+Connecting to FTP host 'ftp.ncbi.nih.gov' for file 'UniGene/Hs.seq.uniq.gz' 
+Creating directory 'UniGene'
+Fetching 'Hs.seq.uniq.gz' (size = 55756319)
+Local file is 'UniGene/Hs.seq.uniq.gz'
+'UniGene/Hs.seq.uniq.gz' passed (gzip -t) corruption test.
+Connecting to FTP host 'ftp.ncbi.nih.gov' for file 'UniGene/Hs.data.gz' 
+Fetching 'Hs.data.gz' (size = 167673440)
+Local file is 'UniGene/Hs.data.gz'
+'UniGene/Hs.data.gz' passed (gzip -t) corruption test.
+Connecting to FTP host 'ftp.ncbi.nih.gov' for file 'UniGene/*.LOG' 
+Fetching '2003.LOG' (size = 38108)
+Local file is 'UniGene/2003.LOG'
+Fetching '2004.LOG' (size = 37143)
+Local file is 'UniGene/2004.LOG'
+Fetching '2005.LOG' (size = 26650)
+Local file is 'UniGene/2005.LOG'
+Fetching '2006.LOG' (size = 32236)
+Local file is 'UniGene/2006.LOG'
+Fetching '2007.LOG' (size = 48006)
+Local file is 'UniGene/2007.LOG'
+Fetching '2008.LOG' (size = 47076)
+Local file is 'UniGene/2008.LOG'
+Fetching '2009.LOG' (size = 12247)
+Local file is 'UniGene/2009.LOG'
+Fetching '2010.LOG' (size = 27499)
+Local file is 'UniGene/2010.LOG'
+Fetching '2011.LOG' (size = 12486)
+Local file is 'UniGene/2011.LOG'
+Checksum for 'UniGene/Hs.seq.uniq.gz' does not match, will parse...
+Checksum for 'UniGene/Hs.data.gz' does not match, will parse...
+Parsing 'UniGene/Hs.seq.uniq.gz', 'UniGene/Hs.data.gz' with UniGeneParser
+UniGene source ID = 198.
+Reading from 'UniGene/Hs.data.gz'...
+Reading from 'UniGene/Hs.seq.uniq.gz'...
+count = 122727
+Uploading xrefs
+Reading from 'UniGene/2011.LOG'...
+UniGene release: '31 May 2011, UniGene Build #230 Homo sapiens'
+Setting release to '31 May 2011, UniGene Build #230 Homo sapiens' for source ID '198'
+
+source    xrefs   prim    dep     gdir    tdir    tdir    coord   synonyms
+UniGene   122727  122727  0       0       0       0       0       0       
+
+----{ HGNC_curated_transcript_notransfer }--------------------------------------
+Parsing script: with curated_transcriptParser
+source id is 77, curated_source_id is 77
+We have 221829/524288 ott to enst entries
+ 148114 direct xrefs succesfully parsed
+
+source                                          xrefs   prim    dep     gdir    tdir    tdir    coord   synonyms
+HGNC_curated_transcript_notransfer_transcript   0       0       0       0       118840  
+HGNC_curated_transcript_notransfer              112068  0       0       0       0       0       0       0       
+Clone_based_vega_transcript_transcript          0       0       0       0       29274   
+Clone_based_vega_transcript                     29036   0       0       0       0       0       0       0       
+
+----{ DBASS3 }------------------------------------------------------------------
+Creating directory 'DBASS3'
+Connecting to HTTP host 'www.som.soton.ac.uk'
+Fetching '/research/geneticsdiv/dbass3/download.asp'
+Local file is 'DBASS3/download.asp'
+Checksum for 'DBASS3/download.asp' does not match, will parse...
+Parsing 'DBASS3/download.asp' with DBASSParser
+Reading from 'DBASS3/download.asp'...
+160 direct xrefs succesfully parsed
+
+source        xrefs   prim    dep     gdir    tdir    tdir    coord   synonyms
+DBASS3        160     0       0       0       0       0       0       5       
+DBASS3_gene   0       0       0       160     
+
+----{ DBASS5 }------------------------------------------------------------------
+Creating directory 'DBASS5'
+Connecting to HTTP host 'www.som.soton.ac.uk'
+Fetching '/research/geneticsdiv/dbass5/download.aspx'
+Local file is 'DBASS5/download.aspx'
+Checksum for 'DBASS5/download.aspx' does not match, will parse...
+Parsing 'DBASS5/download.aspx' with DBASSParser
+Reading from 'DBASS5/download.aspx'...
+246 direct xrefs succesfully parsed
+
+source        xrefs   prim    dep     gdir    tdir    tdir    coord   synonyms
+DBASS5_gene   0       0       0       246     
+DBASS5        246     0       0       0       0       0       0       8       
+
+----{ HPA }---------------------------------------------------------------------
+Creating directory 'HPA'
+Connecting to HTTP host 'www.proteinatlas.org'
+Fetching '/download/xref.php'
+Local file is 'HPA/xref.php'
+Checksum for 'HPA/xref.php' does not match, will parse...
+Parsing 'HPA/xref.php' with HPAParser
+Reading from 'HPA/xref.php'...
+50502 direct xrefs succesfully parsed
+
+source            xrefs   prim    dep     gdir    tdir    tdir    coord   synonyms
+HPA               14506   0       0       0       0       0       0       0       
+HPA_translation   0       0       0       0       0       50502   
+
+----{ MIM2GENE }----------------------------------------------------------------
+Connecting to FTP host 'ftp.ncbi.nih.gov' for file 'MIM2GENE/mim2gene' 
+Creating directory 'MIM2GENE'
+Fetching 'mim2gene' (size = 365646)
+Local file is 'MIM2GENE/mim2gene'
+Checksum for 'MIM2GENE/mim2gene' does not match, will parse...
+Parsing 'MIM2GENE/mim2gene' with Mim2GeneParser
+Reading from 'MIM2GENE/mim2gene'...
+0 EntrezGene entries could not be found.
+4 Omim entries could not be found.
+1378 had different types out of 19461 Entries.
+
+source       xrefs   prim    dep     gdir    tdir    tdir    coord   synonyms
+MIM_MORBID   0       0       5725    
+MIM_GENE     0       0       13741   
+
+----{ Interpro }----------------------------------------------------------------
+Connecting to FTP host 'ftp.ebi.ac.uk' for file 'Interpro/interpro.xml.gz' 
+Creating directory 'Interpro'
+Fetching 'interpro.xml.gz' (size = 16495205)
+Local file is 'Interpro/interpro.xml.gz'
+'Interpro/interpro.xml.gz' passed (gzip -t) corruption test.
+Connecting to FTP host 'ftp.ebi.ac.uk' for file 'Interpro/release_notes.txt' 
+Fetching 'release_notes.txt' (size = 3575)
+Local file is 'Interpro/release_notes.txt'
+Checksum for 'Interpro/interpro.xml.gz' does not match, will parse...
+Parsing 'Interpro/interpro.xml.gz' with InterproParser
+Reading from 'Interpro/interpro.xml.gz'...
+	2007 PRINTS loaded.
+	12015 PFAM loaded.
+	22245 INTERPRO loaded.
+	2926 PIRSF loaded.
+	1292 PROSITE loaded.
+	4003 TIGRFAMs loaded.
+	882 SMART loaded.
+	1203 SSF loaded.
+	897 PROFILE loaded.
+Reading from 'Interpro/release_notes.txt'...
+Interpro release is 'Release 34.0, 23 September 2011'
+Setting release to 'Release 34.0, 23 September 2011' for source ID '100'
+
+source     xrefs   prim    dep     gdir    tdir    tdir    coord   synonyms
+Interpro   22245   0       0       0       0       0       0       0       
+
+----{ UCSC_hg19 }---------------------------------------------------------------
+Connecting to FTP host 'hgdownload.cse.ucsc.edu' for file 'UCSC_hg19/knownGene.txt.gz' 
+Creating directory 'UCSC_hg19'
+Fetching 'knownGene.txt.gz' (size = 4108721)
+Local file is 'UCSC_hg19/knownGene.txt.gz'
+'UCSC_hg19/knownGene.txt.gz' passed (gzip -t) corruption test.
+Connecting to FTP host 'hgdownload.cse.ucsc.edu' for file 'UCSC_hg19/README.txt' 
+Fetching 'README.txt' (size = 2624)
+Local file is 'UCSC_hg19/README.txt'
+Checksum for 'UCSC_hg19/knownGene.txt.gz' does not match, will parse...
+Parsing 'UCSC_hg19/knownGene.txt.gz' with UCSCParser
+Reading from 'UCSC_hg19/knownGene.txt.gz'...
+
+source   xrefs   prim    dep     gdir    tdir    tdir    coord   synonyms
+UCSC     0       0       0       0       0       0       77614   
+
+----{ ncRNA }-------------------------------------------------------------------
+Parsing script:host=>genebuild7,port=>3306,dbname=>sw4_ncRNA_Xrefs, with ncRNA_DBParser
+Added 938 Xrefs for ncRNAs
+
+source               xrefs   prim    dep     gdir    tdir    tdir    coord   synonyms
+RFAM                 249     0       0       0       0       0       0       0       
+miRBase_transcript   0       0       0       0       715     
+RFAM_transcript      0       0       0       0       5199    
+miRBase              689     0       0       0       0       0       0       0       
+
+----{ GO }----------------------------------------------------------------------
+Connecting to FTP host 'ftp.ebi.ac.uk' for file 'GO/gene_association.goa_human.gz' 
+Creating directory 'GO'
+Fetching 'gene_association.goa_human.gz' (size = 13897660)
+Local file is 'GO/gene_association.goa_human.gz'
+'GO/gene_association.goa_human.gz' passed (gzip -t) corruption test.
+Connecting to HTTP host 'archive.geneontology.org'
+Fetching '/latest-termdb/go_daily-termdb.obo-xml.gz'
+Local file is 'GO/go_daily-termdb.obo-xml.gz'
+Connecting to HTTP host 'www.ebi.ac.uk'
+Fetching '/GOA/HUMAN_release.html'
+Local file is 'GO/HUMAN_release.html'
+Checksum for 'GO/gene_association.goa_human.gz' does not match, will parse...
+Checksum for 'GO/go_daily-termdb.obo-xml.gz' does not match, will parse...
+Parsing 'GO/gene_association.goa_human.gz', 'GO/go_daily-termdb.obo-xml.gz' with GOParser
+Reading from 'GO/go_daily-termdb.obo-xml.gz'...
+description file for GO
+Reading from 'GO/gene_association.goa_human.gz'...
+processing for taxon: 9606
+	567692 GO dependent xrefs added 0 refseq not found and 0 Swissprot not found 
+Reading from 'GO/HUMAN_release.html'...
+GO release: 'GOA Human (version 102), released on 20 September, 2011 and assembled using the publicly released data available in the source databases on 17 September, 2011. '
+Setting release to 'GOA Human (version 102), released on 20 September, 2011 and assembled using the publicly released data available in the source databases on 17 September, 2011. ' for source ID '52'
+
+source   xrefs   prim    dep     gdir    tdir    tdir    coord   synonyms
+GO       11987   0       479945  0       0       0       0       0       
+
+----{ GO }----------------------------------------------------------------------
+Connecting to HTTP host 'www.geneontology.org'
+Fetching '/external2go/interpro2go'
+Local file is 'GO/interpro2go'
+Checksum for 'GO/interpro2go' does not match, will parse...
+Parsing 'GO/interpro2go' with InterproGoParser
+Reading from 'GO/interpro2go'...
+Parsed identifiers from GO/interpro2go
+	added 20056 GO xrefs dependent on InterPro
+	skipped 5119 GO terms due to missing InterPros
+
+source   xrefs   prim    dep     gdir    tdir    tdir    coord   synonyms
+GO       932     0       20056   0       0       0       0       0       
+
+----{ goslim_goa }--------------------------------------------------------------
+Parsing script:host=>ens-staging1,dbname=>ensembl_ontology_65, with GOSlimParser
+Parsed GOSlim Generic identifiers from script:host=>ens-staging1,dbname=>ensembl_ontology_65,, added 15130 dependent_xrefs
+
+source       xrefs   prim    dep     gdir    tdir    tdir    coord   synonyms
+goslim_goa   104     0       15130   0       0       0       0       0       
+
+
+================================================================================
+Summary of status
+================================================================================
+                          CCDS CCDSParser          	OKAY
+                        DBASS3 DBASSParser         	OKAY
+                        DBASS5 DBASSParser         	OKAY
+                    EntrezGene EntrezGeneParser    	OKAY
+                            GO GOParser            	OKAY
+                            GO InterproGoParser    	OKAY
+                          HGNC VegaOfficialNameParser	OKAY
+                          HGNC HGNC_CCDSParser     	OKAY
+                          HGNC HGNCParser          	OKAY
+HGNC_curated_transcript_notransfer curated_transcriptParser	OKAY
+                           HPA HPAParser           	OKAY
+                           IPI IPIParser           	OKAY
+                      Interpro InterproParser      	OKAY
+                           MIM MIMParser           	OKAY
+                      MIM2GENE Mim2GeneParser      	OKAY
+                    RefSeq_dna RefSeqParser        	OKAY
+                    RefSeq_dna RefSeq_CCDSParser   	OKAY
+                RefSeq_peptide RefSeqGPFFParser    	OKAY
+                     UCSC_hg19 UCSCParser          	OKAY
+                       UniGene UniGeneParser       	OKAY
+              Uniprot/SPTREMBL UniProtAltParser    	OKAY
+             Uniprot/SWISSPROT UniProtAltParser    	OKAY
+             Uniprot/SWISSPROT UniProtDirectParser 	OKAY
+                    goslim_goa GOSlimParser        	OKAY
+                         ncRNA ncRNA_DBParser      	OKAY
+
+source                                          xrefs   prim    dep     gdir    tdir    tdir    coord   synonyms
+CCDS_transcript                                 0       0       0       0       33689   
+HGNC_curated_transcript_notransfer_transcript   0       0       0       0       118840  
+RefSeq_mRNA_predicted_transcript                0       0       0       0       2       
+Interpro                                        22245   0       0       0       0       0       0       0       
+HPA                                             28944   0       36405   0       0       0       0       0       
+RFAM_transcript                                 0       0       0       0       5199    
+LRG_HGNC_notransfer_gene                        0       0       0       176     
+IPI                                             91464   91464   0       0       0       0       0       0       
+HGNC_gene                                       0       0       0       21589   
+MIM_GENE                                        13877   0       46809   0       0       0       0       497     
+DBASS3                                          160     0       0       0       0       0       0       5       
+HGNC_transcript                                 0       0       0       0       168016  
+RefSeq_peptide                                  31934   31934   0       0       0       0       0       0       
+EntrezGene                                      42047   0       33047   0       0       0       0       74164   
+Uniprot/SPTREMBL                                94154   94154   0       0       0       0       0       965     
+protein_id                                      333693  0       538295  0       0       0       0       0       
+DBASS5                                          246     0       0       0       0       0       0       8       
+EMBL                                            210390  0       635157  0       0       0       0       0       
+Uniprot_genename                                24466   0       119522  0       0       0       0       20730   
+MEROPS                                          839     0       2197    0       0       0       0       0       
+RefSeq_peptide_predicted                        1113    1113    0       0       0       0       0       0       
+Clone_based_vega_transcript                     29036   0       0       0       0       0       0       0       
+GO                                              12919   0       500001  0       0       0       0       0       
+RefSeq_mRNA_transcript                          0       0       0       0       43347   
+RefSeq_mRNA                                     61455   31921   0       0       0       0       0       0       
+HGNC                                            159642  0       145401  0       0       0       0       538224  
+RFAM                                            249     0       0       0       0       0       0       0       
+DBASS5_gene                                     0       0       0       246     
+RefSeq_ncRNA                                    5957    5957    0       0       0       0       0       0       
+HGNC_curated_transcript_notransfer              112068  0       0       0       0       0       0       0       
+HPA_translation                                 0       0       0       0       0       50502   
+CCDS                                            26451   0       0       0       0       0       0       0       
+RefSeq_ncRNA_predicted                          3742    3742    0       0       0       0       0       0       
+miRBase                                         689     0       0       0       0       0       0       0       
+DBASS3_gene                                     0       0       0       160     
+RefSeq_mRNA_predicted                           1115    1113    0       0       0       0       0       0       
+miRBase_transcript                              0       0       0       0       715     
+LRG_HGNC_notransfer                             176     0       0       0       0       0       0       522     
+MIM_MORBID                                      7213    0       15255   0       0       0       0       471     
+Uniprot/SWISSPROT                               39491   20248   0       0       0       0       0       118046  
+UCSC                                            0       0       0       0       0       0       77614   
+WikiGene                                        42047   0       33047   0       0       0       0       0       
+Clone_based_vega_transcript_transcript          0       0       0       0       29274   
+goslim_goa                                      104     0       15130   0       0       0       0       0       
+Uniprot/SWISSPROT_translation                   0       0       0       0       0       27953   
+PDB                                             18329   0       56046   0       0       0       0       0       
+UniGene                                         122727  122727  0       0       0       0       0       0       
+
+
+------------------------------------------------------------
+Sender: LSF System <lsfadmin@bc-17-3-13>
+Subject: Job 546494: <perl ~/src/ensembl/misc-scripts/xref_mapping/xref_parser.pl -user rw -pass passwrod -host ens-research -dbname ianl_human_xref_65 -species human -stats -create -force> Done
+
+Job <perl ~/src/ensembl/misc-scripts/xref_mapping/xref_parser.pl -user rw -pass password -host ens-research -dbname ianl_human_xref_65 -species human -stats -create -force> was submitted from host <farm2-head3> by user <ianl> in cluster <farm2>.
+Job was executed on host(s) <bc-17-3-13>, in queue <normal>, as user <ianl> in cluster <farm2>.
+</lustre/scratch103/ensembl/ianl/release_65/human> was used as the working directory.
+Started at Wed Oct 12 15:24:39 2011
+Results reported at Wed Oct 12 17:20:22 2011
+
+Your job looked like:
+
+------------------------------------------------------------
+# LSBATCH: User input
+perl ~/src/ensembl-live/ensembl/misc-scripts/xref_mapping/xref_parser.pl -user rw -pass password -host ens-research -dbname ianl_human_xref_65 -species human -stats -create -force
+------------------------------------------------------------
+
+Successfully completed.
+
+Resource usage summary:
+
+    CPU time   :   1775.25 sec.
+    Max Memory :       431 MB
+    Max Swap   :       477 MB
+
+    Max Processes  :         5
+    Max Threads    :         6
+
+The output (if any) is above this job summary.
+
+
+
+PS:
+
+Read file <parse.err> for stderr output of this job.
+
-- 
GitLab