Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
E
ensembl
Manage
Activity
Members
Labels
Plan
Issues
0
Issue boards
Milestones
Iterations
Wiki
Requirements
Jira
Code
Merge requests
1
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Snippets
Locked files
Build
Pipelines
Jobs
Pipeline schedules
Test cases
Artifacts
Deploy
Releases
Package Registry
Container Registry
Operate
Environments
Terraform modules
Monitor
Incidents
Service Desk
Analyze
Value stream analytics
Contributor analytics
CI/CD analytics
Repository analytics
Code review analytics
Issue analytics
Insights
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Terms and privacy
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
ensembl-gh-mirror
ensembl
Commits
94ce4a0b
Commit
94ce4a0b
authored
20 years ago
by
Glenn Proctor
Browse files
Options
Downloads
Patches
Plain Diff
Moved here from parent dir
parent
3a060cc3
No related branches found
Branches containing commit
No related tags found
Tags containing commit
No related merge requests found
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
misc-scripts/xref_mapping/sql/populate_metadata.sql
+92
-0
92 additions, 0 deletions
misc-scripts/xref_mapping/sql/populate_metadata.sql
misc-scripts/xref_mapping/sql/table.sql
+124
-0
124 additions, 0 deletions
misc-scripts/xref_mapping/sql/table.sql
with
216 additions
and
0 deletions
misc-scripts/xref_mapping/sql/populate_metadata.sql
0 → 100644
+
92
−
0
View file @
94ce4a0b
# Populate the appropriate tables in an xref metadata database

################################################################################
# SPECIES
#
# One row per supported species: NCBI-style taxonomy id, canonical
# genus_species name, and a comma-separated list of accepted aliases.
# species_id is not listed, so it is left to the table default. The source_url
# rows in this file use species_id 1 under the HUMAN heading and 2 under the
# MOUSE heading, so human and mouse must stay the first two statements.
# NOTE(review): that relies on species_id counting up from 1 - confirm.

INSERT INTO species (taxonomy_id, name, aliases)
VALUES (9606, 'homo_sapiens', 'human,hsapiens,homosapiens');

INSERT INTO species (taxonomy_id, name, aliases)
VALUES (10090, 'mus_musculus', 'mouse,mmusculus,musmusculus');

INSERT INTO species (taxonomy_id, name, aliases)
VALUES (10116, 'rattus_norvegicus', 'rat,rnorvegicus,rattusnorvegicus');

INSERT INTO species (taxonomy_id, name, aliases)
VALUES (31033, 'fugu_rubripes', 'pufferfish,fugu,frubripes,fugurubripes');

INSERT INTO species (taxonomy_id, name, aliases)
VALUES (7165, 'anopheles_gambiae', 'mosquito,anopheles,agambiae,anophelesgambiae');

INSERT INTO species (taxonomy_id, name, aliases)
VALUES (7227, 'drosophila_melanogaster', 'drosophila,dmelanogaster,drosophilamelanogaster');

INSERT INTO species (taxonomy_id, name, aliases)
VALUES (6239, 'caenorhabditis_elegans', 'elegans,celegans,caenorhabditiselegans');

INSERT INTO species (taxonomy_id, name, aliases)
VALUES (6238, 'caenorhabditis_briggsae', 'briggsae,cbriggsae,caenorhabditisbriggsae');

INSERT INTO species (taxonomy_id, name, aliases)
VALUES (7955, 'danio_rerio', 'zebrafish,danio,drerio,daniorerio');

INSERT INTO species (taxonomy_id, name, aliases)
VALUES (9598, 'pan_troglodytes', 'chimp,chimpanzee,ptroglodytes,pantroglodytes');

INSERT INTO species (taxonomy_id, name, aliases)
VALUES (9031, 'gallus_gallus', 'chicken,chick,ggallus,gallusgallus');

INSERT INTO species (taxonomy_id, name, aliases)
VALUES (99883, 'tetraodon_nigroviridis', 'tetraodon,tnigroviridis,tetraodonnigroviridis');

INSERT INTO species (taxonomy_id, name, aliases)
VALUES (9913, 'bos_taurus', 'cow,btaurus,bostaurus');

INSERT INTO species (taxonomy_id, name, aliases)
VALUES (9615, 'canis_familiaris', 'dog,doggy,cfamiliaris,canisfamiliaris');
################################################################################
# SOURCES - types of data we can read
#
# Columns are named explicitly so these rows do not depend on the physical
# column order of the source table. `release` is backtick-quoted because
# RELEASE is a reserved word in MySQL 5.0 and later.

# "High level" sources that we will also download from (via source_url)

INSERT INTO source (source_id, name, `release`, download, ordered)
VALUES (1, 'UniProtSwissProt', 1, 'Y', 1);

INSERT INTO source (source_id, name, `release`, download, ordered)
VALUES (2, 'UniProtSPTrEMBL', 1, 'Y', 1);

INSERT INTO source (source_id, name, `release`, download, ordered)
VALUES (3, 'RefSeq', 1, 'Y', 1);

# Other sources - used to create dependent xrefs, but not to upload from
# NOTE(review): GO, MarkerSymbol and HUGO at the end of this group are flagged
# download = 'Y' and do have source_url rows in this file, unlike the others.

INSERT INTO source (source_id, name, `release`, download, ordered)
VALUES (1010, 'EMBL', 1, 'N', 2);

INSERT INTO source (source_id, name, `release`, download, ordered)
VALUES (1020, 'MIM', 1, 'N', 2);

INSERT INTO source (source_id, name, `release`, download, ordered)
VALUES (1030, 'PDB', 1, 'N', 2);

INSERT INTO source (source_id, name, `release`, download, ordered)
VALUES (1040, 'protein_id', 1, 'N', 2);

INSERT INTO source (source_id, name, `release`, download, ordered)
VALUES (1050, 'PUBMED', 1, 'N', 2);

INSERT INTO source (source_id, name, `release`, download, ordered)
VALUES (1060, 'MEDLINE', 1, 'N', 2);

INSERT INTO source (source_id, name, `release`, download, ordered)
VALUES (1100, 'LocusLink', 1, 'N', 2);

INSERT INTO source (source_id, name, `release`, download, ordered)
VALUES (1070, 'GO', 1, 'Y', 2);

INSERT INTO source (source_id, name, `release`, download, ordered)
VALUES (1080, 'MarkerSymbol', 1, 'Y', 2);

INSERT INTO source (source_id, name, `release`, download, ordered)
VALUES (1090, 'HUGO', 1, 'Y', 2);
################################################################################
# Files to fetch data from
#
# Each row names one download location for a (source_id, species_id) pair and
# the parser to use on it. checksum starts empty and both date columns are set
# to the time this script is run.

# --------------------------------------------------------------------------------
# UniProt (SwissProt & SPTrEMBL)
#
# Note currently no UniProt data for fugu, anopheles, c.briggsae or chicken.

### HUMAN

## uniprot
INSERT INTO source_url
    (source_id, species_id, url, checksum, file_modified_date, upload_date, parser)
VALUES
    (1, 1,
     'ftp://ftp.ebi.ac.uk/pub/databases/SPproteomes/swissprot_files/proteomes/9606.SPC',
     '', now(), now(), 'UniProtParser');

## refseq (protein.gpff file)
INSERT INTO source_url
    (source_id, species_id, url, checksum, file_modified_date, upload_date, parser)
VALUES
    (3, 1,
     'ftp://ftp.ncbi.nih.gov/refseq/H_sapiens/mRNA_Prot/human.protein.gpff.gz',
     '', now(), now(), 'RefSeqGPFFParser');

## refseq (rna.fna file)
INSERT INTO source_url
    (source_id, species_id, url, checksum, file_modified_date, upload_date, parser)
VALUES
    (3, 1,
     'ftp://ftp.ncbi.nih.gov/refseq/H_sapiens/mRNA_Prot/human.rna.fna.gz',
     '', now(), now(), 'RefSeqParser');

## GO
INSERT INTO source_url
    (source_id, species_id, url, checksum, file_modified_date, upload_date, parser)
VALUES
    (1070, 1,
     'ftp://ftp.ebi.ac.uk/pub/databases/GO/goa/HUMAN/gene_association.goa_human.gz',
     '', now(), now(), 'GOParser');

## HUGO
# The url value holds two locations separated by a single space.
INSERT INTO source_url
    (source_id, species_id, url, checksum, file_modified_date, upload_date, parser)
VALUES
    (1090, 1,
     'http://www.gene.ucl.ac.uk/public-files/nomen/ens4.txt http://www.gene.ucl.ac.uk/public-files/nomen/ens1.txt',
     '', now(), now(), 'HUGOParser');
### MOUSE
#
# Download locations for mouse; every row below uses species_id 2.
# checksum starts empty and both date columns are set to the time this script
# is run.

## uniprot
INSERT INTO source_url
    (source_id, species_id, url, checksum, file_modified_date, upload_date, parser)
VALUES
    (1, 2,
     'ftp://ftp.ebi.ac.uk/pub/databases/SPproteomes/swissprot_files/proteomes/10090.SPC',
     '', now(), now(), 'UniProtParser');

## refseq (protein.gpff file)
INSERT INTO source_url
    (source_id, species_id, url, checksum, file_modified_date, upload_date, parser)
VALUES
    (3, 2,
     'ftp://ftp.ncbi.nih.gov/refseq/M_musculus/mRNA_Prot/mouse.protein.gpff.gz',
     '', now(), now(), 'RefSeqGPFFParser');

## refseq (rna.fna file)
INSERT INTO source_url
    (source_id, species_id, url, checksum, file_modified_date, upload_date, parser)
VALUES
    (3, 2,
     'ftp://ftp.ncbi.nih.gov/refseq/M_musculus/mRNA_Prot/mouse.rna.fna.gz',
     '', now(), now(), 'RefSeqParser');

## mgd (MGI -- MarkerSymbol)
INSERT INTO source_url
    (source_id, species_id, url, checksum, file_modified_date, upload_date, parser)
VALUES
    (1080, 2,
     'ftp://ftp.informatics.jax.org/pub/reports/MRK_SwissProt_TrEMBL.rpt',
     '', now(), now(), 'MGDParser');

## GO
INSERT INTO source_url
    (source_id, species_id, url, checksum, file_modified_date, upload_date, parser)
VALUES
    (1070, 2,
     'ftp://ftp.ebi.ac.uk/pub/databases/GO/goa/MOUSE/gene_association.goa_mouse.gz',
     '', now(), now(), 'GOParser');

################################################################################
This diff is collapsed.
Click to expand it.
misc-scripts/xref_mapping/sql/table.sql
0 → 100755
+
124
−
0
View file @
94ce4a0b
# Schema for internal-external database mappings (xrefs)

################################################################################
#
# General external annotation.
#
# One row per external reference. xref_id is the generated primary key, and
# the (accession, source_id) pair is constrained to be unique.
# NOTE(review): the unique index is spelled "acession_idx" (single "c"); the
# name is kept unchanged so anything referring to it by name keeps working.

CREATE TABLE xref (
    xref_id      INT UNSIGNED  NOT NULL AUTO_INCREMENT,
    accession    VARCHAR(255)  NOT NULL,
    label        VARCHAR(255),
    description  VARCHAR(255),
    source_id    INT UNSIGNED  NOT NULL,
    species_id   INT UNSIGNED  NOT NULL,

    PRIMARY KEY (xref_id),
    UNIQUE INDEX acession_idx (accession, source_id)
);
################################################################################
#
# Sequence data attached to an xref: the sequence text, whether it is DNA or
# peptide, and whether it is experimental or predicted.
# NOTE(review): xref_id presumably matches xref.xref_id; no foreign key is
# declared, so confirm against the loading code.

CREATE TABLE primary_xref (
    xref_id        INT UNSIGNED  NOT NULL,
    sequence       MEDIUMTEXT,
    sequence_type  ENUM('dna', 'peptide'),
    status         ENUM('experimental', 'predicted'),
    source_id      INT UNSIGNED  NOT NULL,

    PRIMARY KEY (xref_id)
);
################################################################################
#
# Pairs a master xref with a dependent xref, with an optional free-text
# linkage annotation. Both id columns are indexed (non-unique) for lookup in
# either direction.

CREATE TABLE dependent_xref (
    master_xref_id      INT UNSIGNED  NOT NULL,
    dependent_xref_id   INT UNSIGNED  NOT NULL,
    linkage_annotation  VARCHAR(255),
    source_id           INT UNSIGNED  NOT NULL,

    INDEX master_idx (master_xref_id),
    INDEX dependent_idx (dependent_xref_id)
);
################################################################################
#
# Pairs an xref id with the id of a synonym xref, tagged with a source_id.
# Only xref_id is indexed.

CREATE TABLE synonym (
    xref_id          INT UNSIGNED  NOT NULL,
    synonym_xref_id  INT UNSIGNED  NOT NULL,
    source_id        INT UNSIGNED  NOT NULL,

    INDEX xref_idx (xref_id)
);
################################################################################
#
# Catalogue of data sources: a name, a release string, a Y/N download flag
# (default 'Y') and an "ordered" number.
# NOTE(review): "ordered" presumably controls processing order - confirm.
#
# The `release` column is backtick-quoted because RELEASE is a reserved word
# in MySQL 5.0 and later; unquoted, this statement is rejected there. Quoting
# leaves the column name itself unchanged.

CREATE TABLE source (
    source_id  INT UNSIGNED    NOT NULL AUTO_INCREMENT,
    name       VARCHAR(255)    NOT NULL,
    `release`  VARCHAR(255),
    download   ENUM('Y', 'N')  DEFAULT 'Y',
    ordered    INT UNSIGNED    NOT NULL,

    PRIMARY KEY (source_id),
    KEY name_idx (name)
);
################################################################################
#
# Download locations: one row per URL for a given source and species, with a
# checksum, the file-modified and upload timestamps, and the parser name.
# Indexed on source_id.

CREATE TABLE source_url (
    source_url_id       INT UNSIGNED  NOT NULL AUTO_INCREMENT,
    source_id           INT UNSIGNED  NOT NULL,
    species_id          INT UNSIGNED  NOT NULL,
    url                 VARCHAR(255),
    checksum            VARCHAR(255),
    file_modified_date  DATETIME,
    upload_date         DATETIME,
    parser              VARCHAR(255),

    PRIMARY KEY (source_url_id),
    INDEX source_idx (source_id)
);
################################################################################
#
# Ties an xref id directly to an Ensembl stable id, recording whether the
# stable id is a gene, transcript or translation.
# NOTE(review): general_xref_id presumably matches xref.xref_id; no foreign
# key is declared - confirm.

CREATE TABLE direct_xref (
    general_xref_id    INT UNSIGNED  NOT NULL,
    ensembl_stable_id  VARCHAR(255),
    type               ENUM('gene', 'transcript', 'translation'),
    linkage_xref       VARCHAR(255),

    INDEX primary_idx (general_xref_id),
    INDEX ensembl_idx (ensembl_stable_id)
);
################################################################################
#
# Known species: generated species_id, taxonomy id, name, and an aliases
# string. Non-unique indexes on taxonomy_id and name.

CREATE TABLE species (
    species_id   INT UNSIGNED  NOT NULL AUTO_INCREMENT,
    taxonomy_id  INT UNSIGNED  NOT NULL,
    name         VARCHAR(255)  NOT NULL,
    aliases      VARCHAR(255),

    PRIMARY KEY (species_id),
    INDEX taxonomy_idx (taxonomy_id),
    INDEX name_idx (name)
);

################################################################################
This diff is collapsed.
Click to expand it.
Preview
0%
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment