Skip to content
GitLab
Projects
Groups
Snippets
Help
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Sign in
Toggle navigation
Open sidebar
ensembl-gh-mirror
ensembl
Commits
914275ae
Commit
914275ae
authored
Nov 15, 2011
by
Monika Komorowska
Browse files
Changes to UniProtDirectParser - it can be used by any species
parent
50fa6136
Changes
2
Show whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
95 additions
and
15 deletions
+95
-15
misc-scripts/xref_mapping/XrefParser/UniProtDirectParser.pm
misc-scripts/xref_mapping/XrefParser/UniProtDirectParser.pm
+51
-11
misc-scripts/xref_mapping/xref_config.ini
misc-scripts/xref_mapping/xref_config.ini
+44
-4
No files found.
misc-scripts/xref_mapping/XrefParser/UniProtDirectParser.pm
View file @
914275ae
...
...
@@ -6,6 +6,7 @@ use Carp;
use
DBI
;
use
base
qw( XrefParser::BaseParser )
;
use
XrefParser::
Database
;
# Parse file of Uniprot records and assign direct xrefs
# All assumed to be linked to translation
...
...
@@ -13,30 +14,67 @@ use base qw( XrefParser::BaseParser );
# --------------------------------------------------------------------------------
sub
run
{
sub
run
_script
{
my
(
$self
,
$ref_arg
)
=
@_
;
my
$source_id
=
$ref_arg
->
{
source_id
};
my
$species_id
=
$ref_arg
->
{
species_id
};
my
$file
s
=
$ref_arg
->
{
file
s
};
my
$file
=
$ref_arg
->
{
file
};
my
$verbose
=
$ref_arg
->
{
verbose
};
if
((
!
defined
$source_id
)
or
(
!
defined
$species_id
)
or
(
!
defined
$file
s
)
){
croak
"
Need to pass source_id, species_id and file
s
as pairs
";
if
((
!
defined
$source_id
)
or
(
!
defined
$species_id
)
or
(
!
defined
$file
)
){
croak
"
Need to pass source_id, species_id and file as pairs
";
}
$verbose
|=
0
;
my
%prefix
=
(
9606
=>
"
ENSP0
",
10090
=>
"
ENSMUSP0
",
10116
=>
"
ENSRNOP0
");
my
$user
=
"
ensro
";
my
$host
;
my
$port
;
my
$dbname
;
my
$wget
=
"";
if
(
!
defined
(
$prefix
{
$species_id
})){
print
"
No prefix known for this species
$species_id
???
\n
";
if
(
$file
=~
/host[=][>](\S+?)[,]/
){
$host
=
$
1
;
}
if
(
$file
=~
/port[=][>](\S+?)[,]/
){
$port
=
$
1
;
}
if
(
$file
=~
/dbname[=][>](\S+?)[,]/
){
$dbname
=
$
1
;
}
if
(
$file
=~
/wget[=][>](\S+?)[,]/
){
$wget
=
$
1
;
}
my
$ua
=
LWP::
UserAgent
->
new
();
$ua
->
timeout
(
10
);
$ua
->
env_proxy
();
my
$response
=
$ua
->
get
(
$wget
);
if
(
!
$response
->
is_success
()
)
{
warn
(
$response
->
status_line
);
return
1
;
}
my
$filename
=
@
{
$files
}[
0
];
my
$production_db
=
XrefParser::
Database
->
new
({
host
=>
$host
,
port
=>
$port
,
user
=>
$user
,
dbname
=>
$dbname
,
pass
=>
""});
my
$prod_dbi
=
$production_db
->
dbi
();
my
$file_io
=
$self
->
get_filehandle
(
$filename
);
if
(
!
defined
(
$file_io
)
)
{
if
(
!
defined
(
$prod_dbi
)){
return
1
;
}
my
(
$prefix
)
=
$prod_dbi
->
selectrow_array
("
SELECT species_prefix FROM species WHERE taxon =
$species_id
");
my
%prefix
=
(
$species_id
=>
$prefix
);
if
(
!
defined
(
$prefix
{
$species_id
})){
print
"
No prefix known for this species
$species_id
???
\n
";
return
1
;
}
...
...
@@ -46,7 +84,9 @@ sub run {
my
%prot2ensembl
;
my
$count
=
0
;
while
(
defined
(
my
$line
=
$file_io
->
getline
()
)
)
{
my
@lines
=
split
(
/\n/
,
$response
->
content
);
foreach
my
$line
(
@lines
){
my
(
$prot
,
$ens
)
=
split
/\s+/
,
$line
;
if
(
$ens
=~
/$prefix{$species_id}/
){
push
@
{
$prot2ensembl
{
$prot
}},
$ens
;
...
...
misc-scripts/xref_mapping/xref_config.ini
View file @
914275ae
...
...
@@ -2598,7 +2598,7 @@ data_uri = ftp://ftp.ebi.ac.uk/pub/databases/uniprot/knowledgebase/unipro
# swissprot entry
[source Uniprot/SWISSPROT::DIRECT]
# Used by homo_sapiens
name
=
Uniprot/SWISSPROT
download
=
Y
order
=
22
...
...
@@ -2607,8 +2607,7 @@ prio_descr = uniprot_mapped
parser
=
UniProtDirectParser
dependent
=
Uniprot/SWISSPROT
release_uri
=
data_uri
=
ftp://ftp.ebi.ac.uk/pub/contrib/xrefs/ens-sp.map
data_uri
=
script:wget=>ftp://ftp.ebi.ac.uk/pub/contrib/xrefs/ens-sp.map,host=>ens-staging1,dbname=>ensembl_production,
[source Uniprot/SWISSPROT::MULTI-predicted]
...
...
@@ -3563,6 +3562,7 @@ source = RefSeq_dna::MULTI-vertebrate_mammalian
source
=
RefSeq_peptide::MULTI-vertebrate_mammalian
source
=
Uniprot/SPTREMBL::MULTI
source
=
Uniprot/SWISSPROT::MULTI
source
=
Uniprot/SWISSPROT::DIRECT
[species anopheles_gambiae]
taxonomy_id
=
7165
...
...
@@ -3945,6 +3945,7 @@ source = RefSeq_peptide::bos_taurus
source
=
UniGene::bos_taurus
source
=
Uniprot/SPTREMBL::MULTI
source
=
Uniprot/SWISSPROT::MULTI
source
=
Uniprot/SWISSPROT::DIRECT
source
=
ncRNA::MULTI
source
=
goslim_goa::MULTI
...
...
@@ -3976,6 +3977,7 @@ source = RefSeq_dna::MULTI-vertebrate_mammalian
source
=
RefSeq_peptide::MULTI-vertebrate_mammalian
source
=
Uniprot/SPTREMBL::MULTI
source
=
Uniprot/SWISSPROT::MULTI
source
=
Uniprot/SWISSPROT::DIRECT
[species canis_familiaris]
taxonomy_id
=
9615
...
...
@@ -3990,6 +3992,7 @@ source = RefSeq_peptide::canis_familiaris
source
=
UniGene::canis_familiaris
source
=
Uniprot/SPTREMBL::MULTI
source
=
Uniprot/SWISSPROT::MULTI
source
=
Uniprot/SWISSPROT::DIRECT
source
=
ncRNA::MULTI
[species ciona_intestinalis]
...
...
@@ -4005,6 +4008,7 @@ source = RefSeq_peptide::MULTI-vertebrate_other
source
=
UniGene::ciona_intestinalis
source
=
Uniprot/SPTREMBL::MULTI
source
=
Uniprot/SWISSPROT::MULTI
source
=
Uniprot/SWISSPROT::DIRECT
source
=
cint_aniseed_v1::ciona_intestinalis
source
=
cint_aniseed_v2::ciona_intestinalis
source
=
cint_jgi_v1::ciona_intestinalis
...
...
@@ -4025,6 +4029,7 @@ source = RefSeq_peptide::MULTI-vertebrate_other
source
=
UniGene::ciona_savignyi
source
=
Uniprot/SPTREMBL::MULTI
source
=
Uniprot/SWISSPROT::MULTI
source
=
Uniprot/SWISSPROT::DIRECT
source
=
ncRNA::MULTI
[species culex_quinquefasciatus]
...
...
@@ -4056,6 +4061,7 @@ source = RefSeq_peptide::danio_rerio
source
=
UniGene::danio_rerio
source
=
Uniprot/SPTREMBL::MULTI
source
=
Uniprot/SWISSPROT::MULTI
source
=
Uniprot/SWISSPROT::DIRECT
source
=
ZFIN_ID::danio_rerio#01
source
=
ZFIN_ID::danio_rerio#02
source
=
ZFIN_ID::danio_rerio#03
...
...
@@ -4072,6 +4078,7 @@ source = InterproGO::MULTI
source
=
Interpro::MULTI
source
=
Uniprot/SPTREMBL::MULTI
source
=
Uniprot/SWISSPROT::MULTI
source
=
Uniprot/SWISSPROT::DIRECT
source
=
ncRNA::MULTI
[species dictyostelium_discoideum]
...
...
@@ -4328,6 +4335,7 @@ source = InterproGO::MULTI
source
=
Interpro::MULTI
source
=
Uniprot/SPTREMBL::MULTI
source
=
Uniprot/SWISSPROT::MULTI
source
=
Uniprot/SWISSPROT::DIRECT
source
=
ncRNA::MULTI
[species erinaceus_europaeus]
...
...
@@ -4343,6 +4351,7 @@ source = RefSeq_peptide::MULTI-vertebrate_mammalian
source
=
UniGene::MULTI
source
=
Uniprot/SPTREMBL::MULTI
source
=
Uniprot/SWISSPROT::MULTI
source
=
Uniprot/SWISSPROT::DIRECT
source
=
ncRNA::MULTI
[species equus_caballus]
...
...
@@ -4358,6 +4367,7 @@ source = RefSeq_peptide::equus_caballus
source
=
UniGene::equus_caballus
source
=
Uniprot/SPTREMBL::MULTI
source
=
Uniprot/SWISSPROT::MULTI
source
=
Uniprot/SWISSPROT::DIRECT
source
=
ncRNA::MULTI
[species felis_catus]
...
...
@@ -4370,6 +4380,7 @@ source = InterproGO::MULTI
source
=
Interpro::MULTI
source
=
Uniprot/SPTREMBL::MULTI
source
=
Uniprot/SWISSPROT::MULTI
source
=
Uniprot/SWISSPROT::DIRECT
source
=
ncRNA::MULTI
[species gallus_gallus]
...
...
@@ -4386,6 +4397,7 @@ source = RefSeq_peptide::gallus_gallus
source
=
UniGene::gallus_gallus
source
=
Uniprot/SPTREMBL::MULTI
source
=
Uniprot/SWISSPROT::MULTI
source
=
Uniprot/SWISSPROT::DIRECT
source
=
ncRNA::MULTI
[species gasterosteus_aculeatus]
...
...
@@ -4401,6 +4413,7 @@ source = RefSeq_dna::gasterosteus_aculeatus
source
=
RefSeq_peptide::MULTI-vertebrate_other
source
=
Uniprot/SPTREMBL::MULTI
source
=
Uniprot/SWISSPROT::MULTI
source
=
Uniprot/SWISSPROT::DIRECT
source
=
ncRNA::MULTI
[species gorilla_gorilla]
...
...
@@ -4415,6 +4428,7 @@ source = RefSeq_dna::MULTI-vertebrate_mammalian
source
=
RefSeq_peptide::MULTI-vertebrate_mammalian
source
=
Uniprot/SPTREMBL::MULTI
source
=
Uniprot/SWISSPROT::MULTI
source
=
Uniprot/SWISSPROT::DIRECT
source
=
ncRNA::MULTI
...
...
@@ -4470,6 +4484,7 @@ source = InterproGO::MULTI
source
=
Interpro::MULTI
source
=
Uniprot/SPTREMBL::MULTI
source
=
Uniprot/SWISSPROT::MULTI
source
=
Uniprot/SWISSPROT::DIRECT
source
=
ncRNA::MULTI
source
=
RefSeq_dna::MULTI-vertebrate_mammalian
source
=
RefSeq_peptide::MULTI-vertebrate_mammalian
...
...
@@ -4486,6 +4501,7 @@ source = RefSeq_dna::MULTI-vertebrate_mammalian
source
=
RefSeq_peptide::MULTI-vertebrate_mammalian
source
=
Uniprot/SPTREMBL::MULTI
source
=
Uniprot/SWISSPROT::MULTI
source
=
Uniprot/SWISSPROT::DIRECT
source
=
UniGene::macaca_mulatta
source
=
ncRNA::MULTI
...
...
@@ -4519,6 +4535,7 @@ source = RefSeq_dna::MULTI-vertebrate_mammalian
source
=
RefSeq_peptide::MULTI-vertebrate_mammalian
source
=
Uniprot/SPTREMBL::MULTI
source
=
Uniprot/SWISSPROT::MULTI
source
=
Uniprot/SWISSPROT::DIRECT
source
=
UniGene::monodelphis_domestica
source
=
ncRNA::MULTI
...
...
@@ -4565,6 +4582,7 @@ source = RefSeq_peptide::MULTI-vertebrate_mammalian
source
=
UniGene::myotis_lucifugus
source
=
Uniprot/SPTREMBL::MULTI
source
=
Uniprot/SWISSPROT::MULTI
source
=
Uniprot/SWISSPROT::DIRECT
source
=
ncRNA::MULTI
[species ochotona_princeps]
...
...
@@ -4579,6 +4597,7 @@ source = RefSeq_dna::MULTI-vertebrate_mammalian
source
=
RefSeq_peptide::MULTI-vertebrate_mammalian
source
=
Uniprot/SPTREMBL::MULTI
source
=
Uniprot/SWISSPROT::MULTI
source
=
Uniprot/SWISSPROT::DIRECT
source
=
ncRNA::MULTI
[species ornithorhynchus_anatinus]
...
...
@@ -4592,6 +4611,7 @@ source = RefSeq_dna::MULTI-vertebrate_mammalian
source
=
RefSeq_peptide::MULTI-vertebrate_mammalian
source
=
Uniprot/SPTREMBL::MULTI
source
=
Uniprot/SWISSPROT::MULTI
source
=
Uniprot/SWISSPROT::DIRECT
source
=
Oxford_FGU_Oa_tscript::ornithorhynchus_anatinus
source
=
Oxford_FGU_Oa_gene::ornithorhynchus_anatinus
source
=
Platypus_olfactory_receptor::ornithorhynchus_anatinus
...
...
@@ -4607,6 +4627,7 @@ source = InterproGO::MULTI
source
=
Interpro::MULTI
source
=
Uniprot/SPTREMBL::MULTI
source
=
Uniprot/SWISSPROT::MULTI
source
=
Uniprot/SWISSPROT::DIRECT
source
=
ncRNA::MULTI
[species oryzias_latipes]
...
...
@@ -4622,6 +4643,7 @@ source = RefSeq_peptide::MULTI-vertebrate_other
source
=
UniGene::MULTI
source
=
Uniprot/SPTREMBL::MULTI
source
=
Uniprot/SWISSPROT::MULTI
source
=
Uniprot/SWISSPROT::DIRECT
source
=
UniProt::protein_id
source
=
Uniprot::EMBL
source
=
ncRNA::MULTI
...
...
@@ -4640,6 +4662,7 @@ source = RefSeq_peptide::MULTI-vertebrate_mammalian
source
=
UniGene::MULTI
source
=
Uniprot/SPTREMBL::MULTI
source
=
Uniprot/SWISSPROT::MULTI
source
=
Uniprot/SWISSPROT::DIRECT
source
=
Uniprot::EMBL
source
=
UniProt::protein_id
...
...
@@ -4669,6 +4692,7 @@ source = Interpro::MULTI
source
=
UniGene::aedes_aegypti
source
=
Uniprot/SPTREMBL::MULTI
source
=
Uniprot/SWISSPROT::MULTI
source
=
Uniprot/SWISSPROT::DIRECT
[species pongo_abelii]
taxonomy_id
=
9601
...
...
@@ -4683,6 +4707,7 @@ source = RefSeq_peptide::pongo_abelii
source
=
UniGene::MULTI
source
=
Uniprot/SPTREMBL::MULTI
source
=
Uniprot/SWISSPROT::MULTI
source
=
Uniprot/SWISSPROT::DIRECT
source
=
ncRNA::MULTI
[species procavia_capensis]
...
...
@@ -4697,6 +4722,7 @@ source = RefSeq_dna::MULTI-vertebrate_mammalian
source
=
RefSeq_peptide::MULTI-vertebrate_mammalian
source
=
Uniprot/SPTREMBL::MULTI
source
=
Uniprot/SWISSPROT::MULTI
source
=
Uniprot/SWISSPROT::DIRECT
source
=
ncRNA::MULTI
...
...
@@ -4712,6 +4738,7 @@ source = RefSeq_dna::MULTI-vertebrate_mammalian
source
=
RefSeq_peptide::MULTI-vertebrate_mammalian
source
=
Uniprot/SPTREMBL::MULTI
source
=
Uniprot/SWISSPROT::MULTI
source
=
Uniprot/SWISSPROT::DIRECT
source
=
ncRNA::MULTI
[species rattus_norvegicus]
...
...
@@ -4747,7 +4774,7 @@ source = RefSeq_peptide::MULTI-fungi
source
=
SGD::saccharomyces_cerevisiae
source
=
Uniprot/SPTREMBL::MULTI
source
=
Uniprot/SWISSPROT::MULTI
source
=
Uniprot/SWISSPROT::DIRECT
[species schizosaccharomyces_pombe]
taxonomy_id
=
4896
...
...
@@ -4778,6 +4805,7 @@ source = RefSeq_peptide::MULTI-vertebrate_mammalian
source
=
UniGene::MULTI
source
=
Uniprot/SPTREMBL::MULTI
source
=
Uniprot/SWISSPROT::MULTI
source
=
Uniprot/SWISSPROT::DIRECT
source
=
Uniprot::EMBL
source
=
UniProt::protein_id
...
...
@@ -4795,6 +4823,7 @@ source = RefSeq_dna::MULTI-complete
source
=
RefSeq_peptide::MULTI-complete
source
=
Uniprot/SPTREMBL::MULTI
source
=
Uniprot/SWISSPROT::MULTI
source
=
Uniprot/SWISSPROT::DIRECT
source
=
ncRNA::EG
source
=
misc_EG::EG
...
...
@@ -4908,6 +4937,7 @@ source = RefSeq_peptide::MULTI-vertebrate_mammalian
source
=
UniGene::MULTI
source
=
Uniprot/SPTREMBL::MULTI
source
=
Uniprot/SWISSPROT::MULTI
source
=
Uniprot/SWISSPROT::DIRECT
source
=
ncRNA::MULTI
[species taeniopygia_guttata]
...
...
@@ -4923,6 +4953,7 @@ source = RefSeq_peptide::MULTI-vertebrate_other
source
=
UniGene::taeniopygia_guttata
source
=
Uniprot/SPTREMBL::MULTI
source
=
Uniprot/SWISSPROT::MULTI
source
=
Uniprot/SWISSPROT::DIRECT
source
=
ncRNA::MULTI
[species takifugu_rubripes]
...
...
@@ -4938,6 +4969,7 @@ source = RefSeq_peptide::MULTI-vertebrate_other
source
=
UniGene::takifugu_rubripes
source
=
Uniprot/SPTREMBL::MULTI
source
=
Uniprot/SWISSPROT::MULTI
source
=
Uniprot/SWISSPROT::DIRECT
source
=
ncRNA::MULTI
[species tarsius_syrichta]
...
...
@@ -4952,6 +4984,7 @@ source = RefSeq_dna::MULTI-vertebrate_mammalian
source
=
RefSeq_peptide::MULTI-vertebrate_mammalian
source
=
Uniprot/SPTREMBL::MULTI
source
=
Uniprot/SWISSPROT::MULTI
source
=
Uniprot/SWISSPROT::DIRECT
source
=
ncRNA::MULTI
[species tetraodon_nigroviridis]
...
...
@@ -4966,6 +4999,7 @@ source = RefSeq_dna::MULTI-vertebrate_other
source
=
RefSeq_peptide::MULTI-vertebrate_other
source
=
Uniprot/SPTREMBL::MULTI
source
=
Uniprot/SWISSPROT::MULTI
source
=
Uniprot/SWISSPROT::DIRECT
source
=
ncRNA::MULTI
source
=
Genoscope_pred_gene::tetraodon_nigroviridis
source
=
Genoscope_pred_transcript::tetraodon_nigroviridis
...
...
@@ -4986,6 +5020,7 @@ source = RefSeq_peptide::MULTI-vertebrate_mammalian
source
=
UniGene::MULTI
source
=
Uniprot/SPTREMBL::MULTI
source
=
Uniprot/SWISSPROT::MULTI
source
=
Uniprot/SWISSPROT::DIRECT
source
=
ncRNA::MULTI
[species tursiops_truncatus]
...
...
@@ -5000,6 +5035,7 @@ source = RefSeq_dna::MULTI-vertebrate_mammalian
source
=
RefSeq_peptide::MULTI-vertebrate_mammalian
source
=
Uniprot/SPTREMBL::MULTI
source
=
Uniprot/SWISSPROT::MULTI
source
=
Uniprot/SWISSPROT::DIRECT
source
=
ncRNA::MULTI
[species vicugna_pacos]
...
...
@@ -5014,6 +5050,7 @@ source = RefSeq_dna::MULTI-vertebrate_mammalian
source
=
RefSeq_peptide::MULTI-vertebrate_mammalian
source
=
Uniprot/SPTREMBL::MULTI
source
=
Uniprot/SWISSPROT::MULTI
source
=
Uniprot/SWISSPROT::DIRECT
source
=
ncRNA::MULTI
[species choloepus_hoffmanni]
...
...
@@ -5028,6 +5065,7 @@ source = RefSeq_dna::MULTI-vertebrate_mammalian
source
=
RefSeq_peptide::MULTI-vertebrate_mammalian
source
=
Uniprot/SPTREMBL::MULTI
source
=
Uniprot/SWISSPROT::MULTI
source
=
Uniprot/SWISSPROT::DIRECT
source
=
ncRNA::MULTI
[species xenopus_tropicalis]
...
...
@@ -5043,6 +5081,7 @@ source = RefSeq_peptide::MULTI-vertebrate_other
source
=
UniGene::xenopus_tropicalis
source
=
Uniprot/SPTREMBL::MULTI
source
=
Uniprot/SWISSPROT::MULTI
source
=
Uniprot/SWISSPROT::DIRECT
source
=
Xenopus_Jamboree::xenopus_tropicalis
source
=
ncRNA::MULTI
...
...
@@ -5259,6 +5298,7 @@ source = RefSeq_peptide::sus_scrofa
source
=
UniGene::sus_scrofa
source
=
Uniprot/SPTREMBL::MULTI
source
=
Uniprot/SWISSPROT::MULTI
source
=
Uniprot/SWISSPROT::DIRECT
source
=
ncRNA::MULTI
source
=
goslim_goa::MULTI
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment