Skip to content
GitLab
Projects
Groups
Snippets
Help
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Sign in
Toggle navigation
Open sidebar
ensembl-gh-mirror
ensembl
Commits
35f01da3
Commit
35f01da3
authored
Dec 16, 2010
by
Ian Longden
Browse files
Uniprot_genename source added
parent
96dfacc7
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
70 additions
and
5 deletions
+70
-5
misc-scripts/xref_mapping/XrefParser/UniProtParser.pm
misc-scripts/xref_mapping/XrefParser/UniProtParser.pm
+59
-5
misc-scripts/xref_mapping/xref_config.ini
misc-scripts/xref_mapping/xref_config.ini
+11
-0
No files found.
misc-scripts/xref_mapping/XrefParser/UniProtParser.pm
View file @
35f01da3
...
...
@@ -35,6 +35,7 @@ if (!defined(caller())) {
# --------------------------------------------------------------------------------
sub
run
{
my
$self
=
shift
if
(
defined
(
caller
(
1
)));
...
...
@@ -167,11 +168,7 @@ sub create_xrefs {
my
$num_sptr_pred
=
0
;
my
%dependent_sources
=
$self
->
get_dependent_xref_sources
();
# name-id hash
# get from HGNC file
# if(defined($dependent_sources{'HGNC'})){
# $dependent_sources{'HGNC'} = XrefParser::BaseParser->get_source_id_for_source_name("HGNC","uniprot");
# }
my
%GeneNameSynonym
;
if
(
defined
(
$dependent_sources
{'
MGI
'})){
$dependent_sources
{'
MGI
'}
=
XrefParser::
BaseParser
->
get_source_id_for_source_name
("
MGI
","
uniprot
");
...
...
@@ -293,8 +290,11 @@ sub create_xrefs {
foreach
my
$line
(
@all_lines
)
{
my
(
$accessions_only
)
=
$line
=~
/^AC\s+(.+)/
;
push
(
@accessions
,
(
split
/;\s*/
,
$accessions_only
))
if
(
$accessions_only
);
}
$xref
->
{
INFO_TYPE
}
=
"
SEQUENCE_MATCH
";
$xref
->
{
ACCESSION
}
=
$accessions
[
0
];
for
(
my
$a
=
1
;
$a
<=
$#accessions
;
$a
++
)
{
...
...
@@ -393,6 +393,40 @@ sub create_xrefs {
$xref
->
{
SEQUENCE
}
=
$parsed_seq
;
#print "Adding " . $xref->{ACCESSION} . " " . $xref->{LABEL} ."\n";
my
(
$gns
)
=
$_
=~
/(GN\s+Name.+)/
;
# NOTE: no /s modifier on this pattern, so . stops at the first newline and only the first "GN   Name" line is captured
my
@gn_lines
=
();
if
(
defined
$gns
)
{
@gn_lines
=
split
/\n/
,
$gns
}
foreach
my
$gn
(
@gn_lines
){
my
$gene_name
=
undef
;
my
%depe
;
if
(
$gn
=~
/Name=(\S+);/
){
$depe
{
ACCESSION
}
=
uc
(
$
1
);
$gene_name
=
$depe
{
ACCESSION
};
$depe
{
SOURCE_NAME
}
=
"
Uniprot_genename
";
$depe
{
SOURCE_ID
}
=
$dependent_sources
{"
Uniprot_genename
"};
$depe
{
LINKAGE_SOURCE_ID
}
=
$xref
->
{
SOURCE_ID
};
push
@
{
$xref
->
{
DEPENDENT_XREFS
}},
\
%depe
;
$dependent_xrefs
{"
Uniprot_genename
"}
++
;
my
@syn
;
if
(
$gn
=~
/Synonyms=([^;]+);/
){
my
$syn
=
$
1
;
$syn
=~
s/\s+//g
;
@syn
=
split
(
/,/
,
$syn
);
foreach
my
$ent
(
@syn
){
$GeneNameSynonym
{
$gene_name
}{
uc
(
$ent
)}
=
1
;
# print "$gene_name\t$ent\n";
}
}
}
}
my
(
$deps
)
=
$_
=~
/(DR\s+.+)/s
;
# /s allows . to match newline
my
@dep_lines
=
();
if
(
defined
$deps
)
{
@dep_lines
=
split
/\n/
,
$deps
}
# dependent xrefs - only store those that are from sources listed in the source table
my
(
$deps
)
=
$_
=~
/(DR\s+.+)/s
;
# /s allows . to match newline
...
...
@@ -535,11 +569,31 @@ sub create_xrefs {
print
"
Read
$num_sp
SwissProt xrefs and
$num_sptr
SPTrEMBL xrefs from
$file
\n
"
if
(
$verbose
);
print
"
Found
$num_sp_pred
predicted SwissProt xrefs and
$num_sptr_pred
predicted SPTrEMBL xrefs
\n
"
if
((
$num_sp_pred
>
0
||
$num_sptr_pred
>
0
)
and
$verbose
);
# my $kount=0;
my
$genename_source_id
=
$dependent_sources
{"
Uniprot_genename
"};
foreach
my
$namekey
(
keys
%GeneNameSynonym
){
#add xref
my
$xref_id
=
$self
->
add_xref
(
$namekey
,"",
$namekey
,
"",
$genename_source_id
,
$species_id
,"
DEPENDENT
");
# $kount++;
# print $namekey."\t";
foreach
my
$synkey
(
keys
%
{
$GeneNameSynonym
{
$namekey
}}){
#add synonyms for xref
$self
->
add_synonym
(
$xref_id
,
$synkey
);
# print "$synkey, ";
}
# print "\n";
}
# print "$kount gene names added\n";
print
"
Added the following dependent xrefs:-
\n
"
if
(
$verbose
);
foreach
my
$key
(
keys
%dependent_xrefs
){
print
$key
.
"
\t
"
.
$dependent_xrefs
{
$key
}
.
"
\n
"
if
(
$verbose
);
}
return
\
@xrefs
;
#TODO - currently include records from other species - filter on OX line??
...
...
misc-scripts/xref_mapping/xref_config.ini
View file @
35f01da3
...
...
@@ -2373,6 +2373,17 @@ prio_descr =
parser
=
UniProtParser
release_uri
=
[source Uniprot_genename]
# Special source used in UniProtParser for gene names.
name
=
Uniprot_genename
download
=
N
order
=
20
priority
=
1
prio_descr
=
parser
=
UniProtParser
release_uri
=
data_uri
=
[source Uniprot/SWISSPROT::drosophila_melanogaster]
# Used by drosophila_melanogaster
name
=
Uniprot/SWISSPROT
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment