Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
E
ensembl
Manage
Activity
Members
Labels
Plan
Issues
0
Issue boards
Milestones
Iterations
Wiki
Requirements
Jira
Code
Merge requests
1
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Snippets
Locked files
Build
Pipelines
Jobs
Pipeline schedules
Test cases
Artifacts
Deploy
Releases
Package Registry
Container Registry
Operate
Environments
Terraform modules
Monitor
Incidents
Service Desk
Analyze
Value stream analytics
Contributor analytics
CI/CD analytics
Repository analytics
Code review analytics
Issue analytics
Insights
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Terms and privacy
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
ensembl-gh-mirror
ensembl
Commits
1d4e365f
Commit
1d4e365f
authored
19 years ago
by
Glenn Proctor
Browse files
Options
Downloads
Patches
Plain Diff
Speeded up synonym dumping.
Fixed bug causing some gene display_xrefs to be incorrectly assigned.
parent
d8fd649b
No related branches found
Branches containing commit
No related tags found
Tags containing commit
No related merge requests found
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
misc-scripts/xref_mapping/XrefMapper/BasicMapper.pm
+28
-35
28 additions, 35 deletions
misc-scripts/xref_mapping/XrefMapper/BasicMapper.pm
with
28 additions
and
35 deletions
misc-scripts/xref_mapping/XrefMapper/BasicMapper.pm
+
28
−
35
View file @
1d4e365f
...
@@ -1235,17 +1235,6 @@ sub dump_core_xrefs {
...
@@ -1235,17 +1235,6 @@ sub dump_core_xrefs {
# build cache of source id -> external_db id; note %source_to_external_db is global
# build cache of source id -> external_db id; note %source_to_external_db is global
%source_to_external_db
=
$self
->
map_source_to_external_db
();
%source_to_external_db
=
$self
->
map_source_to_external_db
();
my
$sql_syn
=
"
select s.xref_id, s.synonym
";
$sql_syn
.=
"
from xref x, synonym s, dependent_xref d
";
$sql_syn
.=
"
where d.dependent_xref_id = x.xref_id and
";
$sql_syn
.=
"
s.xref_id = x.xref_id and
";
$sql_syn
.=
"
x.xref_id = ?
";
my
$dep_syn_sth
=
$self
->
xref
->
dbc
->
prepare
(
$sql_syn
);
# execute several queries with a max of 200 entries in each IN clause - more efficient
# execute several queries with a max of 200 entries in each IN clause - more efficient
my
$batch_size
=
200
;
my
$batch_size
=
200
;
...
@@ -1318,16 +1307,6 @@ sub dump_core_xrefs {
...
@@ -1318,16 +1307,6 @@ sub dump_core_xrefs {
$xrefs_written
{
$xref_id
}
=
1
;
$xrefs_written
{
$xref_id
}
=
1
;
$source_ids
{
$source_id
}
=
$source_id
;
$source_ids
{
$source_id
}
=
$source_id
;
}
}
#dump synonyms for dependent_xref
$dep_syn_sth
->
execute
(
$xref_id
);
my
(
$syn_xref
,
$syn
);
$dep_syn_sth
->
bind_columns
(
\
$syn_xref
,
\
$syn
);
while
(
$dep_syn_sth
->
fetch
())
{
print
EXTERNAL_SYNONYM
(
$syn_xref
+
$xref_id_offset
)
.
"
\t
"
.
$syn
.
"
\n
";
}
# create an object_xref linking this (dependent) xref with any objects it maps to
# create an object_xref linking this (dependent) xref with any objects it maps to
# write to file and add to object_xref_mappings
# write to file and add to object_xref_mappings
...
@@ -1359,22 +1338,36 @@ sub dump_core_xrefs {
...
@@ -1359,22 +1338,36 @@ sub dump_core_xrefs {
}
}
}
}
# Now get the synonyms for each of these xrefs and write them to the external_synonym table
#print "source_ids: " . join(" ", keys(%source_ids)) . "\n";
$sql
=
"
SELECT DISTINCT xref_id, synonym FROM synonym WHERE xref_id
$id_str
";
}
# while @xref_ids
my
$syn_sth
=
$self
->
xref
->
dbc
->
prepare
(
$sql
);
# Dump any synonyms for xrefs we've written
$syn_sth
->
execute
();
# Do one big query to get a list of all the synonyms; note each xref may have
# more than one synonym so they are stored in a hash of lists
print
"
Dumping synonyms
\n
";
my
$syn_count
;
my
%synonyms
;
my
$syn_sth
=
$self
->
xref
->
dbc
->
prepare
("
SELECT xref_id, synonym FROM synonym
");
$syn_sth
->
execute
();
$syn_sth
->
bind_columns
(
\
$xref_id
,
\
$accession
);
my
(
$xref_id
,
$synonym
);
while
(
$syn_sth
->
fetch
())
{
$syn_sth
->
bind_columns
(
\
$xref_id
,
\
$synonym
);
while
(
$syn_sth
->
fetch
())
{
print
EXTERNAL_SYNONYM
(
$xref_id
+
$xref_id_offset
)
.
"
\t
"
.
$accession
.
"
\n
"
;
push
@
{
$synonyms
{
$xref_id
}},
$synonym
;
}
}
#print "source_ids: " . join(" ", keys(%source_ids)) . "\n";
# Now write the ones we want to the file
foreach
my
$xref_id
(
keys
%synonyms
)
{
foreach
my
$syn
(
@
{
$synonyms
{
$xref_id
}})
{
print
EXTERNAL_SYNONYM
(
$xref_id
+
$xref_id_offset
)
.
"
\t
"
.
$syn
.
"
\n
";
$syn_count
++
;
}
}
}
# while @xref_ids
print
"
Wrote
$syn_count
synonyms
\n
";
close
(
XREF
);
close
(
XREF
);
close
(
OBJECT_XREF
);
close
(
OBJECT_XREF
);
...
@@ -1649,15 +1642,15 @@ sub build_gene_display_xrefs {
...
@@ -1649,15 +1642,15 @@ sub build_gene_display_xrefs {
$trans_xref
++
;
$trans_xref
++
;
}
}
my
(
$xref_id
,
$priority
)
=
split
(
/\|/
,
$transcript_display_xrefs
->
{
$transcript_id
});
my
(
$xref_id
,
$priority
)
=
split
(
/\|/
,
$transcript_display_xrefs
->
{
$transcript_id
});
#print "gene $gene_id orig:" . $transcript_display_xrefs->{$transcript_id} . " xref id: " . $xref_id . " pri " . $priority . "\n";
# 2 separate if clauses to avoid having to fetch transcripts unnecessarily
# 2 separate if clauses to avoid having to fetch transcripts unnecessarily
if
((
$priority
lt
$best_xref_priority_idx
))
{
if
((
$priority
<
$best_xref_priority_idx
))
{
$best_xref_priority_idx
=
$priority
;
$best_xref_priority_idx
=
$priority
;
$best_xref
=
$xref_id
;
$best_xref
=
$xref_id
;
}
elsif
(
$priority
eq
$best_xref_priority_idx
)
{
}
elsif
(
$priority
==
$best_xref_priority_idx
)
{
# compare transcript lengths and use longest
# compare transcript lengths and use longest
my
$transcript
=
$ta
->
fetch_by_dbID
(
$transcript_id
);
my
$transcript
=
$ta
->
fetch_by_dbID
(
$transcript_id
);
...
@@ -2062,7 +2055,7 @@ sub compare_xref_descriptions {
...
@@ -2062,7 +2055,7 @@ sub compare_xref_descriptions {
my
$query_identity_a
=
$object_xref_identities
{
$key_a
}
->
{
$a
}
->
{"
query_identity
"};
my
$query_identity_a
=
$object_xref_identities
{
$key_a
}
->
{
$a
}
->
{"
query_identity
"};
my
$query_identity_b
=
$object_xref_identities
{
$key_b
}
->
{
$b
}
->
{"
query_identity
"};
my
$query_identity_b
=
$object_xref_identities
{
$key_b
}
->
{
$b
}
->
{"
query_identity
"};
print
"
gene 78163
"
.
$xref_accessions
{
$a
}
.
"
key a
$key_a
qia
$query_identity_a
"
.
$xref_accessions
{
$b
}
.
"
key b
$key_b
qib
$query_identity_b
\n
"
if
(
$gene_id
==
78163
);
#
print "gene 78163 " . $xref_accessions{$a} . " key a $key_a qia $query_identity_a " . $xref_accessions{$b} . " key b $key_b qib $query_identity_b \n" if ($gene_id==78163);
return
(
$query_identity_a
<=>
$query_identity_b
)
if
(
$query_identity_a
!=
$query_identity_b
);
return
(
$query_identity_a
<=>
$query_identity_b
)
if
(
$query_identity_a
!=
$query_identity_b
);
my
$target_identity_a
=
$object_xref_identities
{
$key_a
}
->
{
$a
}
->
{"
target_identity
"};
my
$target_identity_a
=
$object_xref_identities
{
$key_a
}
->
{
$a
}
->
{"
target_identity
"};
...
...
This diff is collapsed.
Click to expand it.
Preview
0%
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment