Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
E
ensembl
Manage
Activity
Members
Labels
Plan
Issues
0
Issue boards
Milestones
Iterations
Wiki
Requirements
Jira
Code
Merge requests
1
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Snippets
Locked files
Build
Pipelines
Jobs
Pipeline schedules
Test cases
Artifacts
Deploy
Releases
Package Registry
Container Registry
Operate
Environments
Terraform modules
Monitor
Incidents
Service Desk
Analyze
Value stream analytics
Contributor analytics
CI/CD analytics
Repository analytics
Code review analytics
Issue analytics
Insights
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Terms and privacy
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
ensembl-gh-mirror
ensembl
Commits
6b4d4460
Commit
6b4d4460
authored
13 years ago
by
Monika Komorowska
Browse files
Options
Downloads
Patches
Plain Diff
get mapping between vega stable ids and ensembl gene ids from the core db
parent
de4242fe
No related branches found
Branches containing commit
No related tags found
Tags containing commit
No related merge requests found
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
misc-scripts/alt_alleles/alt_alleles.pl
+61
-98
61 additions, 98 deletions
misc-scripts/alt_alleles/alt_alleles.pl
with
61 additions
and
98 deletions
misc-scripts/alt_alleles/alt_alleles.pl
+
61
−
98
View file @
6b4d4460
...
...
@@ -38,44 +38,75 @@ if(!defined($cdbname)){
$cdbname
=
"
homo_sapiens_core_
"
.
$api_version
.
"
_37
";
}
#
# Connect to the core database
#
my
$core_dba
=
Bio::EnsEMBL::DBSQL::
DBAdaptor
->
new
(
-
host
=>
$chost
||
'
ens-staging1
',
-
user
=>
$cuser
||
'
ensadmin
',
-
pass
=>
$cpass
,
-
species
=>
"
test
",
-
dbname
=>
$cdbname
||
"
homo_sapiens_core_63_37
");
#
# get ensembl gene ids and vega stable ids from the core database
#
my
%vega_to_ens_id
;
my
(
$vega_stable_id
,
$gene_id
);
my
$sth
=
$core_dba
->
dbc
->
prepare
("
select ensembl_id, display_label from object_xref join xref using(xref_id) join external_db using(external_db_id) where db_name = 'OTTG' and ensembl_object_type = 'Gene'
");
$sth
->
execute
;
$sth
->
bind_columns
(
\
$gene_id
,
\
$vega_stable_id
);
while
(
$sth
->
fetch
){
$vega_to_ens_id
{
$vega_stable_id
}
=
$gene_id
;
}
$sth
->
finish
;
my
$vega_dba
=
Bio::EnsEMBL::DBSQL::
DBAdaptor
->
new
(
-
host
=>
$vhost
||
'
ens-staging1
',
-
user
=>
$vuser
||
'
ensro
',
-
port
=>
$vport
||
3306
,
-
dbname
=>
$vdbname
||
"
homo_sapiens_vega_63_37
");
#
#
the magic
SQL to get
th
e data from vega
# SQL to get
alt_allel
e data from vega
#
my
$sql
=
(
<<
EOS
);
select
aa
.
alt_allele_id
,
g
.
stable_id
,
x2
.
display_label
from
alt_allele
aa
,
xref
x1
,
gene
g
left
join
object_xref
ox
on
g
.
gene_id
=
ox
.
ensembl_id
left
join
xref
x2
on
ox
.
xref_id
=
x2
.
xref_id
left
join
external_db
edb
on
x2
.
external_db_id
=
edb
.
external_db_id
select
aa
.
alt_allele_id
,
g
.
stable_id
from
alt_allele
aa
,
gene
g
where
aa
.
gene_id
=
g
.
gene_id
and
g
.
display_xref_id
=
x1
.
xref_id
and
ox
.
ensembl_object_type
=
'
Gene
'
and
edb
.
db_name
=
'
ENSG
';
EOS
#
# Store data in a hash where the key is the alt_id and the
stabl
e ids
# Store data in a hash where the key is the alt_id and the
ensembl gen
e ids
# stored in an anonymous array (value of the hash).
#
my
$sth
=
$vega_dba
->
dbc
->
prepare
(
$sql
);
$sth
->
execute
;
my
(
$alt_id
,
$vega_stable_id
,
$core_stable_id
);
my
%alt_to_
stabl
e
;
$sth
->
bind_columns
(
\
$alt_id
,
\
$vega_stable_id
,
\
$core_stable_id
);
my
(
$alt_id
,
$vega_stable_id
);
my
%alt_to_
gen
e
;
$sth
->
bind_columns
(
\
$alt_id
,
\
$vega_stable_id
);
my
$max_alt_id
=
0
;
my
%no_gene_id
;
while
(
$sth
->
fetch
()){
push
@
{
$alt_to_stable
{
$alt_id
}},
$core_stable_id
;
my
$gene_id
=
$vega_to_ens_id
{
$vega_stable_id
};
if
(
defined
(
$gene_id
)
)
{
push
@
{
$alt_to_gene
{
$alt_id
}},
$gene_id
;
}
else
{
push
@
{
$no_gene_id
{
$alt_id
}},
$vega_stable_id
;
print
STDERR
"
no ensembl gene_id found for vega stable id
$vega_stable_id
in core
\n
";
}
if
(
$alt_id
>
$max_alt_id
){
$max_alt_id
=
$alt_id
;
}
...
...
@@ -84,62 +115,6 @@ $sth->finish;
$max_alt_id
+=
1000
;
#
# Test case for invalid stable id. (Delete after testing
#
push
@
{
$alt_to_stable
{
9999
}},
"
INVALID1
";
# initial testing to make sure sensible info was obtained.
#foreach my $key (keys %alt_to_stable){
# my @arr = @{$alt_to_stable{$key}};
# if(scalar(@arr) > 1){
# print $key;
# foreach my $stable_id (@arr){
# print "\t".$stable_id;
# }
# print "\n";
# }
# else{
# print "$key has only one ensembl stable id ".$arr[0]." so ignoring\n";
# }
#}
#EXAMPLE:
#54 ENSG00000206285 ENSG00000236802 ENSG00000226936 ENSG00000235155 ENSG00000235863
#
# Connect to the core database to store the data in.
#
my
$core_dba
=
Bio::EnsEMBL::DBSQL::
DBAdaptor
->
new
(
-
host
=>
$chost
||
'
ens-staging1
',
-
user
=>
$cuser
||
'
ensadmin
',
-
pass
=>
$cpass
,
-
species
=>
"
test
",
-
dbname
=>
$cdbname
||
"
homo_sapiens_core_63_37
");
#
# for each stable_id look it up in the gene_stable_id table and get the gene_id
# store this in a hash.
my
%stable_id_to_gene_id
;
$sth
=
$core_dba
->
dbc
->
prepare
("
Select stable_id, gene_id from gene
");
$sth
->
execute
;
my
(
$gene_id
);
$sth
->
bind_columns
(
\
$core_stable_id
,
\
$gene_id
);
while
(
$sth
->
fetch
){
$stable_id_to_gene_id
{
$core_stable_id
}
=
$gene_id
;
}
$sth
->
finish
;
my
%non_reference
;
# $non_reference{$gene_id} = 1;
...
...
@@ -160,7 +135,6 @@ while($sth->fetch()){
}
#
# Delete the old data
#
...
...
@@ -170,28 +144,17 @@ $sth->execute;
#
# Check that all stable_ids are valid.
# NOTE: if one is invalid then if the alt_allele only had two members
# then with the reduction of one member will invalidate the alt_allele.
# Check that all alt_alleles are valid.
# NOTE: if an alt allele has only one gene_id delete it from the hash
#
foreach
my
$key
(
keys
%alt_to_stable
){
my
@arr
=
@
{
$alt_to_stable
{
$key
}};
my
@arr2
;
if
(
scalar
(
@arr
)
>
1
){
foreach
my
$stable_id
(
@arr
){
if
(
defined
(
$stable_id_to_gene_id
{
$stable_id
})){
push
@arr2
,
$stable_id
;
}
else
{
print
STDERR
"
$stable_id
in vega database but not in core
\n
";
}
}
}
else
{
print
"
$key
has only one ensembl stable id
"
.
join
("
,
",
@arr
)
.
"
so ignoring
\n
";
foreach
my
$key
(
keys
%alt_to_gene
){
my
@arr
=
@
{
$alt_to_gene
{
$key
}};
if
(
scalar
(
@arr
)
<=
1
){
delete
$alt_to_gene
{
$key
};
my
@unmapped_vega
=
$no_gene_id
{
$key
};
print
STDERR
"
ignoring alt allele
$key
: unmapped vega stable id(s):
"
.
join
("
,
",
@unmapped_vega
)
.
"
\n
";
}
$alt_to_stable
{
$key
}
=
\
@arr2
;
}
#
...
...
@@ -210,20 +173,20 @@ my $alt_id_count = 0;
#
#my %gene_id_to_alt_id;
foreach
my
$key
(
keys
%alt_to_
stabl
e
){
my
@arr
=
@
{
$alt_to_
stabl
e
{
$key
}};
foreach
my
$key
(
keys
%alt_to_
gen
e
){
my
@arr
=
@
{
$alt_to_
gen
e
{
$key
}};
if
(
scalar
(
@arr
)
>
1
){
my
$sanity_check
=
0
;
# should be 1 refernce gene only if 0 or more than 1 we may have a problem
foreach
my
$
stabl
e_id
(
@arr
){
foreach
my
$
gen
e_id
(
@arr
){
my
$ref
=
1
;
if
(
defined
(
$non_reference
{
$
stable_id_to_gene_id
{
$stable_id
}
})){
if
(
defined
(
$non_reference
{
$
gene_id
})){
$ref
=
0
;
}
else
{
$sanity_check
++
;
}
$insert_sth
->
execute
(
$key
,
$
stable_id_to_gene_id
{
$stable_id
}
,
$ref
);
# $gene_id_to_alt_id{$
stable_id_to_gene_id{$stable_id}
}= $key;
$insert_sth
->
execute
(
$key
,
$
gene_id
,
$ref
);
# $gene_id_to_alt_id{$
gene_id
}= $key;
$count
++
;
}
if
(
$sanity_check
!=
1
){
...
...
This diff is collapsed.
Click to expand it.
Preview
0%
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment