Skip to content
GitLab
Projects
Groups
Snippets
Help
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Sign in
Toggle navigation
Open sidebar
ensembl-gh-mirror
ensembl
Commits
6b4d4460
Commit
6b4d4460
authored
Nov 22, 2011
by
Monika Komorowska
Browse files
get mapping between vega stable ids and ensembl gene ids from the core db
parent
de4242fe
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
61 additions
and
98 deletions
+61
-98
misc-scripts/alt_alleles/alt_alleles.pl
misc-scripts/alt_alleles/alt_alleles.pl
+61
-98
No files found.
misc-scripts/alt_alleles/alt_alleles.pl
View file @
6b4d4460
...
@@ -38,44 +38,75 @@ if(!defined($cdbname)){
...
@@ -38,44 +38,75 @@ if(!defined($cdbname)){
$cdbname
=
"
homo_sapiens_core_
"
.
$api_version
.
"
_37
";
$cdbname
=
"
homo_sapiens_core_
"
.
$api_version
.
"
_37
";
}
}
#
# Connect to the core database
#
my
$core_dba
=
Bio::EnsEMBL::DBSQL::
DBAdaptor
->
new
(
-
host
=>
$chost
||
'
ens-staging1
',
-
user
=>
$cuser
||
'
ensadmin
',
-
pass
=>
$cpass
,
-
species
=>
"
test
",
-
dbname
=>
$cdbname
||
"
homo_sapiens_core_63_37
");
#
# get ensembl gene ids and vega stable ids from the core database
#
my
%vega_to_ens_id
;
my
(
$vega_stable_id
,
$gene_id
);
my
$sth
=
$core_dba
->
dbc
->
prepare
("
select ensembl_id, display_label from object_xref join xref using(xref_id) join external_db using(external_db_id) where db_name = 'OTTG' and ensembl_object_type = 'Gene'
");
$sth
->
execute
;
$sth
->
bind_columns
(
\
$gene_id
,
\
$vega_stable_id
);
while
(
$sth
->
fetch
){
$vega_to_ens_id
{
$vega_stable_id
}
=
$gene_id
;
}
$sth
->
finish
;
my
$vega_dba
=
Bio::EnsEMBL::DBSQL::
DBAdaptor
->
new
(
-
host
=>
$vhost
||
'
ens-staging1
',
my
$vega_dba
=
Bio::EnsEMBL::DBSQL::
DBAdaptor
->
new
(
-
host
=>
$vhost
||
'
ens-staging1
',
-
user
=>
$vuser
||
'
ensro
',
-
user
=>
$vuser
||
'
ensro
',
-
port
=>
$vport
||
3306
,
-
port
=>
$vport
||
3306
,
-
dbname
=>
$vdbname
||
"
homo_sapiens_vega_63_37
");
-
dbname
=>
$vdbname
||
"
homo_sapiens_vega_63_37
");
#
#
#
the magic
SQL to get
th
e data from vega
# SQL to get
alt_allel
e data from vega
#
#
my
$sql
=
(
<<
EOS
);
my
$sql
=
(
<<
EOS
);
select
aa
.
alt_allele_id
,
g
.
stable_id
,
x2
.
display_label
select
aa
.
alt_allele_id
,
g
.
stable_id
from
alt_allele
aa
,
xref
x1
,
gene
g
from
alt_allele
aa
,
gene
g
left
join
object_xref
ox
on
g
.
gene_id
=
ox
.
ensembl_id
left
join
xref
x2
on
ox
.
xref_id
=
x2
.
xref_id
left
join
external_db
edb
on
x2
.
external_db_id
=
edb
.
external_db_id
where
aa
.
gene_id
=
g
.
gene_id
where
aa
.
gene_id
=
g
.
gene_id
and
g
.
display_xref_id
=
x1
.
xref_id
and
ox
.
ensembl_object_type
=
'
Gene
'
and
edb
.
db_name
=
'
ENSG
';
EOS
EOS
#
#
# Store data in a hash where the key is the alt_id and the
stabl
e ids
# Store data in a hash where the key is the alt_id and the
ensembl gen
e ids
# stored in an anonymous array (value of the hash).
# stored in an anonymous array (value of the hash).
#
#
my
$sth
=
$vega_dba
->
dbc
->
prepare
(
$sql
);
my
$sth
=
$vega_dba
->
dbc
->
prepare
(
$sql
);
$sth
->
execute
;
$sth
->
execute
;
my
(
$alt_id
,
$vega_stable_id
,
$core_stable_id
);
my
(
$alt_id
,
$vega_stable_id
);
my
%alt_to_
stabl
e
;
my
%alt_to_
gen
e
;
$sth
->
bind_columns
(
\
$alt_id
,
\
$vega_stable_id
,
\
$core_stable_id
);
$sth
->
bind_columns
(
\
$alt_id
,
\
$vega_stable_id
);
my
$max_alt_id
=
0
;
my
$max_alt_id
=
0
;
my
%no_gene_id
;
while
(
$sth
->
fetch
()){
while
(
$sth
->
fetch
()){
push
@
{
$alt_to_stable
{
$alt_id
}},
$core_stable_id
;
my
$gene_id
=
$vega_to_ens_id
{
$vega_stable_id
};
if
(
defined
(
$gene_id
)
)
{
push
@
{
$alt_to_gene
{
$alt_id
}},
$gene_id
;
}
else
{
push
@
{
$no_gene_id
{
$alt_id
}},
$vega_stable_id
;
print
STDERR
"
no ensembl gene_id found for vega stable id
$vega_stable_id
in core
\n
";
}
if
(
$alt_id
>
$max_alt_id
){
if
(
$alt_id
>
$max_alt_id
){
$max_alt_id
=
$alt_id
;
$max_alt_id
=
$alt_id
;
}
}
...
@@ -84,62 +115,6 @@ $sth->finish;
...
@@ -84,62 +115,6 @@ $sth->finish;
$max_alt_id
+=
1000
;
$max_alt_id
+=
1000
;
#
# Test case for invalid stable id. (Delete after testing
#
push
@
{
$alt_to_stable
{
9999
}},
"
INVALID1
";
# initial testing to make sure sensible info was obtained.
#foreach my $key (keys %alt_to_stable){
# my @arr = @{$alt_to_stable{$key}};
# if(scalar(@arr) > 1){
# print $key;
# foreach my $stable_id (@arr){
# print "\t".$stable_id;
# }
# print "\n";
# }
# else{
# print "$key has only one ensembl stable id ".$arr[0]." so ignoring\n";
# }
#}
#EXAMPLE:
#54 ENSG00000206285 ENSG00000236802 ENSG00000226936 ENSG00000235155 ENSG00000235863
#
# Connect to the core database to store the data in.
#
my
$core_dba
=
Bio::EnsEMBL::DBSQL::
DBAdaptor
->
new
(
-
host
=>
$chost
||
'
ens-staging1
',
-
user
=>
$cuser
||
'
ensadmin
',
-
pass
=>
$cpass
,
-
species
=>
"
test
",
-
dbname
=>
$cdbname
||
"
homo_sapiens_core_63_37
");
#
# for each stable_id look it up in the gene_stable_id table and get the gene_id
# store this in a hash.
my
%stable_id_to_gene_id
;
$sth
=
$core_dba
->
dbc
->
prepare
("
Select stable_id, gene_id from gene
");
$sth
->
execute
;
my
(
$gene_id
);
$sth
->
bind_columns
(
\
$core_stable_id
,
\
$gene_id
);
while
(
$sth
->
fetch
){
$stable_id_to_gene_id
{
$core_stable_id
}
=
$gene_id
;
}
$sth
->
finish
;
my
%non_reference
;
# $non_reference{$gene_id} = 1;
my
%non_reference
;
# $non_reference{$gene_id} = 1;
...
@@ -160,7 +135,6 @@ while($sth->fetch()){
...
@@ -160,7 +135,6 @@ while($sth->fetch()){
}
}
#
#
# Delete the old data
# Delete the old data
#
#
...
@@ -170,28 +144,17 @@ $sth->execute;
...
@@ -170,28 +144,17 @@ $sth->execute;
#
#
# Check that all stable_ids are valid.
# Check that all alt_alleles are valid.
# NOTE: if one is invalid then if the alt_allele only had two members
# NOTE: if an alt allele has only one gene_id delete it from the hash
# then with the reduction of one member will invalidate the alt_allele.
#
#
foreach
my
$key
(
keys
%alt_to_stable
){
foreach
my
$key
(
keys
%alt_to_gene
){
my
@arr
=
@
{
$alt_to_stable
{
$key
}};
my
@arr
=
@
{
$alt_to_gene
{
$key
}};
my
@arr2
;
if
(
scalar
(
@arr
)
<=
1
){
if
(
scalar
(
@arr
)
>
1
){
delete
$alt_to_gene
{
$key
};
foreach
my
$stable_id
(
@arr
){
my
@unmapped_vega
=
$no_gene_id
{
$key
};
if
(
defined
(
$stable_id_to_gene_id
{
$stable_id
})){
print
STDERR
"
ignoring alt allele
$key
: unmapped vega stable id(s):
"
.
join
("
,
",
@unmapped_vega
)
.
"
\n
";
push
@arr2
,
$stable_id
;
}
else
{
print
STDERR
"
$stable_id
in vega database but not in core
\n
";
}
}
}
else
{
print
"
$key
has only one ensembl stable id
"
.
join
("
,
",
@arr
)
.
"
so ignoring
\n
";
}
}
$alt_to_stable
{
$key
}
=
\
@arr2
;
}
}
#
#
...
@@ -210,20 +173,20 @@ my $alt_id_count = 0;
...
@@ -210,20 +173,20 @@ my $alt_id_count = 0;
#
#
#my %gene_id_to_alt_id;
#my %gene_id_to_alt_id;
foreach
my
$key
(
keys
%alt_to_
stabl
e
){
foreach
my
$key
(
keys
%alt_to_
gen
e
){
my
@arr
=
@
{
$alt_to_
stabl
e
{
$key
}};
my
@arr
=
@
{
$alt_to_
gen
e
{
$key
}};
if
(
scalar
(
@arr
)
>
1
){
if
(
scalar
(
@arr
)
>
1
){
my
$sanity_check
=
0
;
# should be 1 refernce gene only if 0 or more than 1 we may have a problem
my
$sanity_check
=
0
;
# should be 1 refernce gene only if 0 or more than 1 we may have a problem
foreach
my
$
stabl
e_id
(
@arr
){
foreach
my
$
gen
e_id
(
@arr
){
my
$ref
=
1
;
my
$ref
=
1
;
if
(
defined
(
$non_reference
{
$
stable_id_to_gene_id
{
$stable_id
}
})){
if
(
defined
(
$non_reference
{
$
gene_id
})){
$ref
=
0
;
$ref
=
0
;
}
}
else
{
else
{
$sanity_check
++
;
$sanity_check
++
;
}
}
$insert_sth
->
execute
(
$key
,
$
stable_id_to_gene_id
{
$stable_id
}
,
$ref
);
$insert_sth
->
execute
(
$key
,
$
gene_id
,
$ref
);
# $gene_id_to_alt_id{$
stable_id_to_gene_id{$stable_id}
}= $key;
# $gene_id_to_alt_id{$
gene_id
}= $key;
$count
++
;
$count
++
;
}
}
if
(
$sanity_check
!=
1
){
if
(
$sanity_check
!=
1
){
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment