Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
E
ensembl
Manage
Activity
Members
Labels
Plan
Issues
0
Issue boards
Milestones
Iterations
Wiki
Requirements
Jira
Code
Merge requests
1
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Snippets
Locked files
Build
Pipelines
Jobs
Pipeline schedules
Test cases
Artifacts
Deploy
Releases
Package Registry
Container Registry
Operate
Environments
Terraform modules
Monitor
Incidents
Service Desk
Analyze
Value stream analytics
Contributor analytics
CI/CD analytics
Repository analytics
Code review analytics
Issue analytics
Insights
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Terms and privacy
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
ensembl-gh-mirror
ensembl
Commits
62ab49c2
Commit
62ab49c2
authored
11 years ago
by
Andy Yates
Browse files
Options
Downloads
Patches
Plain Diff
Making this script new alt allele compatible
parent
88bf9877
No related branches found
Branches containing commit
No related tags found
Tags containing commit
No related merge requests found
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
misc-scripts/alt_alleles/alt_alleles.pl
+72
-81
72 additions, 81 deletions
misc-scripts/alt_alleles/alt_alleles.pl
with
72 additions
and
81 deletions
misc-scripts/alt_alleles/alt_alleles.pl
+
72
−
81
View file @
62ab49c2
#!/usr/bin/env perl
#!/usr/bin/env perl
use
strict
;
use
strict
;
use
warnings
;
use
Bio::EnsEMBL::DBSQL::
DBAdaptor
;
use
Bio::EnsEMBL::DBSQL::
DBAdaptor
;
use
Bio::EnsEMBL::
AltAlleleGroup
;
use
Getopt::
Long
qw(:config pass_through)
;
use
Getopt::
Long
qw(:config pass_through)
;
# (make sure api version is correct
# (make sure api version is correct
...
@@ -41,113 +43,102 @@ if(!defined($cdbname)){
...
@@ -41,113 +43,102 @@ if(!defined($cdbname)){
}
}
#
#
# Connect to the core database
# Connect to the core
& vega
database
#
#
my
$core_dba
=
Bio::EnsEMBL::DBSQL::
DBAdaptor
->
new
(
-
host
=>
$chost
||
'
ens-staging1
',
my
$core_dba
=
Bio::EnsEMBL::DBSQL::
DBAdaptor
->
new
(
-
user
=>
$cuser
||
'
ensadmin
',
-
host
=>
$chost
||
'
ens-staging1
',
-
pass
=>
$cpass
,
-
user
=>
$cuser
||
'
ensadmin
',
-
species
=>
"
test
",
-
pass
=>
$cpass
,
-
dbname
=>
$cdbname
||
"
homo_sapiens_core_63_37
");
-
group
=>
'
core
',
-
dbname
=>
$cdbname
);
my
$vega_dba
=
Bio::EnsEMBL::DBSQL::
DBAdaptor
->
new
(
-
host
=>
$vhost
||
'
ens-staging1
',
-
user
=>
$vuser
||
'
ensadmin
',
-
pass
=>
$vpass
,
-
group
=>
'
vega
',
-
dbname
=>
$vdbname
);
#
#
# get ensembl gene ids and vega stable ids from the core database
# get ensembl gene ids and vega stable ids from the
*
core
*
database
#
#
my
$vega_core_sql
=
<<'SQL';
select ensembl_id, display_label
my
%vega_to_ens_id
;
from object_xref
my
(
$vega_stable_id
,
$gene_id
);
join xref using(xref_id)
join external_db using(external_db_id)
my
$sth
=
$core_dba
->
dbc
->
prepare
("
select ensembl_id, display_label from object_xref join xref using(xref_id) join external_db using(external_db_id) where db_name = 'OTTG' and ensembl_object_type = 'Gene'
");
where db_name = 'OTTG'
$sth
->
execute
;
and ensembl_object_type = 'Gene'
SQL
$sth
->
bind_columns
(
\
$gene_id
,
\
$vega_stable_id
);
# sometimes we will see more than one gene associated with an OTTG
while
(
$sth
->
fetch
){
# this happens when an OTTG on the primary assemby has been projected to a patch.
# sometimes we will see more than one gene associated with an OTTG
my
%vega_to_ensembl_core_gene_id
;
# this happens when an OTTG on the primary assemby has been projected to a patch
$core_dba
->
dbc
->
sql_helper
()
->
execute_no_return
(
-
SQL
=>
$vega_core_sql
,
-
CALLBACK
=>
sub
{
$vega_to_ens_id
{
$vega_stable_id
}{
$gene_id
}
=
$gene_id
;
my
(
$row
)
=
@_
;
}
my
(
$vega_stable_id
,
$gene_id
)
=
@
{
$row
};
$sth
->
finish
;
$vega_to_ensembl_core_gene_id
{
$vega_stable_id
}{
$gene_id
}
=
$gene_id
;
});
print
"
\n
Fetched
"
.
(
scalar
(
keys
%vega_to_ens_id
))
.
"
vega_stable_ids
\n
";
print
"
\n
Fetched
"
.
(
scalar
(
keys
%vega_to_ensembl_core_gene_id
))
.
"
Vega Stable IDs
\n
";
my
$vega_dba
=
Bio::EnsEMBL::DBSQL::
DBAdaptor
->
new
(
-
host
=>
$vhost
||
'
ens-staging1
',
-
user
=>
$vuser
||
'
ensro
',
#
-
port
=>
$vport
||
3306
,
# Get AltAlleles from vega
-
dbname
=>
$vdbname
||
"
homo_sapiens_vega_63_37
");
#
my
$vega_aaga
=
$vega_dba
->
get_AltAlleleGroupAdaptor
();
#
#TODO deprecated call in 74
# SQL to get alt_allele data from vega
my
$vega_groups
=
$vega_aaga
->
fetch_all_Groups
();
#
# my $groups = $vega_aaga->fetch_all(); #replace the above with me ASAP
my
$sql
=
(
<<
EOS
);
my
$cnt_vega_rows
=
@
{
$vega_groups
};
select
aa
.
alt_allele_id
,
g
.
stable_id
print
STDERR
"
Fetched
$cnt_vega_rows
rows from the vega db alt_allele table
\n
";
from
alt_allele
aa
,
gene
g
where
aa
.
gene_id
=
g
.
gene_id
EOS
#
# Store data in a hash where the key is the alt_id and the ensembl gene ids
# stored in an anonymous array (value of the hash).
#
my
$sth
=
$vega_dba
->
dbc
->
prepare
(
$sql
);
$sth
->
execute
;
my
(
$alt_id
,
$vega_stable_id
);
my
%alt_alleles
;
$sth
->
bind_columns
(
\
$alt_id
,
\
$vega_stable_id
);
my
%no_gene_id
;
my
%no_gene_id
;
my
@new_groups
;
my
$cnt_vega_rows
=
0
;
foreach
my
$group
(
@
{
$vega_groups
})
{
while
(
$sth
->
fetch
()){
my
$members
=
$group
->
get_all_Genes_types
();
$cnt_vega_rows
++
;
my
$new_core_group
=
undef
;
if
(
exists
$vega_to_ens_id
{
$vega_stable_id
}
)
{
foreach
my
$member
(
@
{
$members
})
{
foreach
my
$gene_id
(
keys
%
{
$vega_to_ens_id
{
$vega_stable_id
}}
)
{
my
(
$vega_gene
,
$attribs_hash
)
=
@
{
$member
};
push
@
{
$alt_alleles
{
$alt_id
}},
$gene_id
;
if
(
exists
$vega_to_ensembl_core_gene_id
{
$vega_gene
->
stable_id
()})
{
$new_core_group
||=
Bio::EnsEMBL::
AltAlleleGroup
->
new
();
# initalise if we don't already have one
foreach
my
$gene_id
(
keys
%
{
$vega_to_ens_id
{
$vega_stable_id
}}
)
{
#Add each gene in. If we had a 1:m relationship then we copy the attribute already assigned
#across
$new_core_group
->
add_member
(
$gene_id
,
$attribs_hash
);
}
}
else
{
push
@
{
$no_gene_id
{
$group
->
dbID
()}},
$vega_stable_id
;
print
STDERR
"
no ensembl gene_id found for vega stable id
$vega_stable_id
in core
\n
";
}
}
}
else
{
push
@
{
$no_gene_id
{
$alt_id
}},
$vega_stable_id
;
print
STDERR
"
no ensembl gene_id found for vega stable id
$vega_stable_id
in core
\n
";
}
}
push
(
@new_groups
,
$new_core_group
);
}
}
$sth
->
finish
;
print
STDERR
"
Fetched
$cnt_vega_rows
rows from the vega db alt_allele table
\n
";
#
#
# Delete the old data
# Delete the old data
#
#
print
STDERR
"
\n\n
Deleting all alt_alleles...
\n\n
";
print
STDERR
"
\n\n
Deleting all alt_alleles...
\n\n
";
my
$sth
=
$core_dba
->
dbc
->
prepare
("
delete from alt_allele
");
$core_dba
->
dbc
->
do
("
delete from alt_allele
");
$sth
->
execute
;
#
#
# Store alt_alleles.
# Store alt_alleles.
#
#
print
STDERR
"
Storing new alt alleles...
\n\n
";
print
STDERR
"
Storing new alt alleles...
\n\n
";
my
$alt_allele_count
=
0
;
my
$alt_allele_count
=
0
;
my
$gene_count
=
0
;
my
$gene_count
=
0
;
my
$ga
=
$core_dba
->
get_adaptor
("
gene
");
my
$core_aaga
=
$core_dba
->
get_AltAlleleGroupAdaptor
();
foreach
my
$group
(
@new_groups
)
{
foreach
my
$key
(
keys
%alt_alleles
){
my
$alt_allele_id
=
$core_aaga
->
store
(
$group
);
my
@gene_ids
=
@
{
$alt_alleles
{
$key
}};
$alt_allele_count
++
;
my
@genes
;
$gene_count
+=
$group
->
size
()
foreach
my
$gene_id
(
@gene_ids
)
{
push
@genes
,
$ga
->
fetch_by_dbID
(
$gene_id
);
}
my
$alt_allele_id
=
$ga
->
store_alt_alleles
(
\
@genes
);
$alt_allele_count
++
if
(
$alt_allele_id
);
$gene_count
+=
scalar
(
@genes
)
if
(
$alt_allele_id
);
}
}
print
"
Added
$alt_allele_count
alt_allele ids for
$gene_count
genes
\n
DONE
\n
";
print
"
Added
$alt_allele_count
alt_allele ids for
$gene_count
genes
\n
DONE
\n
";
This diff is collapsed.
Click to expand it.
Preview
0%
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment