Skip to content
Snippets Groups Projects
Commit 62ab49c2 authored by Andy Yates's avatar Andy Yates
Browse files

Making this script new alt allele compatible

parent 88bf9877
No related branches found
No related tags found
No related merge requests found
#!/usr/bin/env perl #!/usr/bin/env perl
use strict; use strict;
use warnings;
use Bio::EnsEMBL::DBSQL::DBAdaptor; use Bio::EnsEMBL::DBSQL::DBAdaptor;
use Bio::EnsEMBL::AltAlleleGroup;
use Getopt::Long qw(:config pass_through); use Getopt::Long qw(:config pass_through);
# (make sure api version is correct # (make sure api version is correct
...@@ -41,113 +43,102 @@ if(!defined($cdbname)){ ...@@ -41,113 +43,102 @@ if(!defined($cdbname)){
} }
# #
# Connect to the core database # Connect to the core & vega database
# #
my $core_dba = Bio::EnsEMBL::DBSQL::DBAdaptor->new(-host => $chost||'ens-staging1', my $core_dba = Bio::EnsEMBL::DBSQL::DBAdaptor->new(
-user => $cuser||'ensadmin', -host => $chost||'ens-staging1',
-pass => $cpass, -user => $cuser||'ensadmin',
-species => "test", -pass => $cpass,
-dbname => $cdbname||"homo_sapiens_core_63_37"); -group => 'core',
-dbname => $cdbname
);
my $vega_dba = Bio::EnsEMBL::DBSQL::DBAdaptor->new(
-host => $vhost||'ens-staging1',
-user => $vuser||'ensadmin',
-pass => $vpass,
-group => 'vega',
-dbname => $vdbname
);
# #
# get ensembl gene ids and vega stable ids from the core database # get ensembl gene ids and vega stable ids from the *core* database
# #
my $vega_core_sql = <<'SQL';
select ensembl_id, display_label
my %vega_to_ens_id; from object_xref
my ($vega_stable_id, $gene_id); join xref using(xref_id)
join external_db using(external_db_id)
my $sth = $core_dba->dbc->prepare("select ensembl_id, display_label from object_xref join xref using(xref_id) join external_db using(external_db_id) where db_name = 'OTTG' and ensembl_object_type = 'Gene'"); where db_name = 'OTTG'
$sth->execute; and ensembl_object_type = 'Gene'
SQL
$sth->bind_columns(\$gene_id, \$vega_stable_id);
# sometimes we will see more than one gene associated with an OTTG
while ($sth->fetch){ # this happens when an OTTG on the primary assemby has been projected to a patch.
# sometimes we will see more than one gene associated with an OTTG my %vega_to_ensembl_core_gene_id;
# this happens when an OTTG on the primary assemby has been projected to a patch $core_dba->dbc->sql_helper()->execute_no_return(-SQL => $vega_core_sql, -CALLBACK => sub {
$vega_to_ens_id{$vega_stable_id}{$gene_id} = $gene_id; my ($row) = @_;
} my ($vega_stable_id, $gene_id) = @{$row};
$sth->finish; $vega_to_ensembl_core_gene_id{$vega_stable_id}{$gene_id} = $gene_id;
});
print "\nFetched ".(scalar(keys %vega_to_ens_id))." vega_stable_ids\n";
print "\nFetched ".(scalar(keys %vega_to_ensembl_core_gene_id))." Vega Stable IDs\n";
my $vega_dba = Bio::EnsEMBL::DBSQL::DBAdaptor->new(-host => $vhost||'ens-staging1',
-user => $vuser||'ensro', #
-port => $vport||3306, # Get AltAlleles from vega
-dbname => $vdbname||"homo_sapiens_vega_63_37"); #
my $vega_aaga = $vega_dba->get_AltAlleleGroupAdaptor();
# #TODO deprecated call in 74
# SQL to get alt_allele data from vega my $vega_groups = $vega_aaga->fetch_all_Groups();
# # my $groups = $vega_aaga->fetch_all(); #replace the above with me ASAP
my $sql =(<<EOS); my $cnt_vega_rows = @{$vega_groups};
select aa.alt_allele_id, g.stable_id print STDERR "Fetched $cnt_vega_rows rows from the vega db alt_allele table\n";
from alt_allele aa, gene g
where aa.gene_id = g.gene_id
EOS
#
# Store data in a hash where the key is the alt_id and the ensembl gene ids
# stored in an anonymous array (value of the hash).
#
my $sth = $vega_dba->dbc->prepare($sql);
$sth->execute;
my ($alt_id, $vega_stable_id);
my %alt_alleles;
$sth->bind_columns(\$alt_id, \$vega_stable_id);
my %no_gene_id; my %no_gene_id;
my @new_groups;
my $cnt_vega_rows = 0; foreach my $group (@{$vega_groups}) {
while($sth->fetch()){ my $members = $group->get_all_Genes_types();
$cnt_vega_rows++; my $new_core_group = undef;
if (exists $vega_to_ens_id{$vega_stable_id} ) { foreach my $member (@{$members}) {
foreach my $gene_id ( keys %{$vega_to_ens_id{$vega_stable_id}} ) { my ($vega_gene, $attribs_hash) = @{$member};
push @{$alt_alleles{$alt_id}}, $gene_id ; if(exists $vega_to_ensembl_core_gene_id{$vega_gene->stable_id()}) {
$new_core_group ||= Bio::EnsEMBL::AltAlleleGroup->new(); # initalise if we don't already have one
foreach my $gene_id (keys %{$vega_to_ens_id{$vega_stable_id}} ) {
#Add each gene in. If we had a 1:m relationship then we copy the attribute already assigned
#across
$new_core_group->add_member($gene_id, $attribs_hash);
}
}
else {
push @{$no_gene_id{$group->dbID()}}, $vega_stable_id;
print STDERR "no ensembl gene_id found for vega stable id $vega_stable_id in core\n";
} }
} else {
push @{$no_gene_id{$alt_id}}, $vega_stable_id;
print STDERR "no ensembl gene_id found for vega stable id $vega_stable_id in core\n";
} }
push(@new_groups, $new_core_group);
} }
$sth->finish;
print STDERR "Fetched $cnt_vega_rows rows from the vega db alt_allele table\n";
# #
# Delete the old data # Delete the old data
# #
print STDERR "\n\nDeleting all alt_alleles...\n\n"; print STDERR "\n\nDeleting all alt_alleles...\n\n";
my $sth = $core_dba->dbc->prepare("delete from alt_allele"); $core_dba->dbc->do("delete from alt_allele");
$sth->execute;
# #
# Store alt_alleles. # Store alt_alleles.
# #
print STDERR "Storing new alt alleles...\n\n"; print STDERR "Storing new alt alleles...\n\n";
my $alt_allele_count=0; my $alt_allele_count=0;
my $gene_count = 0; my $gene_count = 0;
my $ga = $core_dba->get_adaptor("gene"); my $core_aaga = $core_dba->get_AltAlleleGroupAdaptor();
foreach my $group (@new_groups) {
foreach my $key (keys %alt_alleles){ my $alt_allele_id = $core_aaga->store($group);
my @gene_ids = @{$alt_alleles{$key}}; $alt_allele_count++;
my @genes; $gene_count += $group->size()
foreach my $gene_id (@gene_ids) {
push @genes, $ga->fetch_by_dbID($gene_id);
}
my $alt_allele_id = $ga->store_alt_alleles(\@genes);
$alt_allele_count ++ if ($alt_allele_id);
$gene_count += scalar(@genes) if ($alt_allele_id);
} }
print "Added $alt_allele_count alt_allele ids for $gene_count genes\nDONE\n"; print "Added $alt_allele_count alt_allele ids for $gene_count genes\nDONE\n";
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment