From 62ab49c206ba3a29d7e637a117d147d51e144c5a Mon Sep 17 00:00:00 2001 From: Andrew Yates <ayates@ebi.ac.uk> Date: Fri, 27 Sep 2013 14:38:30 +0000 Subject: [PATCH] Making this script new alt allele compatible --- misc-scripts/alt_alleles/alt_alleles.pl | 153 +++++++++++------------- 1 file changed, 72 insertions(+), 81 deletions(-) diff --git a/misc-scripts/alt_alleles/alt_alleles.pl b/misc-scripts/alt_alleles/alt_alleles.pl index 84a9204b1b..c171ad757a 100644 --- a/misc-scripts/alt_alleles/alt_alleles.pl +++ b/misc-scripts/alt_alleles/alt_alleles.pl @@ -1,7 +1,9 @@ #!/usr/bin/env perl use strict; +use warnings; use Bio::EnsEMBL::DBSQL::DBAdaptor; +use Bio::EnsEMBL::AltAlleleGroup; use Getopt::Long qw(:config pass_through); # (make sure api version is correct @@ -41,113 +43,102 @@ if(!defined($cdbname)){ } # -# Connect to the core database +# Connect to the core & vega database # -my $core_dba = Bio::EnsEMBL::DBSQL::DBAdaptor->new(-host => $chost||'ens-staging1', - -user => $cuser||'ensadmin', - -pass => $cpass, - -species => "test", - -dbname => $cdbname||"homo_sapiens_core_63_37"); +my $core_dba = Bio::EnsEMBL::DBSQL::DBAdaptor->new( + -host => $chost||'ens-staging1', + -user => $cuser||'ensadmin', + -pass => $cpass, + -group => 'core', + -dbname => $cdbname +); +my $vega_dba = Bio::EnsEMBL::DBSQL::DBAdaptor->new( + -host => $vhost||'ens-staging1', + -user => $vuser||'ensadmin', + -pass => $vpass, + -group => 'vega', + -dbname => $vdbname +); # -# get ensembl gene ids and vega stable ids from the core database +# get ensembl gene ids and vega stable ids from the *core* database # - - -my %vega_to_ens_id; -my ($vega_stable_id, $gene_id); - -my $sth = $core_dba->dbc->prepare("select ensembl_id, display_label from object_xref join xref using(xref_id) join external_db using(external_db_id) where db_name = 'OTTG' and ensembl_object_type = 'Gene'"); -$sth->execute; - -$sth->bind_columns(\$gene_id, \$vega_stable_id); - -while ($sth->fetch){ - # sometimes we will see more than one gene associated with an OTTG - # this happens when an OTTG on the primary assemby has been projected to a patch - $vega_to_ens_id{$vega_stable_id}{$gene_id} = $gene_id; -} -$sth->finish; - -print "\nFetched ".(scalar(keys %vega_to_ens_id))." vega_stable_ids\n"; - -my $vega_dba = Bio::EnsEMBL::DBSQL::DBAdaptor->new(-host => $vhost||'ens-staging1', - -user => $vuser||'ensro', - -port => $vport||3306, - -dbname => $vdbname||"homo_sapiens_vega_63_37"); - -# -# SQL to get alt_allele data from vega -# - -my $sql =(<<EOS); -select aa.alt_allele_id, g.stable_id - from alt_allele aa, gene g - where aa.gene_id = g.gene_id -EOS - - -# -# Store data in a hash where the key is the alt_id and the ensembl gene ids -# stored in an anonymous array (value of the hash). -# - -my $sth = $vega_dba->dbc->prepare($sql); -$sth->execute; -my ($alt_id, $vega_stable_id); -my %alt_alleles; -$sth->bind_columns(\$alt_id, \$vega_stable_id); - +my $vega_core_sql = <<'SQL'; +select ensembl_id, display_label +from object_xref +join xref using(xref_id) +join external_db using(external_db_id) +where db_name = 'OTTG' +and ensembl_object_type = 'Gene' +SQL + +# sometimes we will see more than one gene associated with an OTTG +# this happens when an OTTG on the primary assemby has been projected to a patch. +my %vega_to_ensembl_core_gene_id; +$core_dba->dbc->sql_helper()->execute_no_return(-SQL => $vega_core_sql, -CALLBACK => sub { + my ($row) = @_; + my ($vega_stable_id, $gene_id) = @{$row}; + $vega_to_ensembl_core_gene_id{$vega_stable_id}{$gene_id} = $gene_id; +}); + +print "\nFetched ".(scalar(keys %vega_to_ensembl_core_gene_id))." Vega Stable IDs\n"; + +# +# Get AltAlleles from vega +# +my $vega_aaga = $vega_dba->get_AltAlleleGroupAdaptor(); +#TODO deprecated call in 74 +my $vega_groups = $vega_aaga->fetch_all_Groups(); +# my $groups = $vega_aaga->fetch_all(); #replace the above with me ASAP + +my $cnt_vega_rows = @{$vega_groups}; +print STDERR "Fetched $cnt_vega_rows rows from the vega db alt_allele table\n"; my %no_gene_id; - -my $cnt_vega_rows = 0; -while($sth->fetch()){ - $cnt_vega_rows++; - if (exists $vega_to_ens_id{$vega_stable_id} ) { - foreach my $gene_id ( keys %{$vega_to_ens_id{$vega_stable_id}} ) { - push @{$alt_alleles{$alt_id}}, $gene_id ; +my @new_groups; +foreach my $group (@{$vega_groups}) { + my $members = $group->get_all_Genes_types(); + my $new_core_group = undef; + foreach my $member (@{$members}) { + my ($vega_gene, $attribs_hash) = @{$member}; + if(exists $vega_to_ensembl_core_gene_id{$vega_gene->stable_id()}) { + $new_core_group ||= Bio::EnsEMBL::AltAlleleGroup->new(); # initalise if we don't already have one + foreach my $gene_id (keys %{$vega_to_ens_id{$vega_stable_id}} ) { + #Add each gene in. If we had a 1:m relationship then we copy the attribute already assigned + #across + $new_core_group->add_member($gene_id, $attribs_hash); + } + } + else { + push @{$no_gene_id{$group->dbID()}}, $vega_stable_id; + print STDERR "no ensembl gene_id found for vega stable id $vega_stable_id in core\n"; } - } else { - push @{$no_gene_id{$alt_id}}, $vega_stable_id; - print STDERR "no ensembl gene_id found for vega stable id $vega_stable_id in core\n"; } + push(@new_groups, $new_core_group); } -$sth->finish; -print STDERR "Fetched $cnt_vega_rows rows from the vega db alt_allele table\n"; # # Delete the old data # - print STDERR "\n\nDeleting all alt_alleles...\n\n"; -my $sth = $core_dba->dbc->prepare("delete from alt_allele"); -$sth->execute; +$core_dba->dbc->do("delete from alt_allele"); # # Store alt_alleles. # - print STDERR "Storing new alt alleles...\n\n"; my $alt_allele_count=0; my $gene_count = 0; -my $ga = $core_dba->get_adaptor("gene"); - -foreach my $key (keys %alt_alleles){ - my @gene_ids = @{$alt_alleles{$key}}; - my @genes; - foreach my $gene_id (@gene_ids) { - push @genes, $ga->fetch_by_dbID($gene_id); - } - - my $alt_allele_id = $ga->store_alt_alleles(\@genes); - $alt_allele_count ++ if ($alt_allele_id); - $gene_count += scalar(@genes) if ($alt_allele_id); +my $core_aaga = $core_dba->get_AltAlleleGroupAdaptor(); +foreach my $group (@new_groups) { + my $alt_allele_id = $core_aaga->store($group); + $alt_allele_count++; + $gene_count += $group->size() } print "Added $alt_allele_count alt_allele ids for $gene_count genes\nDONE\n"; -- GitLab