#!/usr/bin/env perl
# Copyright [1999-2015] Wellcome Trust Sanger Institute and the EMBL-European Bioinformatics Institute
# Copyright [2016-2019] EMBL-European Bioinformatics Institute
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Copies alt allele groups from a Vega database into a core database.
#
# Vega genes are translated to core gene ids through the OTTG xrefs held in
# the core database. The existing contents of the core alt_allele,
# alt_allele_attrib and alt_allele_group tables are deleted and replaced by
# the translated groups.

use strict;
use warnings;

use Bio::EnsEMBL::ApiVersion;
use Bio::EnsEMBL::DBSQL::DBAdaptor;
use Bio::EnsEMBL::AltAlleleGroup;
use Getopt::Long qw(:config pass_through);

# (make sure the api version is correct)
# Usage:
# perl alt_alleles.pl -cpass XXXX > & human_release_63_alt_alleles
#
#
# long way
# perl alt_alleles.pl -vhost ens-staging1 -vport 3306 -vdbname homo_sapiens_vega_63_37 -cdbname homo_sapiens_core_63_37 -chost ens-staging1 -cpass XXXX > & human_release_63_alt_alleles
#

my ($vhost, $vpass, $vport, $vdbname, $vuser,
    $chost, $cpass, $cport, $cdbname, $cuser);

# v* options describe the Vega database, c* options the core database.
GetOptions(
    'vuser=s'   => \$vuser,
    'vpass=s'   => \$vpass,
    'vhost=s'   => \$vhost,
    'vport=i'   => \$vport,
    'vdbname=s' => \$vdbname,
    'cuser=s'   => \$cuser,
    'cpass=s'   => \$cpass,
    'chost=s'   => \$chost,
    'cport=i'   => \$cport,
    'cdbname=s' => \$cdbname,
);

#
# Default the database names from the API version when they are not given.
#
my $api_version = Bio::EnsEMBL::ApiVersion::software_version();

if (!defined $vdbname) {
    $vdbname = "homo_sapiens_vega_" . $api_version . "_37";
}

if (!defined $cdbname) {
    $cdbname = "homo_sapiens_core_" . $api_version . "_37";
}

#
# Connect to the core & vega databases
#
my $core_dba = Bio::EnsEMBL::DBSQL::DBAdaptor->new(
    -host   => $chost || 'ens-staging1',
    -user   => $cuser || 'ensadmin',
    -pass   => $cpass,
    -group  => 'core',
    -dbname => $cdbname,
    -port   => $cport,
);

my $vega_dba = Bio::EnsEMBL::DBSQL::DBAdaptor->new(
    -host   => $vhost || 'ens-staging1',
    -user   => $vuser || 'ensadmin',
    -pass   => $vpass,
    -group  => 'vega',
    -dbname => $vdbname,
    -port   => $vport,
);

#
# get ensembl gene ids and vega stable ids from the *core* database
#
my $vega_core_sql = <<'SQL';
select display_label, ensembl_id
from object_xref
join xref using(xref_id)
join external_db using(external_db_id)
where db_name = 'OTTG'
and ensembl_object_type = 'Gene'
SQL

# sometimes we will see more than one gene associated with an OTTG
# this happens when an OTTG on the primary assembly has been projected to a patch.
# Hence the two-level hash: vega stable id => { core gene id => core gene id }.
my %vega_to_ensembl_core_gene_id;
$core_dba->dbc->sql_helper()->execute_no_return(
    -SQL      => $vega_core_sql,
    -CALLBACK => sub {
        my ($row) = @_;
        my ($vega_stable_id, $gene_id) = @{$row};
        $vega_to_ensembl_core_gene_id{$vega_stable_id}{$gene_id} = $gene_id;
    },
);
print "\nFetched " . (scalar(keys %vega_to_ensembl_core_gene_id)) . " Vega Stable IDs\n";

#
# Get AltAlleles from vega
#
my $vega_aaga   = $vega_dba->get_AltAlleleGroupAdaptor();
my $vega_groups = $vega_aaga->fetch_all();

my $cnt_vega_rows = @{$vega_groups};
print STDERR "Fetched $cnt_vega_rows rows from the vega db alt_allele table\n";

# Vega stable ids without a core gene, keyed by vega group dbID.
# Collected only; nothing below reads it back.
my %no_gene_id;

# Translated groups, built fully in memory before the core tables are touched.
my @new_groups;

foreach my $group (@{$vega_groups}) {
    my $members        = $group->get_all_Genes_types();
    my $new_core_group = Bio::EnsEMBL::AltAlleleGroup->new();

    foreach my $member (@{$members}) {
        my ($vega_gene, $attribs_hash) = @{$member};
        my $vega_stable_id = $vega_gene->stable_id();

        if (exists $vega_to_ensembl_core_gene_id{$vega_stable_id}) {
            # Add each gene in. If we had a 1:m relationship then we copy the
            # attribute already assigned across. Sorting the ids keeps the
            # member order identical from one run to the next instead of
            # following hash order.
            my @core_gene_ids =
                sort { $a <=> $b }
                keys %{ $vega_to_ensembl_core_gene_id{$vega_stable_id} };

            foreach my $gene_id (@core_gene_ids) {
                $new_core_group->add_member($gene_id, $attribs_hash);
            }
        }
        else {
            push @{ $no_gene_id{ $group->dbID() } }, $vega_stable_id;
            print STDERR "no ensembl gene_id found for vega stable id $vega_stable_id in core\n";
        }
    }

    # Groups in which no member could be translated are dropped.
    if ($new_core_group->size() > 0) {
        push(@new_groups, $new_core_group);
    }
}

#
# Delete the old data
#
# NOTE: the delete and the store below are separate statements, so a failure
# while storing leaves the core tables only partially repopulated.
print STDERR "\n\nDeleting all alt_alleles...\n\n";
$core_dba->dbc->do("delete from alt_allele");
$core_dba->dbc->do("delete from alt_allele_attrib");
$core_dba->dbc->do("delete from alt_allele_group");

#
# Store alt_alleles.
#
print STDERR "Storing new alt alleles...\n\n";

my $alt_allele_count = 0;
my $gene_count       = 0;

my $core_aaga = $core_dba->get_AltAlleleGroupAdaptor();
foreach my $group (@new_groups) {
    $core_aaga->store($group);
    $alt_allele_count++;
    $gene_count += $group->size();
}

print "Added $alt_allele_count alt_allele ids for $gene_count genes\nDONE\n";