Commit d412b135 authored by Kieron Taylor's avatar Kieron Taylor 😠
Browse files

The big post-merge alt allele commit. Changes to alt_allele table, new tests,...

The big post-merge alt allele commit. Changes to alt_allele table, new tests, and a couple of new objects.
parent 5bdad69a
=head1 LICENSE
Copyright (c) 1999-2013 The European Bioinformatics Institute and
Genome Research Limited. All rights reserved.
This software is distributed under a modified Apache license.
For license details, please see
http://www.ensembl.org/info/about/code_licence.html
=head1 CONTACT
Please email comments or questions to the public Ensembl
developers list at <dev@ensembl.org>.
Questions may also be sent to the Ensembl help desk at
<helpdesk@ensembl.org>.
=cut
=head1 NAME
Bio::EnsEMBL::AltAlleleGroup
=head1 SYNOPSIS
use Bio::EnsEMBL::AltAlleleGroup;
use Bio::EnsEMBL::DBSQL::AltAlleleGroupAdaptor;
my $aag_adaptor = Bio::EnsEMBL::Registry->get_DBAdaptor("Human","core","AltAlleleGroup");
# For a known Gene, find the reference alternative allele
my $aag = $aag_adaptor->fetch_Group_by_Gene_dbID($gene->dbID);
my $reference_gene = $aag->get_representative_Gene;
# Get a list of AltAlleleGroups
my $list = $aag_adaptor->fetch_all_Groups_by_type('HAS_CODING_POTENTIAL');
$list = $aag_adaptor->fetch_all_Groups();
while ($aag = shift @$list) {
$aag->get_all_Genes;
# Do your important things ...
}
# Creating and editing an AltAlleleGroup
my $type_flags = {IS_MOST_COMMON_ALLELE => 1, AUTOMATICALLY_ASSIGNED => 1};
$aag = Bio::EnsEMBL::AltAlleleGroup->new(
-MEMBERS => [ [$gene_id,$type_flags ] ],
);
$aag->remove_all_members;
$aag->add_member($gene_id,$type_flags);
my $dbID = $aag_adaptor->store($aag);
=head1 DESCRIPTION
Alt allele groups keep track of which alleles are tied to a particular Gene.
They allow related genes to be located. This class allows fetching of both
IDs and fully fledged Gene objects.
AltAlleleGroup members are assigned types to differentiate them by their
origin. These types are set as flags, allowing you to select the union of
types as well as by individual ones.
No flags set denotes a situation of no information.
Valid flags are as follows:
'IS_REPRESENTATIVE',
'IS_MOST_COMMON_ALLELE',
'IN_CORRECTED_ASSEMBLY',
'HAS_CODING_POTENTIAL',
'IN_ARTIFICIALLY_DUPLICATED_ASSEMBLY',
'IN_SYNTENIC_REGION',
'HAS_SAME_UNDERLYING_DNA_SEQUENCE',
'IN_BROKEN_ASSEMBLY_REGION',
'IS_VALID_ALTERNATE',
'SAME_AS_REPRESENTATIVE',
'SAME_AS_ANOTHER_ALLELE',
'MANUALLY_ASSIGNED',
'AUTOMATICALLY_ASSIGNED'
=cut
package Bio::EnsEMBL::AltAlleleGroup;
use strict;
use warnings;
#use constant {
# IS_REPRESENTATIVE => 1,
# IS_MOST_COMMON_ALLELE => 2,
# IN_CORRECTED_ASSEMBLY => 3,
# HAS_CODING_POTENTIAL => 4,
# IN_ARTIFICIALLY_DUPLICATED_ASSEMBLY => 5,
# IN_SYNTENIC_REGION => 6,
# HAS_SAME_UNDERLYING_DNA_SEQUENCE => 7,
# IN_BROKEN_ASSEMBLY_REGION => 8,
# IS_VALID_ALTERNATE => 9,
# SAME_AS_REPRESENTATIVE => 10,
# SAME_AS_ANOTHER_ALLELE => 11,
# MANUALLY_ASSIGNED => 12,
# AUTOMATICALLY_ASSIGNED => 13,
#};
use Bio::EnsEMBL::Utils::Argument qw(rearrange);
use Bio::EnsEMBL::Utils::Exception qw(warning throw);
use base qw/Bio::EnsEMBL::Storable/;
=head2 new
Arg [-MEMBERS]: A list reference of [gene_id,type_flags]
: gene_id is a dbID for Gene (consistent only within one release)
: type_flags is a hash ref of attributes for this member
Example : $aag = Bio::EnsEMBL::AltAlleleGroup->new(
-MEMBERS => [ [1,{$type => 1}], [2,{$other_type => 1}], [3,{$type => 1}] ],
);
Description: Creates a new alt-allele group object
Returntype : Bio::EnsEMBL::AltAlleleGroup
Exceptions : none
Caller : general
=cut
sub new {
  my $caller = shift;
  # Allow the constructor to be called on a class name or on an existing object.
  my $class = ref($caller) || $caller;
  # The parent constructor receives the full argument list; presumably it
  # picks out -dbID and -ADAPTOR (both are passed by the adaptor) - confirm
  # against Bio::EnsEMBL::Storable.
  my $self = $class->SUPER::new(@_);
  my ( $list ) = rearrange( [ 'MEMBERS'], @_ );
  # Always store an array reference. Without this default a group built with
  # no -MEMBERS holds undef, and read-only dereferences such as the one in
  # size() die under "use strict".
  $self->{'MEMBERS'} = defined($list) ? $list : [];
  return $self;
}
=head2 add_member
Arg [1] : Gene dbID
Arg [2] : Hash ref of type flags (flag name => 1) for this member, see Description above
Description : Adds a record of one new member to the AltAlleleGroup. Once a
change is made, this must be persisted to the database with
AltAlleleGroupAdaptor->store or ->update
Example : $aag->add_member(1040032,$types_hash);
$aaga->update($aag); # updating the whole group is necessary.
=cut
sub add_member {
  my ($self, $gene_id, $type_hash) = @_;
  # A member added without flags means "no information". Store an empty hash
  # rather than undef so that code dereferencing the flags (for example
  # %{ $member->[1] }) does not die under "use strict".
  $type_hash = {} unless defined $type_hash;
  # push creates the member list on demand if it does not exist yet.
  push @{ $self->{'MEMBERS'} }, [ $gene_id, $type_hash ];
  return;
}
=head2 get_all_members_with_type
  Arg [1]     : String - name of a type flag, e.g. 'HAS_CODING_POTENTIAL'
  Description : Selects the members of this group whose type hash contains
                the given flag as a key. The value stored under the key is
                not inspected.
  Returntype  : Listref of members, each of the form [gene_id, type_hash]
=cut
sub get_all_members_with_type {
  my ($self, $type) = @_;
  my $roster = $self->{'MEMBERS'};
  my @matches;
  foreach my $entry (@$roster) {
    # Only the presence of the key matters, not its value.
    next unless exists($entry->[1]->{$type});
    push @matches, $entry;
  }
  return \@matches;
}
=head2 attribs
  Arg [1]     : Gene dbID of a member of this group
  Description : Looks up the type flags stored for one member. Members are
                matched on their gene dbID using numeric comparison.
                NOTE(review): this method was previously an empty stub; the
                lookup behaviour is inferred from its name and argument.
  Returntype  : Hash ref of type flags for the first matching member, or
                undef (empty list in list context) when no ID is given or
                the gene is not in this group
=cut
sub attribs {
  my ($self, $member_id) = @_;
  return unless defined $member_id;
  my $members = $self->{'MEMBERS'};
  foreach my $member (@$members) {
    return $member->[1] if $member->[0] == $member_id;
  }
  return;
}
=head2 remove_all_members
Description: Remove members from this object, but NOT the database. See
AltAlleleGroupAdaptor->remove()
Use in conjunction with add_member if members need to be altered
=cut
sub remove_all_members {
  my ($self) = @_;
  # Replace the member list with a fresh, empty one. Only this object
  # changes; nothing is written to the database here.
  $self->{'MEMBERS'} = [];
  return;
}
=head2 rep_Gene_id
Arg[1] : Optional - set a new representative Gene id for the group
Description: Reports or sets the representative Gene for this AltAlleleGroup
If you wish to remove the representative status of all genes without
setting a new one, see unset_rep_Gene_id
Returntype : Integer or undef if none set
=cut
sub rep_Gene_id {
  my ($self, $new_id) = @_;
  my $list = $self->{'MEMBERS'};

  if ($new_id) {
    # Setter. Check that the gene belongs to the group BEFORE touching any
    # flags, so that a rejected request leaves the current representative
    # in place instead of stripping the flag from every member.
    my $is_member = 0;
    foreach my $allele (@$list) {
      if ($allele->[0] == $new_id) {
        $is_member = 1;
        last;
      }
    }
    if (!$is_member) {
      throw("Requested representative gene ID was not set because it is not in this AltAlleleGroup, ID ".$self->dbID);
    }
    # Exactly the requested gene keeps the flag; it is removed from all others.
    foreach my $allele (@$list) {
      if ($allele->[0] == $new_id) {
        $allele->[1]->{IS_REPRESENTATIVE} = 1;
      }
      else {
        delete($allele->[1]->{IS_REPRESENTATIVE});
      }
    }
    return $new_id;
  }

  # Getter: report the first member carrying the flag. A defined but false
  # ID (0 or '') is neither a valid setter value nor a getter call; as
  # before, it falls through to the warning below.
  if (!defined($new_id)) {
    foreach my $allele (@$list) {
      my ($gene_id, $type) = @$allele;
      return $gene_id if exists($type->{IS_REPRESENTATIVE});
    }
  }
  warning("No representative allele currently set for this AltAlleleGroup");
  return;
}
=head2 unset_rep_Gene_id
Description: Removes the representative Gene flag from this AltAlleleGroup.
This action is not possible through rep_Gene_id due to
validation of inputs.
Returntype :
=cut
sub unset_rep_Gene_id {
  my ($self) = @_;
  # Walk the stored member list and drop the representative flag from every
  # member's type hash; all other flags are left untouched.
  foreach my $member (@{ $self->{'MEMBERS'} }) {
    delete($member->[1]->{IS_REPRESENTATIVE});
  }
  return;
}
=head2 get_all_Gene_ids
Arg[1] : Boolean - Do not include representative gene in list of ids.
Description : fetches all the Gene dbIDs within the allele group. It can also
be used to list those ids that are not the representative Gene.
Returntype : listref of gene dbIDs
=cut
sub get_all_Gene_ids {
  my ($self, $all_but_rep) = @_;
  my $members = $self->{'MEMBERS'};
  my @ids;
  foreach my $member (@$members) {
    my ($id, $flags) = @$member;
    # When asked to exclude the representative, skip any member whose
    # IS_REPRESENTATIVE flag holds a true value.
    push @ids, $id unless $all_but_rep && $flags->{IS_REPRESENTATIVE};
  }
  return \@ids;
}
=head2 get_representative_Gene
  Description : Fetches the Gene for the ID reported by rep_Gene_id, using
                the GeneAdaptor obtained through this object's adaptor.
                The object must have an adaptor attached.
  Returntype  : Whatever the GeneAdaptor's fetch_by_dbID returns
                (presumably a Bio::EnsEMBL::Gene - confirm)
=cut
sub get_representative_Gene {
  my ($self) = @_;
  my $gene_adaptor = $self->adaptor->db->get_GeneAdaptor;
  return $gene_adaptor->fetch_by_dbID($self->rep_Gene_id);
}
=head2 get_all_Genes
  Arg [1]     : Boolean - passed straight through to get_all_Gene_ids; when
                true the representative gene is left out
  Description : Collects the gene dbIDs of this group and fetches them in one
                call through the GeneAdaptor obtained from this object's
                adaptor. The object must have an adaptor attached.
  Returntype  : Whatever the GeneAdaptor's fetch_all_by_dbID_list returns
                (presumably a listref of Bio::EnsEMBL::Gene - confirm)
=cut
sub get_all_Genes {
  my ($self, $all_but_rep) = @_;
  my $ids = $self->get_all_Gene_ids($all_but_rep);
  my $gene_adaptor = $self->adaptor->db->get_GeneAdaptor;
  my $found = $gene_adaptor->fetch_all_by_dbID_list($ids);
  return $found;
}
=head2 size
  Description : Reports how many members are currently held by this group.
  Returntype  : Integer; 0 when no member list has been set
=cut
sub size {
  my ($self) = @_;
  my $list = $self->{'MEMBERS'};
  # A group created without members may hold undef here; dereferencing that
  # in rvalue context dies under "use strict", so report an empty group.
  return 0 unless defined $list;
  return scalar(@$list);
}
=head2 get_all_members
Description: Retrieves all of the information about all members.
Returntype : Listref of pairs: [gene_id,type_flags_hashref]
Caller : AltAlleleGroupAdaptor->store
=cut
sub get_all_members {
  my ($self) = @_;
  # Hands back the stored list itself, not a copy: changes made through the
  # returned reference are visible in this object.
  return $self->{'MEMBERS'};
}
1;
\ No newline at end of file
=head1 LICENSE
Copyright (c) 1999-2013 The European Bioinformatics Institute and
Genome Research Limited. All rights reserved.
This software is distributed under a modified Apache license.
For license details, please see
http://www.ensembl.org/info/about/code_licence.html
=head1 CONTACT
Please email comments or questions to the public Ensembl
developers list at <dev@ensembl.org>.
Questions may also be sent to the Ensembl help desk at
<helpdesk@ensembl.org>.
=cut
=head1 NAME
Bio::EnsEMBL::DBSQL::AltAlleleGroupAdaptor - Adaptor for the manipulation of
Alternative allele groupings
=head1 SYNOPSIS
use Bio::EnsEMBL::AltAlleleGroup;
use Bio::EnsEMBL::DBSQL::AltAlleleGroupAdaptor;
my $aag_adaptor = Bio::EnsEMBL::Registry->get_DBAdaptor("Human","core","AltAlleleGroup");
# For a known Gene, find the reference alternative allele
my $aag = $aag_adaptor->fetch_Group_by_Gene_dbID($gene->dbID);
my $reference_gene = $aag->get_representative_Gene;
# Get a list of AltAlleleGroups
my $list = $aag_adaptor->fetch_all_Groups_by_type('IS_REPRESENTATIVE');
$list = $aag_adaptor->fetch_all_Groups();
my $dbID = $aag_adaptor->store($aag);
$aag = $aag_adaptor->fetch_Group_by_id($dbID);
$aag_adaptor->remove($aag);
=head1 DESCRIPTION
The AltAlleleGroupAdaptor provides CRUD for AltAlleleGroup objects. It allows
groups of alleles to be retrieved by group and gene ids.
=cut
package Bio::EnsEMBL::DBSQL::AltAlleleGroupAdaptor;
use strict;
use warnings;
use base qw/Bio::EnsEMBL::DBSQL::BaseAdaptor/;
use Bio::EnsEMBL::AltAlleleGroup;
use Bio::EnsEMBL::Utils::Exception;
use DBI qw( :sql_types );
=head2 fetch_all_Groups
Arg[1] : (optional) String - type of group
Description : Fetches all the alt-allele groups, creates objects to represent
them and returns them in a list
Multispecies support is triggered by the is_multispecies flag
and species_id of the DBAdaptor.
Specifying a group type identifies all groups containing a
member of this type. It does not filter out the other members
Returntype : Listref of Bio::EnsEMBL::AltAlleleGroup
=cut
sub fetch_all_Groups {
  my $self = shift;
  my $type = shift;
  # The type is upper-cased before being used as the attrib filter.
  $type = uc($type) if ($type);

  my @group_list = ();
  my $species_id;
  my $get_all_sql;
  my $is_multispecies = $self->db->is_multispecies();

  if ($is_multispecies) {
    # multispecies databases must be restricted in their treatment
    $species_id = $self->db->species_id;
    if ($type) {
      $get_all_sql = q(
SELECT DISTINCT alt_allele_group_id FROM alt_allele a
JOIN (gene g, seq_region s, coord_system c, alt_allele_attrib b)
ON (
c.coord_system_id = s.coord_system_id
AND s.seq_region_id = g.seq_region_id
AND g.gene_id = a.gene_id
AND a.alt_allele_id = b.alt_allele_id
)
WHERE c.species_id = ? AND b.attrib = ?
);
    } else {
      # This branch must be exclusive with the one above. Without the "else"
      # the type-filtered query was always overwritten, and the type was then
      # bound to a second placeholder that this statement does not have.
      $get_all_sql = q(
SELECT DISTINCT alt_allele_group_id FROM alt_allele a
JOIN (gene g, seq_region s, coord_system c)
ON (
c.coord_system_id = s.coord_system_id
AND s.seq_region_id = g.seq_region_id
AND g.gene_id = a.gene_id
)
WHERE c.species_id = ?
);
    }
  } else {
    if ($type) {
      $get_all_sql = q(SELECT DISTINCT alt_allele_group_id
FROM alt_allele a, alt_allele_attrib b
WHERE a.alt_allele_id = b.alt_allele_id
AND b.attrib = ?);
    } else {
      $get_all_sql = q(SELECT DISTINCT alt_allele_group_id FROM alt_allele);
    }
  }

  my $sth = $self->prepare($get_all_sql);
  # Placeholders are bound positionally: species id first (if present),
  # then the type (if present).
  my $x = 1;
  if ($is_multispecies) {
    $sth->bind_param($x,$species_id, SQL_INTEGER);
    $x++;
  }
  $sth->bind_param($x,$type, SQL_VARCHAR) if ($type);

  eval { $sth->execute() };
  if ($@) {
    throw("Query error in AltAlleleGroupAdaptor: $@");
  }

  my $group_id;
  $sth->bind_col(1, \$group_id );
  # Each matching group id is expanded into a full group object, including
  # members that do not themselves carry the requested type.
  while ( $sth->fetch() ) {
    my $aag = $self->fetch_Group_by_id($group_id);
    push @group_list, $aag;
  }
  $sth->finish;
  return \@group_list;
}
=head2 fetch_all_Groups_by_type
Arg[1] : String - type of group
Description : Convenience method for restricting group fetches to just one
type. Technically it selects out mixed-annotation groups where
a single member contains that type.
Returntype : Listref of Bio::EnsEMBL::AltAlleleGroup
=cut
sub fetch_all_Groups_by_type {
  my ($self, $type) = @_;
  # $type names an alt_allele_attrib value; the filtering itself is done by
  # fetch_all_Groups, which is called in scalar context.
  return scalar $self->fetch_all_Groups($type);
}
=head2 fetch_Group_by_id
Arg[1] : AltAlleleGroup dbID.
Description : Creates and returns an AltAlleleGroup for the given group id
Returntype : Bio::EnsEMBL::AltAlleleGroup
=cut
sub fetch_Group_by_id {
  my ($self, $group_id) = @_;
  my @group_members;

  # First query: every allele row of the group, in alt_allele_id order.
  my $member_sql = q(
SELECT alt_allele_id, gene_id FROM alt_allele
WHERE alt_allele_group_id = ? ORDER BY alt_allele_id
);
  my $member_sth = $self->prepare($member_sql);
  $member_sth->bind_param(1,$group_id, SQL_INTEGER);
  $member_sth->execute();
  my ($allele_id, $member_gene_id);
  $member_sth->bind_columns( \($allele_id,$member_gene_id) );

  # Second query: run once per allele row to collect its attributes.
  my $flag_sql = q(
SELECT attrib FROM alt_allele_attrib WHERE alt_allele_id = ?
);
  my $flag_sth = $self->prepare($flag_sql);
  my $flag;
  while ($member_sth->fetch()) {
    $flag_sth->execute($allele_id);
    $flag_sth->bind_col(1,\$flag);
    # Each attribute name becomes a key with the value 1.
    my %flags;
    $flags{$flag} = 1 while $flag_sth->fetch;
    push @group_members,[$member_gene_id, \%flags];
  }
  $flag_sth->finish;
  $member_sth->finish;

  # No object is built for a false group id or for a group without members.
  return unless ($group_id && scalar(@group_members) > 0);

  my $group = Bio::EnsEMBL::AltAlleleGroup->new(
    -dbID => $group_id,
    -MEMBERS => \@group_members,
    -ADAPTOR => $self,
  );
  return $group;
}
=head2 fetch_Group_by_Gene_dbID
  Arg[1]      : Gene dbID
  Description : Looks up the alt allele group that the given gene belongs to
                and returns it via fetch_Group_by_id. Only the first row
                returned by the lookup query is used.
  Returntype  : Bio::EnsEMBL::AltAlleleGroup, or undef (empty list in list
                context) if the gene is not in any group
=cut
sub fetch_Group_by_Gene_dbID {
  my ($self, $gene_id) = @_;
  my $gene_id_sql = q(
SELECT alt_allele_group_id FROM alt_allele
WHERE gene_id = ?
);
  my $sth = $self->prepare($gene_id_sql);
  $sth->bind_param(1,$gene_id, SQL_INTEGER);
  my $group_id;
  $sth->execute();
  $sth->bind_col(1,\$group_id);
  $sth->fetch;
  $sth->finish;
  # The outcome depends only on whether a group id was found. $@ is not
  # consulted: there is no eval in this method, so it could only hold a
  # stale error from unrelated code and wrongly hide a valid result.
  return unless $group_id;
  return $self->fetch_Group_by_id($group_id);
}
=head2 store
Arg[0] : Bio::EnsEMBL::AltAlleleGroup
Description: Used for persisting new groups to the database.
It updates the dbID of the object handed to it to match the
database.
Returntype : Integer Alt Allele Group id
=cut
sub store {
my $self = shift;
my $allele_group = shift;
if (ref($allele_group) ne "Bio::EnsEMBL::AltAlleleGroup") {
throw ("Can only store Bio::EnsEMBL::AltAlleleGroup objects.");
} else {
if ($allele_group->size < 2) {
warning('At least 2 genes must be provided to construct alternative alleles. Ignoring.');
return;
}
my $dbID = $allele_group->dbID;
my $new_group_sth = $self->prepare("INSERT INTO alt_allele_group (alt_allele_group_id) VALUES (?)");
my $group_sth = $self->prepare("SELECT alt_allele_group_id FROM alt_allele_group WHERE alt_allele_group_id = ?");
my $altered_rows;
# Do not create a new group ID if one already exists, such as when updating a group.
my $existing_rows = $group_sth->execute($dbID);
if ($existing_rows == 0) {
$altered_rows = $new_group_sth->execute($dbID);
if ($altered_rows > 0) {
$dbID = $self->last_insert_id(undef,undef,undef,'alt_allele_group');
$allele_group->dbID($dbID);
}
}
my $sth = $self->prepare("INSERT INTO alt_allele (alt_allele_id, alt_allele_group_id, gene_id) VALUES (?,?,?)");
my $attrib_sth = $self->prepare("INSERT INTO alt_allele_attrib (alt_allele_id,attrib) VALUES (?,?)");
foreach my $allele (@{ $allele_group->get_all_members() }) {
my $gene_id = $allele->[0];
my %flags = %{$allele->[1]};
$sth->bind_param(1, undef, SQL_INTEGER);
$sth->bind_param(2, $dbID, SQL_INTEGER