Commit 14c33f4e authored by premanand17's avatar premanand17
Browse files

Updates to add align_type column to dna_align_feature and...

Updates to add align_type column to dna_align_feature and protein_align_feature tables to support multiple align types and remove external_data column from dna_align_feature table - ENSCORESW-1937
parent a6b79aea
......@@ -44,7 +44,8 @@ implmentation for alignment features
-hstart => 200,
-hend => 220,
-analysis => $analysis,
-cigar_string => '10M3D5M2I'
-cigar_string => '10M3D5M2I',
-align_type => 'ensembl'
);
where $analysis is a Bio::EnsEMBL::Analysis object.
......@@ -154,11 +155,11 @@ use strict;
=head2 new
Arg [..] : List of named arguments. (-cigar_string , -features) defined
Arg [..] : List of named arguments. (-cigar_string , -features, -align_type) defined
in this constructor, others defined in FeaturePair and
SeqFeature superclasses. Either cigar_string or a list
of ungapped features should be provided - not both.
Example : $baf = new BaseAlignFeatureSubclass(-cigar_string => '3M3I12M');
Example : $baf = new BaseAlignFeatureSubclass(-cigar_string => '3M3I12M', -align_type => 'ensembl');
Description: Creates a new BaseAlignFeature using either a cigar string or
a list of ungapped features. BaseAlignFeature is an abstract
baseclass and should not actually be instantiated - rather its
......@@ -166,6 +167,7 @@ use strict;
Returntype : Bio::EnsEMBL::BaseAlignFeature
Exceptions : thrown if both feature and cigar string args are provided
thrown if neither feature nor cigar string args are provided
warn if cigar string is provided without cigar type
Caller : general
Status : Stable
......@@ -179,7 +181,14 @@ sub new {
my $self = $class->SUPER::new(@_);
my ($cigar_string,$features) = rearrange([qw(CIGAR_STRING FEATURES)], @_);
my ($cigar_string,$align_type,$features) = rearrange([qw(CIGAR_STRING ALIGN_TYPE FEATURES)], @_);
if (defined($align_type)) {
$self->{'align_type'} = $align_type;
} else {
warning("No align_type provided, using ensembl as default");
$self->{'align_type'} = 'ensembl';
}
if (defined($cigar_string) && defined($features)) {
throw("CIGAR_STRING or FEATURES argument is required - not both.");
......@@ -191,7 +200,7 @@ sub new {
} else {
throw("CIGAR_STRING or FEATURES argument is required");
}
return $self;
}
......@@ -223,6 +232,28 @@ sub cigar_string {
}
=head2 align_type
Arg [1] : type $align_type
Example : $feature->align_type( "ensembl" );
Description: get/set for attribute align_type.
align_type specifies which cigar string
is used to describe the alignment:
The default is 'ensembl'
Returntype : string
Exceptions : none
Caller : general
Status : Stable
=cut
sub align_type {
my $self = shift;
$self->{'align_type'} = shift if(@_);
return $self->{'align_type'};
}
=head2 alignment_length
Arg [1] : None
......@@ -237,8 +268,31 @@ sub cigar_string {
sub alignment_length {
my $self = shift;
if ($self->{'align_type'} eq 'ensembl') {
return $self->_ensembl_cigar_alignment_length();
} else {
throw("No alignment_length method available for " . $self->{'align_type'});
}
}
=head2 _ensembl_cigar_alignment_length
Arg [1] : None
Description: return the alignment length (including indels) based on the cigar_string
Returntype : int
Exceptions :
Caller :
Status : Stable
=cut
sub _ensembl_cigar_alignment_length {
my $self = shift;
if (! defined $self->{'_alignment_length'} && defined $self->cigar_string) {
my @pieces = ( $self->cigar_string =~ /(\d*[MDI])/g );
unless (@pieces) {
print STDERR "Error parsing cigar_string\n";
......@@ -308,10 +362,11 @@ sub strands_reversed {
return $self->{'strands_reversed'};
}
=head2 reverse_complement
Args : none
Description: reverse complement the FeaturePair,
Description: reverse complement the FeaturePair based on the cigar type
modifing strand, hstrand and cigar_string in consequence
Returntype : none
Exceptions : none
......@@ -320,9 +375,32 @@ sub strands_reversed {
=cut
sub reverse_complement {
my ($self) = @_;
if ($self->{'align_type'} eq 'ensembl') {
return $self->_ensembl_reverse_complement();
} else {
throw("no reverse_complement method implemented for " . $self->{'align_type'});
}
}
=head2 _ensembl_reverse_complement
Args : none
Description: reverse complement the FeaturePair for ensembl cigar string,
modifing strand, hstrand and cigar_string in consequence
Returntype : none
Exceptions : none
Caller : general
Status : Stable
=cut
sub _ensembl_reverse_complement {
my ($self) = @_;
# reverse strand in both sequences
$self->strand($self->strand * -1);
$self->hstrand($self->hstrand * -1);
......@@ -385,7 +463,7 @@ sub transform {
my @segments = @{ $self->project(@_) };
if ( !@segments ) {
return;
return undef;
}
my @ungapped;
......@@ -396,7 +474,7 @@ sub transform {
} else {
warning( "Failed to transform alignment feature; "
. "ungapped component could not be transformed" );
return;
return undef;
}
}
......@@ -404,7 +482,7 @@ sub transform {
if ($@) {
warning($@);
return;
return undef;
}
} ## end if ( !defined($new_feature...))
......@@ -412,11 +490,11 @@ sub transform {
}
=head2 _parse_cigar
=head2 _parse_ensembl_cigar
Args : none
Description: PRIVATE (internal) method - creates ungapped features from
internally stored cigar line
internally stored cigar line in ensembl format
Returntype : list of Bio::EnsEMBL::FeaturePair
Exceptions : none
Caller : ungapped_features
......@@ -424,7 +502,7 @@ sub transform {
=cut
sub _parse_cigar {
sub _parse_ensembl_cigar {
my ( $self ) = @_;
my $query_unit = $self->_query_unit();
......@@ -559,6 +637,16 @@ sub _parse_cigar {
}
sub _parse_cigar {
my $self = shift;
if ($self->{'align_type'} eq 'ensembl') {
return $self->_parse_ensembl_cigar();
} else {
throw("No parsing method implemented for " . $self->{'align_type'});
}
}
=head2 _parse_features
......@@ -574,9 +662,18 @@ sub _parse_cigar {
=cut
sub _parse_features {
my ($self, $features) = @_;
if ($self->{'align_type'} eq 'ensembl') {
$self->_parse_ensembl_features($features);
} else {
throw("No _parse_features method implemented for " . $self->{'align_type'});
}
}
my $message_only_once = 1;
sub _parse_features {
sub _parse_ensembl_features {
my ($self,$features ) = @_;
my $query_unit = $self->_query_unit();
......@@ -604,7 +701,7 @@ sub _parse_features {
my $hstrand = $f[0]->hstrand;
my $slice = $f[0]->slice();
my $hslice = $f[0]->hslice();
my $hslice = $f[0]->hslice();
my $name = $slice ? $slice->name() : undef;
my $hname = $f[0]->hseqname;
my $score = $f[0]->score;
......
......@@ -119,7 +119,7 @@ sub _columns {
daf.score
daf.external_db_id
daf.hcoverage
daf.external_data
daf.align_type
exdb.db_name
exdb.db_display_name);
}
......@@ -161,8 +161,8 @@ sub store {
seq_region_end, seq_region_strand,
hit_start, hit_end, hit_strand, hit_name,
cigar_line, analysis_id, score, evalue,
perc_ident, external_db_id, hcoverage, external_data)
VALUES (?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,NULL)" # 16 arguments, external data is being removed
perc_ident, external_db_id, hcoverage, align_type)
VALUES (?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?)" # 16 arguments, external_data removed and align_type added
);
FEATURE:
......@@ -188,6 +188,7 @@ FEATURE:
warning( "DnaDnaAlignFeature does not define a cigar_string.\n"
. "Assuming ungapped block with cigar_line=$cigar_string ." );
}
my $align_type = $feat->align_type();
my $hseqname = $feat->hseqname();
if ( !$hseqname ) {
......@@ -223,6 +224,7 @@ FEATURE:
$sth->bind_param( 13, $feat->percent_id, SQL_FLOAT );
$sth->bind_param( 14, $feat->external_db_id, SQL_INTEGER );
$sth->bind_param( 15, $feat->hcoverage, SQL_DOUBLE );
$sth->bind_param( 16, $feat->align_type, SQL_VARCHAR);
$sth->execute();
my $dbId = $self->last_insert_id("${tablename}_id", undef, $tablename);
......@@ -335,7 +337,7 @@ sub _objs_from_sth {
$cigar_line, $evalue,
$perc_ident, $score,
$external_db_id, $hcoverage,
$extra_data,
$align_type,
$external_db_name, $external_display_db_name );
$sth->bind_columns( \( $dna_align_feature_id, $seq_region_id,
......@@ -346,7 +348,7 @@ sub _objs_from_sth {
$cigar_line, $evalue,
$perc_ident, $score,
$external_db_id, $hcoverage,
$extra_data,
$align_type,
$external_db_name, $external_display_db_name )
);
......@@ -529,7 +531,7 @@ sub _objs_from_sth {
'dbID' => $dna_align_feature_id,
'external_db_id' => $external_db_id,
'hcoverage' => $hcoverage,
'extra_data' => undef,
'align_type' => $align_type,
'dbname' => $external_db_name,
'db_display_name' => $external_display_db_name
} ) );
......
......@@ -93,8 +93,8 @@ sub store{
"INSERT INTO $tablename (seq_region_id, seq_region_start, seq_region_end,
seq_region_strand, hit_start, hit_end,
hit_name, cigar_line,
analysis_id, score, evalue, perc_ident, external_db_id, hcoverage)
VALUES (?,?,?,?,?,?,?,?,?,?, ?, ?, ?, ?)");
analysis_id, score, evalue, perc_ident, external_db_id, hcoverage, align_type)
VALUES (?,?,?,?,?,?,?,?,?,?, ?, ?, ?, ?,?)");
FEATURE: foreach my $feat ( @feats ) {
if( !ref $feat || !$feat->isa("Bio::EnsEMBL::DnaPepAlignFeature") ) {
......@@ -157,6 +157,7 @@ sub store{
$sth->bind_param(12,$feat->percent_id,SQL_REAL);
$sth->bind_param(13,$feat->external_db_id,SQL_INTEGER);
$sth->bind_param(14,$feat->hcoverage,SQL_DOUBLE);
$sth->bind_param(15,$feat->align_type,SQL_VARCHAR);
$sth->execute();
my $dbId = $self->last_insert_id("${tablename}_id", undef, $tablename);
......@@ -207,7 +208,7 @@ sub _objs_from_sth {
$seq_region_end, $analysis_id, $seq_region_strand,
$hit_start, $hit_end, $hit_name,
$cigar_line, $evalue, $perc_ident,
$score, $external_db_id, $hcoverage,
$score, $external_db_id, $hcoverage, $align_type,
$external_db_name, $external_display_db_name );
$sth->bind_columns(\(
......@@ -215,7 +216,7 @@ sub _objs_from_sth {
$seq_region_end, $analysis_id, $seq_region_strand,
$hit_start, $hit_end, $hit_name,
$cigar_line, $evalue, $perc_ident,
$score, $external_db_id, $hcoverage,
$score, $external_db_id, $hcoverage, $align_type,
$external_db_name, $external_display_db_name ));
my $dest_slice_start;
......@@ -399,6 +400,7 @@ sub _objs_from_sth {
'dbID' => $protein_align_feature_id,
'external_db_id' => $external_db_id,
'hcoverage' => $hcoverage,
'align_type' => $align_type,
'dbname' => $external_db_name,
'db_display_name' => $external_display_db_name
} ) );
......@@ -436,6 +438,7 @@ sub _columns {
paf.score
paf.external_db_id
paf.hcoverage
paf.align_type
exdb.db_name
exdb.db_display_name);
}
......
......@@ -255,6 +255,30 @@ sub restrict_between_positions {
sub alignment_strings {
my ($self, @flags) = @_;
if ($self->align_type eq 'ensembl') {
return $self->_ensembl_alignment_strings(@flags);
} else {
throw("alignment_strings method not implemented for " . $self->align_type);
}
}
=head2 _ensembl_alignment_strings
Arg [1] : list of string $flags
Description: Allows to rebuild the alignment string of both the seq and hseq sequence
using the cigar_string information for ensembl cigar strings
Returntype : array reference containing 2 strings
the first corresponds to seq
the second corresponds to hseq
Exceptions :
Caller :
Status : Stable
=cut
sub _ensembl_alignment_strings {
my ( $self, @flags ) = @_;
# set the flags
......@@ -333,6 +357,29 @@ sub alignment_strings {
sub get_SimpleAlign {
my ( $self, @flags ) = @_;
if ($self->align_type eq 'ensembl') {
return $self->_ensembl_SimpleAlign();
} else {
throw("No get_SimpleAlign method implemented for " . $self->align_type);
}
}
=head2 _ensembl_SimpleAlign
Arg [1] : list of string $flags
Description: Internal method to build alignment string
for ensembl type cigar strings
using the cigar_string information and the slice and hslice objects
Returntype : a Bio::SimpleAlign object
Exceptions :
Caller :
Status : Stable
=cut
sub _ensembl_SimpleAlign {
my ( $self, @flags ) = @_;
# setting the flags
my $uc = 0;
my $translated = 0;
......
......@@ -15,7 +15,7 @@
use strict;
use Test::More;
use Test::Warnings;
use Test::Warnings qw(warning);
use Bio::EnsEMBL::Test::MultiTestDB;
use Bio::EnsEMBL::DnaDnaAlignFeature;
......@@ -92,7 +92,6 @@ is(@$feats, 452, "Found 452 features");
#
# Test store
#
$multi->save('core', 'dna_align_feature', 'attrib_type', 'dna_align_feature_attrib');
my $analysis = $feat->analysis;
......@@ -109,9 +108,11 @@ my $score = 80;
my $percent_id = 90;
my $evalue = 23.2;
my $cigar_string = '100M';
my $align_type = 'ensembl';
my $hcoverage = 99.5;
my $external_db_id = 2200;
warning {
$feat = Bio::EnsEMBL::DnaDnaAlignFeature->new
(-START => $start,
-END => $end,
......@@ -128,6 +129,7 @@ $feat = Bio::EnsEMBL::DnaDnaAlignFeature->new
-ANALYSIS => $analysis,
-HCOVERAGE => $hcoverage,
-EXTERNAL_DB_ID => $external_db_id);
};
my $attrib = Bio::EnsEMBL::Attribute->new(
-NAME => 'test_daf_name',
......@@ -194,5 +196,4 @@ sub print_features {
}
}
}
done_testing();
......@@ -15,7 +15,7 @@
use strict;
use Test::More;
use Test::Warnings;
use Test::Warnings qw(warning);
use Bio::EnsEMBL::Test::MultiTestDB;
use Bio::EnsEMBL::DnaDnaAlignFeature;
use Bio::EnsEMBL::FeaturePair;
......@@ -68,7 +68,8 @@ push @feats, Bio::EnsEMBL::FeaturePair->new
#
# Test DnaDnaAlignFeature::new(-features)
#
my $dnaf = Bio::EnsEMBL::DnaDnaAlignFeature->new( -features => \@feats );
my $dnaf;
warning { $dnaf = Bio::EnsEMBL::DnaDnaAlignFeature->new( -features => \@feats ); };
ok(ref($dnaf) && $dnaf->isa('Bio::EnsEMBL::DnaDnaAlignFeature'));
#
......@@ -111,7 +112,6 @@ ok( scalar($dnaf->ungapped_features) == 2);
#
# Test alignment strings
#
my $alignment_strings = $dnaf->alignment_strings('NO_HSEQ');
is($alignment_strings->[0], 'AAATTAAGGG', 'Retrieved query sequence');
is($alignment_strings->[1], '', 'No target sequence');
......@@ -121,11 +121,11 @@ is($alignment_strings->[0], 'AAATTAAGGG', 'Retrieved fixed query sequence');
#
# Test restrict_between_positions
#
my $new_daf = $dnaf->restrict_between_positions($dnaf->start + 2, $dnaf->end, 'SEQ');
my $new_daf;
warning { $new_daf = $dnaf->restrict_between_positions($dnaf->start + 2, $dnaf->end, 'SEQ'); };
is($new_daf->start, $dnaf->start + 2, 'New daf start');
is($new_daf->end, $dnaf->end, 'End has not changed');
$new_daf = $dnaf->restrict_between_positions(1, $dnaf->end - 2, 'SEQ');
warning { $new_daf = $dnaf->restrict_between_positions(1, $dnaf->end - 2, 'SEQ'); };
is($new_daf->start, $dnaf->start, 'No start change if beyond the start');
is($new_daf->end, $dnaf->end - 2, 'New daf end');
......
......@@ -15,7 +15,7 @@
use strict;
use Test::More;
use Test::Warnings;
use Test::Warnings qw(warning);
use Bio::EnsEMBL::Test::MultiTestDB;
use Bio::EnsEMBL::DnaPepAlignFeature;
......@@ -73,7 +73,8 @@ push @feats, Bio::EnsEMBL::FeaturePair->new
#
# Test DnaPepAlignFeature::new(-features)
#
my $dnaf = Bio::EnsEMBL::DnaPepAlignFeature->new( -features => \@feats );
my $dnaf;
warning { $dnaf = Bio::EnsEMBL::DnaPepAlignFeature->new( -features => \@feats ); };
ok(ref($dnaf) && $dnaf->isa('Bio::EnsEMBL::DnaPepAlignFeature'));
#
......
......@@ -49,8 +49,8 @@ my $ontology = Bio::EnsEMBL::Test::MultiTestDB->new('ontology');
my $odb = $ontology->get_DBAdaptor("ontology");
note("Ontology database instatiated");
ok($odb);
my $go_adaptor = $odb->get_OntologyTermAdaptor();
my $go_adaptor;
warning { $go_adaptor = $odb->get_OntologyTermAdaptor(); };
my $gene;
my $ga = $db->get_GeneAdaptor();
......@@ -185,7 +185,8 @@ push(@feats, $fp);
#
# 2 Test DnaDnaAlignFeature::new(-features)
#
my $dnaf = Bio::EnsEMBL::DnaDnaAlignFeature->new(-features => \@feats);
my $dnaf;
warning { $dnaf = Bio::EnsEMBL::DnaDnaAlignFeature->new(-features => \@feats); };
$dnaf->analysis($f_analysis);
$ex1->add_supporting_features($dnaf);
......@@ -230,7 +231,7 @@ push(@feats, $fp);
#
# 2 Test DnaDnaAlignFeature::new(-features)
#
$dnaf = Bio::EnsEMBL::DnaDnaAlignFeature->new(-features => \@feats);
warning { $dnaf = Bio::EnsEMBL::DnaDnaAlignFeature->new(-features => \@feats); };
$dnaf->analysis($f_analysis);
$ex2->add_supporting_features($dnaf);
......@@ -726,7 +727,7 @@ is(scalar(@{$gene->get_all_alt_alleles()}), 0, 'Checking we have no alleles retr
# Gene remove test
#
$multi->save("core", "gene", "transcript", "translation", "protein_feature", "exon", "exon_transcript", "supporting_feature", "object_xref", "ontology_xref", "identity_xref", "dna_align_feature", "protein_align_feature", 'meta_coord');
warning { $multi->save("core", "gene", "transcript", "translation", "protein_feature", "exon", "exon_transcript", "supporting_feature", "object_xref", "ontology_xref", "identity_xref", "dna_align_feature", "protein_align_feature", 'meta_coord'); };
$gene = $ga->fetch_by_stable_id("ENSG00000171456");
......
......@@ -14,7 +14,7 @@
# limitations under the License.
use Test::More;
use Test::Warnings;
use Test::Warnings qw(warning);
use strict;
use warnings;
......@@ -30,7 +30,8 @@ my $multi = Bio::EnsEMBL::Test::MultiTestDB->new();
ok(1);
my $db = $multi->get_DBAdaptor('core');
my $pafa = $db->get_ProteinAlignFeatureAdaptor();
my $pafa;
warning { $pafa = $db->get_ProteinAlignFeatureAdaptor(); };
# list_dbIDs
debug("ProteinAlignFeatureAdaptor->list_dbIDs");
......@@ -96,7 +97,7 @@ print_features($feats);
# Test store
#
$multi->save('core', 'protein_align_feature', 'meta_coord');
warning { $multi->save('core', 'protein_align_feature', 'meta_coord'); };
my $analysis = $feat->analysis;
my $slice =
......@@ -115,6 +116,7 @@ my $cigar_string = '100M';
my $hcoverage = 99.5;
my $external_db_id = 2200;
warning {
$feat = Bio::EnsEMBL::DnaPepAlignFeature->new
(-START => $start,
-END => $end,
......@@ -131,6 +133,7 @@ $feat = Bio::EnsEMBL::DnaPepAlignFeature->new
-ANALYSIS => $analysis,
-HCOVERAGE => $hcoverage,
-EXTERNAL_DB_ID => $external_db_id );
};
ok(!$feat->is_stored($db));
......
--
-- Created by SQL::Translator::Producer::SQLite
-- Created on Fri Sep 22 15:51:19 2017
-- Created on Mon Sep 25 13:13:36 2017