Skip to content
Snippets Groups Projects
Commit 06476068 authored by Abel Ureta-Vidal's avatar Abel Ureta-Vidal
Browse files

Slightly modified restrict_between_positions method. Added alignment_strings method.

parent 89b4f49d
No related branches found
No related tags found
No related merge requests found
......@@ -63,16 +63,20 @@ sub _query_unit {
=head2 restrict_between_positions
Arg 1 : int $start
Arg 2 : int $end
Arg 3 : string $sequence_name
The Feature can either be restricted on Ensembl coords
or on hit coords. The sequence_name decides which one.
Example : none
Description: Makes a new AlignFeature that sits in the new coords
Returntype : DnaDnaAlignFeature
Exceptions : returns undef if it cant do the job
Caller : general
Arg [1] : int $start
Arg [2] : int $end
Arg [3] : string $flags
SEQ = $start and $end apply to the seq sequence
i.e. start and end methods
HSEQ = $start and $end apply to the hseq sequence
i.e. hstart and hend methods
Example : $daf->restrict_between_positions(150,543,"SEQ")
Description: Build a new DnaDnaAlignFeature object that fits within
the new specified coordinates and sequence reference, cutting
any pieces hanging upstream and downstream.
Returntype : Bio::EnsEMBL::DnaDnaAlignFeature object
Exceptions :
Caller :
=cut
......@@ -86,16 +90,17 @@ sub restrict_between_positions {
$self->throw("The second argument is not defined or is not an integer");
}
unless (defined $seqref &&
($seqref eq "seqname" || $seqref eq "hseqname")) {
$self->throw("The third argument is not defined or is not equal to 'seqname' or 'hseqname'");
($seqref eq "SEQ" || $seqref eq "HSEQ")) {
$self->throw("The third argument is not defined or is not equal to 'SEQ' or 'HSEQ'");
}
# symbolic method references should be forbidden!
# need to be rewrite at some stage.
my ($start_method1,$end_method1,$strand_method1,$start_method2,$end_method2,$strand_method2) =
qw(start end strand hstart hend hstrand);
if ($seqref eq "hseqname") {
if ($seqref eq "HSEQ") {
($start_method1,$end_method1,$strand_method1,$start_method2,$end_method2,$strand_method2) =
qw(hstart hend hstrand start end strand);
}
......@@ -168,4 +173,86 @@ sub restrict_between_positions {
}
}
=head2 alignment_strings
Arg [1] : list of string $flags
FIX_SEQ = does not introduce gaps (dashes) in seq aligned sequence
and delete the corresponding insertions in hseq aligned sequence
FIX_HSEQ = does not introduce gaps (dashes) in hseq aligned sequence
and delete the corresponding insertions in seq aligned sequence
NO_SEQ = return the seq aligned sequence as an empty string
NO_HSEQ = return the hseq aligned sequence as an empty string
This 2 last flags would save a bit of time as doing so no querying to the core
database in done to get the sequence.
Example : $daf->alignment_strings or
$daf->alignment_strings("FIX_HSEQ") or
$daf->alignment_strings("NO_SEQ","FIX_SEQ")
Description: Allows to rebuild the alignment string of both the seq and hseq sequence
using the cigar_string information and the slice and hslice objects
Returntype : array reference containing 2 strings
the first corresponds to seq
the second corresponds to hseq
Exceptions :
Caller :
=cut
sub alignment_strings {
my ( $self, @flags ) = @_;
# set the flags
my $seq_flag = 1;
my $hseq_flag = 1;
my $fix_seq_flag = 0;
my $fix_hseq_flag = 0;
for my $flag ( @flags ) {
$seq_flag = 0 if ($flag eq "NO_SEQ");
$hseq_flag = 0 if ($flag eq "NO_HSEQ");
$fix_seq_flag = 1 if ($flag eq "FIX_SEQ");
$fix_hseq_flag = 1 if ($flag eq "FIX_HSEQ");
}
my ($seq, $hseq);
$seq = $self->slice->subseq($self->start, $self->end) if ($seq_flag || $fix_seq_flag);
$hseq = $self->hslice->subseq($self->hstart, $self->hend) if ($hseq_flag || $fix_hseq_flag);
my $rseq= "";
# rseq - result sequence
my $rhseq= "";
# rhseq - result hsequence
my $seq_pos = 0;
my $hseq_pos = 0;
my @cig = ( $self->cigar_string =~ /(\d*[DIM])/g );
for my $cigElem ( @cig ) {
my $cigType = substr( $cigElem, -1, 1 );
my $cigCount = substr( $cigElem, 0 ,-1 );
$cigCount = 1 unless $cigCount;
if( $cigType eq "M" ) {
$rseq .= substr( $seq, $seq_pos, $cigCount ) if ($seq_flag);
$rhseq .= substr( $hseq, $hseq_pos, $cigCount ) if ($hseq_flag);
$seq_pos += $cigCount;
$hseq_pos += $cigCount;
} elsif( $cigType eq "D" ) {
if( ! $fix_seq_flag ) {
$rseq .= "-" x $cigCount if ($seq_flag);
$rhseq .= substr( $hseq, $hseq_pos, $cigCount ) if ($hseq_flag);
}
$hseq_pos += $cigCount;
} elsif( $cigType eq "I" ) {
if( ! $fix_hseq_flag ) {
$rseq .= substr( $seq, $seq_pos, $cigCount ) if ($seq_flag);
$rhseq .= "-" x $cigCount if ($hseq_flag);
}
$seq_pos += $cigCount;
}
}
return [ $rseq,$rhseq ];
}
1;
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment