Skip to content
Snippets Groups Projects
Commit dfe5663c authored by Eduardo Eyras's avatar Eduardo Eyras
Browse files

Added a method to clip sequences on both ends; the length to be clipped is passed as an argument.

parent f096291e
No related branches found
No related tags found
No related merge requests found
......@@ -124,6 +124,20 @@ sub mask{
############################################################
# $seq is a string and $length is the length we want to clip from both ends
sub clip_ends{
    # Remove $length characters from each end of $seq and return what is left.
    #
    # Arguments:
    #   $self   - invocant (not used by this method)
    #   $seq    - the sequence, as a plain string
    #   $length - number of characters to remove from EACH end
    #
    # Returns:
    #   the clipped string; $seq unchanged when $length is undefined, zero or
    #   negative; the empty string when the two clipped ends would meet or
    #   overlap (2*$length >= length of $seq).
    my ($self,$seq,$length) = @_;

    # Nothing to clip. A negative offset would otherwise make substr count
    # from the end of the string and return the tail of the sequence.
    return $seq unless ( defined $length && $length > 0 );

    my $seq_length = length( $seq );

    # Clipping would consume the whole sequence. Without this guard the third
    # argument to substr becomes negative, which substr interprets as "leave
    # that many characters off the end" (returning the wrong bases), or the
    # offset falls outside the string (returning undef with a warning).
    return '' if ( 2*$length >= $seq_length );

    return substr( $seq, $length, $seq_length - 2*$length );
}
############################################################
sub _find_polyA{
my ($self, $seq) = @_;
my $new_seq;
......@@ -137,7 +151,6 @@ sub _find_polyA{
my $t_count = $check_polyT =~ tr/Tt//;
my $a_count = $check_polyA =~ tr/Aa//;
#### polyA ####
if ( $a_count >= 5 && $a_count > $t_count ){
......@@ -293,76 +306,22 @@ sub has_polyA_track{
my $length = length($seq);
# is it a polyA or polyT?
my $check_polyT = substr( $seq, 0, 6 );
my $check_polyT = substr( $seq, 0, 10 );
my $check_polyA = substr( $seq, -6 );
my $check_polyA = substr( $seq, -10 );
print STDERR "polyA: $check_polyA\n";
my $t_count = $check_polyT =~ tr/Tt//;
my $a_count = $check_polyA =~ tr/Aa//;
my $length_to_mask = 0;
#### polyA ####
if ( $a_count >= 5 && $a_count > $t_count ){
# we calculate the number of bases we want to chop
my $length_to_mask = 0;
# we start with 3 bases
my ($piece, $count ) = (3,0);
# count also the number of Ns, consider the Ns as potential As
my $n_count = 0;
# take 3 by 3 bases from the end
while( $length_to_mask < $length ){
my $chunk = substr( $seq, ($length - ($length_to_mask + 3)), $piece);
$count = $chunk =~ tr/Aa//;
$n_count = $chunk =~ tr/Nn//;
if ( ($count + $n_count) >= 2*( $piece )/3 ){
$length_to_mask += 3;
}
else{
last;
}
}
}
#### polyT ####
elsif( $t_count >=5 && $t_count > $a_count ){
# calculate the number of bases to chop
my $length_to_mask = -3;
# we start with 3 bases:
my ($piece, $count) = (3,3);
# count also the number of Ns, consider the Ns as potential As
my $n_count = 0;
# take 3 by 3 bases from the beginning
while ( $length_to_mask < $length ){
my $chunk = substr( $seq, $length_to_mask + 3, $piece );
#print STDERR "length to mask: $length_to_mask\n";
#print "chunk: $chunk\n";
$count = $chunk =~ tr/Tt//;
$n_count = $chunk =~ tr/Nn//;
if ( ($count+$n_count) >= 2*( $piece )/3 ){
$length_to_mask +=3;
}
else{
last;
}
}
}
if ( $length_to_mask >= 5 ){
## testing with this short cut
if ( $a_count >=7 || $t_count >=7 ){
return 1;
}
else{
return 0;
}
}
......
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment