From dfe5663c3a055cc0f6d0d2c4c6c84ffca30736c7 Mon Sep 17 00:00:00 2001
From: Eduardo Eyras <eae@sanger.ac.uk>
Date: Thu, 22 May 2003 12:21:54 +0000
Subject: [PATCH] added method to clip sequences on both ends, the length to be
 clipped is passed as an argument

---
 modules/Bio/EnsEMBL/Utils/PolyA.pm | 81 ++++++++----------------------
 1 file changed, 20 insertions(+), 61 deletions(-)

diff --git a/modules/Bio/EnsEMBL/Utils/PolyA.pm b/modules/Bio/EnsEMBL/Utils/PolyA.pm
index b16a593647..59fc71c1af 100644
--- a/modules/Bio/EnsEMBL/Utils/PolyA.pm
+++ b/modules/Bio/EnsEMBL/Utils/PolyA.pm
@@ -124,6 +124,20 @@ sub mask{
 
 ############################################################
 
+# Clip $length characters off both ends of the string $seq; return the middle.
+# Returns '' when nothing would be left after clipping (2*$length >= length).
+sub clip_ends{
+  my ($self,$seq,$length) = @_;
+  # an undef or negative $length clips nothing (negative offsets count from the end)
+  $length = 0 if !defined $length || $length < 0;
+  my $seq_length = length( $seq );
+  return '' if 2*$length >= $seq_length;   # over-clipped: substr may return undef and warn
+  return substr( $seq, $length, $seq_length - 2*$length );
+}
+  
+
+
+############################################################
 sub _find_polyA{
   my ($self, $seq) = @_;
   my $new_seq;
@@ -137,7 +151,6 @@ sub _find_polyA{
   my $t_count = $check_polyT =~ tr/Tt//;
   my $a_count = $check_polyA =~ tr/Aa//;
   
-  
   #### polyA ####
   if ( $a_count >= 5 && $a_count > $t_count ){
     
@@ -293,76 +306,22 @@ sub has_polyA_track{
   my $length = length($seq);
   
   # is it a polyA or polyT?
-  my $check_polyT = substr( $seq, 0, 6 );
+  my $check_polyT = substr( $seq, 0, 10 );
   
-  my $check_polyA = substr( $seq, -6 );
+  my $check_polyA = substr( $seq, -10 );
   
+  print STDERR "polyA: $check_polyA\n";
+
   my $t_count = $check_polyT =~ tr/Tt//;
   my $a_count = $check_polyA =~ tr/Aa//;
   
-  
-  my $length_to_mask = 0;
-  #### polyA ####
-  if ( $a_count >= 5 && $a_count > $t_count ){
-    
-    # we calculate the number of bases we want to chop
-    my $length_to_mask = 0;
-    
-    # we start with 3 bases
-    my ($piece, $count ) = (3,0);
-    
-    # count also the number of Ns, consider the Ns as potential As
-    my $n_count = 0;
-
-    # take 3 by 3 bases from the end
-    while( $length_to_mask < $length ){
-      my $chunk  = substr( $seq, ($length - ($length_to_mask + 3)), $piece);
-      $count   = $chunk =~ tr/Aa//;
-      $n_count = $chunk =~ tr/Nn//;
-      if ( ($count + $n_count) >= 2*( $piece )/3 ){
-	$length_to_mask += 3;
-      }
-      else{
-	last;
-      }
-    }
-  }
-  #### polyT ####
-  elsif( $t_count >=5 && $t_count > $a_count ){
-    
-    # calculate the number of bases to chop
-    my $length_to_mask = -3;
-    
-    # we start with 3 bases:
-    my ($piece, $count) = (3,3);
-    
-    # count also the number of Ns, consider the Ns as potential As
-    my $n_count = 0;
-    
-    # take 3 by 3 bases from the beginning
-    while ( $length_to_mask < $length ){
-      my $chunk = substr( $seq, $length_to_mask + 3, $piece );
-      #print STDERR "length to mask: $length_to_mask\n";
-      #print "chunk: $chunk\n";
-      $count = $chunk =~ tr/Tt//;
-       $n_count = $chunk =~ tr/Nn//;
-      if ( ($count+$n_count)  >= 2*( $piece )/3 ){
-	$length_to_mask +=3;
-      }
-      else{
-	last;
-	
-      }
-    }
-  }
-  
-  if ( $length_to_mask >= 5 ){
+  ## testing with this short cut
+  if ( $a_count >=7 || $t_count >=7 ){
     return 1;
   }
   else{
     return 0;
   }
-
 }
 
 
-- 
GitLab