# EnsEMBL module for Bio::EnsEMBL::Utils::SliceUtils
#
#

=head1 NAME

Bio::EnsEMBL::Utils::SliceUtils - Utility functions for slices

=head1 SYNOPSIS

  use Bio::EnsEMBL::Utils::SliceUtils qw(split_Slices);

  my $sub_slices = split_Slices($slices, $maxlen, $overlap);

=cut

package Bio::EnsEMBL::Utils::SliceUtils;

use strict;
use warnings;

use Exporter;
use POSIX qw(ceil floor);

use Bio::EnsEMBL::Slice;
use Bio::EnsEMBL::Utils::Exception qw(throw);

our @ISA       = qw(Exporter);
our @EXPORT_OK = qw(split_Slices);

=head2 split_Slices

  Arg [1]    : ref to list of slices
  Arg [2]    : int maxlength of sub slices (must be >= 1)
  Arg [3]    : int overlap length (optional, defaults to 0; must be
               >= 0 and smaller than maxlength)
  Example    : my $sub_slices = split_Slices($slices,$maxlen,$overlap)
  Description: Splits every slice in the list into consecutive sub slices
               that are each at most maxlength bases long. Adjacent sub
               slices of the same input slice share exactly overlap bases.
               The pieces are made as similar in size as possible (their
               lengths differ by at most one base) so that the last one
               is not much shorter than the others. A slice that already
               fits into maxlength is returned as a single sub slice
               covering the same region. All sub slices are created on
               the forward strand.
  Returntype : ref to list of slices
  Exceptions : maxlength < 1, overlap < 0 or overlap >= maxlength

=cut

sub split_Slices {
  my ($slice_big, $max_length, $overlap) = @_;

  if (!defined($max_length) or $max_length < 1) {
    throw("maxlength needs to be set and > 0");
  }

  if (!defined($overlap)) {
    $overlap = 0;
  }
  elsif ($overlap < 0) {
    throw("negative overlaps not allowed");
  }

  # Each sub slice must advance by at least one base past the overlap,
  # otherwise the piece count below would divide by zero or go negative.
  if ($overlap >= $max_length) {
    throw("overlap must be smaller than maxlength");
  }

  my @out = ();

  foreach my $slice (@$slice_big) {
    my $length = $slice->length;

    # Number of pieces and how far each piece start advances.
    my $number = 1;
    my $step   = 0;
    my $extra  = 0;

    if ($length > $max_length) {
      # N pieces of length (step + overlap) that overlap pairwise by
      # 'overlap' bases cover N*step + overlap bases, so the part that has
      # to be shared out between the pieces is (length - overlap).
      my $to_share = $length - $overlap;

      # Smallest piece count for which no piece exceeds max_length.
      $number = ceil($to_share / ($max_length - $overlap));

      # Equal share per piece; the first $extra pieces take one base more
      # so the remainder never piles up in (and oversizes) the last piece.
      $step  = floor($to_share / $number);
      $extra = $to_share % $number;
    }

    # Work in seq_region coordinates so that slices which do not start at
    # base 1 of their seq_region are split correctly as well.
    my $start = $slice->start;

    for my $i (1 .. $number) {
      my $advance = $step + ($i <= $extra ? 1 : 0);

      # Coordinates are inclusive, hence the -1. The final piece always
      # ends exactly where the input slice ends.
      my $end = ($i == $number)
              ? $slice->end
              : $start + $advance + $overlap - 1;

      push @out, Bio::EnsEMBL::Slice->new(
        -START             => $start,
        -END               => $end,
        -STRAND            => 1,
        -SEQ_REGION_NAME   => $slice->seq_region_name,
        -SEQ_REGION_LENGTH => $slice->seq_region_length,
        -COORD_SYSTEM      => $slice->coord_system,
        -ADAPTOR           => $slice->adaptor);

      $start += $advance;
    }
  }

  return \@out;
}

1;