From 85b9cea078a9838ac1a8cc0f1e5ff770c2a292e4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andreas=20Kusalananda=20K=C3=A4h=C3=A4ri?= <ak4@sanger.ac.uk> Date: Fri, 27 Aug 2010 10:38:25 +0000 Subject: [PATCH] In map_coordinates(): Fix documentation and argument checking error message. From Michael S. Also, reformat method. --- modules/Bio/EnsEMBL/Mapper.pm | 347 +++++++++++++++++----------------- 1 file changed, 170 insertions(+), 177 deletions(-) diff --git a/modules/Bio/EnsEMBL/Mapper.pm b/modules/Bio/EnsEMBL/Mapper.pm index 2b07b5b4bd..a60dc33cc8 100644 --- a/modules/Bio/EnsEMBL/Mapper.pm +++ b/modules/Bio/EnsEMBL/Mapper.pm @@ -138,7 +138,7 @@ sub flush { end coordinate of 'source' sequence Arg 4 int $strand raw contig orientation (+/- 1) - Arg 5 int $type + Arg 5 string $type nature of transform - gives the type of coordinates to be transformed *from* Function generic map method @@ -149,209 +149,202 @@ sub flush { =cut -sub map_coordinates{ - my ($self, $id, $start, $end, $strand, $type) = @_; - - unless(defined($id) && defined($start) && defined($end) && - defined($strand) && defined($type) ) { - throw("Must start,end,strand,id,type as coordinates"); - } - - - - - - # special case for handling inserts: - if($start == $end + 1) { - return $self->map_insert($id, $start, $end, $strand, $type); - } - - if( ! $self->{'_is_sorted'} ) { $self->_sort() } - - my $hash = $self->{"_pair_$type"}; +sub map_coordinates { + my ( $self, $id, $start, $end, $strand, $type ) = @_; - my ($from, $to, $cs); + unless ( defined($id) + && defined($start) + && defined($end) + && defined($strand) + && defined($type) ) + { + throw("Expecting 5 arguments"); + } - if($type eq $self->{'to'}) { - $from = 'to'; - $to = 'from'; - $cs = $self->{'from_cs'}; - } else { - $from = 'from'; - $to = 'to'; - $cs = $self->{'to_cs'}; - } + # special case for handling inserts: + if ( $start == $end + 1 ) { + return $self->map_insert( $id, $start, $end, $strand, $type ); + } - unless(defined $hash) { - throw("Type $type is neither to or from coordinate systems"); - } + if ( !$self->{'_is_sorted'} ) { $self->_sort() } - if( !defined $hash->{uc($id)} ) { - # one big gap! - my $gap = Bio::EnsEMBL::Mapper::Gap->new($start, $end); - return $gap; - } + my $hash = $self->{"_pair_$type"}; - my $last_used_pair; - my @result; + my ( $from, $to, $cs ); - my ( $start_idx, $end_idx, $mid_idx, $pair, $self_coord ); - my $lr = $hash->{uc($id)}; - - $start_idx = 0; - $end_idx = $#$lr; - - # binary search the relevant pairs - # helps if the list is big - while(( $end_idx - $start_idx ) > 1 ) { - $mid_idx = ($start_idx+$end_idx)>>1; - $pair = $lr->[$mid_idx]; - $self_coord = $pair->{$from}; - if( $self_coord->{'end'} < $start ) { - $start_idx = $mid_idx; - } else { - $end_idx = $mid_idx; - } - } + if ( $type eq $self->{'to'} ) { + $from = 'to'; + $to = 'from'; + $cs = $self->{'from_cs'}; + } else { + $from = 'from'; + $to = 'to'; + $cs = $self->{'to_cs'}; + } - - my $rank = 0; - my $orig_start = $start; - my $last_target_coord = undef; - for( my $i = $start_idx; $i<=$#$lr; $i++ ) { - $pair = $lr->[$i]; - my $self_coord = $pair->{$from}; - my $target_coord = $pair->{$to}; + unless ( defined $hash ) { + throw("Type $type is neither to or from coordinate systems"); + } + if ( !defined $hash->{ uc($id) } ) { + # one big gap! + my $gap = Bio::EnsEMBL::Mapper::Gap->new( $start, $end ); + return $gap; + } + my $last_used_pair; + my @result; -# -# But not the case for haplotypes!! need to test for this case??? -# so removing this till a better solution is found -# -# -# if($self_coord->{'start'} < $start){ -# $start = $orig_start; -# $rank++; -# } + my ( $start_idx, $end_idx, $mid_idx, $pair, $self_coord ); + my $lr = $hash->{ uc($id) }; + $start_idx = 0; + $end_idx = $#$lr; - if(defined($last_target_coord) and $target_coord->{'id'} ne $last_target_coord){ - if($self_coord->{'start'} < $start){ # i.e. the same bit is being mapped to another assembled bit - $start = $orig_start; - } - } - else{ - $last_target_coord = $target_coord->{'id'}; - } + # binary search the relevant pairs + # helps if the list is big + while ( ( $end_idx - $start_idx ) > 1 ) { + $mid_idx = ( $start_idx + $end_idx ) >> 1; + $pair = $lr->[$mid_idx]; + $self_coord = $pair->{$from}; + if ( $self_coord->{'end'} < $start ) { + $start_idx = $mid_idx; + } else { + $end_idx = $mid_idx; + } + } + my $rank = 0; + my $orig_start = $start; + my $last_target_coord = undef; + for ( my $i = $start_idx; $i <= $#$lr; $i++ ) { + $pair = $lr->[$i]; + my $self_coord = $pair->{$from}; + my $target_coord = $pair->{$to}; + + # + # But not the case for haplotypes!! need to test for this case??? + # so removing this till a better solution is found + # + # + # if($self_coord->{'start'} < $start){ + # $start = $orig_start; + # $rank++; + # } + + if ( defined($last_target_coord) + and $target_coord->{'id'} ne $last_target_coord ) + { + if ( $self_coord->{'start'} < $start ) + { # i.e. the same bit is being mapped to another assembled bit + $start = $orig_start; + } + } else { + $last_target_coord = $target_coord->{'id'}; + } + # if we haven't even reached the start, move on + if ( $self_coord->{'end'} < $orig_start ) { + next; + } + # if we have over run, break + if ( $self_coord->{'start'} > $end ) { + last; + } + if ( $start < $self_coord->{'start'} ) { + # gap detected + my $gap = Bio::EnsEMBL::Mapper::Gap->new( $start, + $self_coord->{'start'} - 1, $rank ); + push( @result, $gap ); + $start = $gap->{'end'} + 1; + } + my ( $target_start, $target_end, $target_ori ); + my $res; + if ( exists $pair->{'indel'} ) { + # When next pair is an IndelPair and not a Coordinate, create the + # new mapping Coordinate, the IndelCoordinate. + $target_start = $target_coord->{'start'}; + $target_end = $target_coord->{'end'}; + + #create a Gap object + my $gap = Bio::EnsEMBL::Mapper::Gap->new( $start, + ( $self_coord->{'end'} < $end ? $self_coord->{'end'} : $end ) ); + #create the Coordinate object + my $coord = + Bio::EnsEMBL::Mapper::Coordinate->new( $target_coord->{'id'}, + $target_start, $target_end, $pair->{'ori'}*$strand, $cs ); + #and finally, the IndelCoordinate object with + $res = Bio::EnsEMBL::Mapper::IndelCoordinate->new( $gap, $coord ); + } else { + # start is somewhere inside the region + if ( $pair->{'ori'} == 1 ) { + $target_start = + $target_coord->{'start'} + + ( $start - $self_coord->{'start'} ); + } else { + $target_end = + $target_coord->{'end'} - ( $start - $self_coord->{'start'} ); + } - # if we haven't even reached the start, move on - if( $self_coord->{'end'} < $orig_start ) { - next; - } - - # if we have over run, break - if( $self_coord->{'start'} > $end ) { - last; - } - - if( $start < $self_coord->{'start'} ) { - # gap detected - my $gap = Bio::EnsEMBL::Mapper::Gap->new($start, - $self_coord->{'start'}-1, $rank); + # Either we are enveloping this map or not. If yes, then end + # point (self perspective) is determined solely by target. If + # not we need to adjust. + + if ( $end > $self_coord->{'end'} ) { + # enveloped + if ( $pair->{'ori'} == 1 ) { + $target_end = $target_coord->{'end'}; + } else { + $target_start = $target_coord->{'start'}; + } + } else { + # need to adjust end + if ( $pair->{'ori'} == 1 ) { + $target_end = + $target_coord->{'start'} + + ( $end - $self_coord->{'start'} ); + } else { + $target_start = + $target_coord->{'end'} - ( $end - $self_coord->{'start'} ); + } + } - push(@result,$gap); - $start = $gap->{'end'}+1; - } - my ($target_start,$target_end,$target_ori); - my $res; - if (exists $pair->{'indel'}){ - #when next pair is an IndelPair and not a Coordinate, create the new mapping Coordinate, the IndelCoordinate - $target_start = $target_coord->{'start'}; - $target_end = $target_coord->{'end'}; - - #create a Gap object - my $gap = Bio::EnsEMBL::Mapper::Gap->new($start, ($self_coord->{'end'} < $end ? $self_coord->{'end'} : $end)); - #create the Coordinate object - my $coord = Bio::EnsEMBL::Mapper::Coordinate->new($target_coord->{'id'}, - $target_start, - $target_end, - $pair->{'ori'} * $strand, - $cs); - #and finally, the IndelCoordinate object with - $res = Bio::EnsEMBL::Mapper::IndelCoordinate->new($gap,$coord); - } - else{ - # start is somewhere inside the region - if( $pair->{'ori'} == 1 ) { - $target_start = - $target_coord->{'start'} + ($start - $self_coord->{'start'}); - } else { - $target_end = - $target_coord->{'end'} - ($start - $self_coord->{'start'}); - } - - # either we are enveloping this map or not. If yes, then end - # point (self perspective) is determined solely by target. If not - # we need to adjust - - if( $end > $self_coord->{'end'} ) { - # enveloped - if( $pair->{'ori'} == 1 ) { - $target_end = $target_coord->{'end'}; - } else { - $target_start = $target_coord->{'start'}; - } - } else { - # need to adjust end - if( $pair->{'ori'} == 1 ) { - $target_end = - $target_coord->{'start'} + ($end - $self_coord->{'start'}); - } else { - $target_start = - $target_coord->{'end'} - ($end - $self_coord->{'start'}); - } - } - - $res = Bio::EnsEMBL::Mapper::Coordinate->new($target_coord->{'id'}, - $target_start, - $target_end, - $pair->{'ori'} * $strand, - $cs, $rank); + $res = + Bio::EnsEMBL::Mapper::Coordinate->new( $target_coord->{'id'}, + $target_start, $target_end, $pair->{'ori'}*$strand, + $cs, $rank ); - } + } ## end else [ if ( exists $pair->{'indel'...})] - push(@result,$res); - - $last_used_pair = $pair; - $start = $self_coord->{'end'}+1; + push( @result, $res ); - } + $last_used_pair = $pair; + $start = $self_coord->{'end'} + 1; - if( !defined $last_used_pair ) { - my $gap = Bio::EnsEMBL::Mapper::Gap->new($start, $end); - push(@result,$gap); + } ## end for ( my $i = $start_idx...) - } elsif( $last_used_pair->{$from}->{'end'} < $end ) { - # gap at the end - my $gap = Bio::EnsEMBL::Mapper::Gap->new( - $last_used_pair->{$from}->{'end'} + 1, - $end, $rank); - push(@result,$gap); - } + if ( !defined $last_used_pair ) { + my $gap = Bio::EnsEMBL::Mapper::Gap->new( $start, $end ); + push( @result, $gap ); - if ( $strand == -1 ) { - @result = reverse ( @result); - } + } elsif ( $last_used_pair->{$from}->{'end'} < $end ) { + # gap at the end + my $gap = + Bio::EnsEMBL::Mapper::Gap->new( + $last_used_pair->{$from}->{'end'} + 1, + $end, $rank ); + push( @result, $gap ); + } + if ( $strand == -1 ) { + @result = reverse(@result); + } - return @result; -} + return @result; +} ## end sub map_coordinates -- GitLab