Skip to content
Snippets Groups Projects
Commit 1cc3d77c authored by Kieron Taylor's avatar Kieron Taylor Committed by GitHub
Browse files

Merge pull request #154 from Ensembl/ENSCORESW-1481

Enscoresw 1481 - allow mapper to return original coordinates as well as mapped coordinates in the result
parents 787af0d0 37bf83ab
No related branches found
No related tags found
No related merge requests found
......@@ -150,8 +150,6 @@ use Bio::EnsEMBL::Mapper::Gap;
use Bio::EnsEMBL::Utils::Exception qw(throw);
# use Data::Dumper;
=head2 new
Arg [1] : string $from
......@@ -230,6 +228,10 @@ sub flush {
Arg 5 string $type
nature of transform - gives the type of
coordinates to be transformed *from*
Arg 6 boolean (0 or 1) $include_original_region
option to include original input coordinate region mappings in the result
Arg 7 int $cdna_coding_start
cdna coding start
Function generic map method
Returntype array of Bio::EnsEMBL::Mapper::Coordinate
and/or Bio::EnsEMBL::Mapper::Gap
......@@ -239,16 +241,18 @@ sub flush {
=cut
sub map_coordinates {
my ( $self, $id, $start, $end, $strand, $type ) = @_;
my ( $self, $id, $start, $end, $strand, $type, $include_original_region, $cdna_coding_start ) = @_;
unless ( defined($id)
&& defined($start)
&& defined($end)
&& defined($strand)
&& defined($type) )
{
throw("Expecting 5 arguments");
throw("Expecting at least 5 arguments");
}
$cdna_coding_start = defined $cdna_coding_start ? $cdna_coding_start : 1;
# special case for handling inserts:
if ( $start == $end + 1 ) {
return $self->map_insert( $id, $start, $end, $strand, $type );
......@@ -282,6 +286,7 @@ sub map_coordinates {
my $last_used_pair;
my @result;
my @paired_result;
my ( $start_idx, $end_idx, $mid_idx, $pair, $self_coord );
my $lr = $hash->{ uc($id) };
......@@ -295,6 +300,7 @@ sub map_coordinates {
$mid_idx = ( $start_idx + $end_idx ) >> 1;
$pair = $lr->[$mid_idx];
$self_coord = $pair->{$from};
if ( $self_coord->{'end'} < $start ) {
$start_idx = $mid_idx;
} else {
......@@ -310,6 +316,7 @@ sub map_coordinates {
my $self_coord = $pair->{$from};
my $target_coord = $pair->{$to};
#
# But not the case for haplotypes!! need to test for this case???
# so removing this till a better solution is found
......@@ -341,14 +348,20 @@ sub map_coordinates {
push( @result, $gap );
$start = $gap->{'end'} + 1;
}
my ( $target_start, $target_end, $target_ori );
my ( $target_start, $target_end);
my ( $ori_start, $ori_end);
my $res;
if ( exists $pair->{'indel'} ) {
# When next pair is an IndelPair and not a Coordinate, create the
# new mapping Coordinate, the IndelCoordinate.
$target_start = $target_coord->{'start'};
$target_end = $target_coord->{'end'};
#original coordinates
$ori_start = $self_coord->{'start'};
$ori_end = $self_coord->{'end'};
#create a Gap object
my $gap = Bio::EnsEMBL::Mapper::Gap->new( $start,
( $self_coord->{'end'} < $end ? $self_coord->{'end'} : $end ) );
......@@ -358,15 +371,15 @@ sub map_coordinates {
$target_start, $target_end, $pair->{'ori'}*$strand, $cs );
#and finally, the IndelCoordinate object with
$res = Bio::EnsEMBL::Mapper::IndelCoordinate->new( $gap, $coord );
} else {
# start is somewhere inside the region
if ( $pair->{'ori'} == 1 ) {
$target_start = $target_coord->{'start'} + ( $start - $self_coord->{'start'} );
} else {
$target_end =
$target_coord->{'end'} - ( $start - $self_coord->{'start'} );
$target_end = $target_coord->{'end'} - ( $start - $self_coord->{'start'} );
}
# Either we are enveloping this map or not. If yes, then end
# point (self perspective) is determined solely by target. If
# not we need to adjust.
......@@ -385,16 +398,28 @@ sub map_coordinates {
$target_coord->{'start'} +
( $end - $self_coord->{'start'} );
} else {
$target_start =
$target_coord->{'end'} - ( $end - $self_coord->{'start'} );
}
}
$res = Bio::EnsEMBL::Mapper::Coordinate->new( $target_coord->{'id'},
$target_start, $target_end, $pair->{'ori'}*$strand,$cs, $rank );
#save the ori coordinate info
$ori_start = ($start - $cdna_coding_start) + 1;
$ori_end = $ori_start + ($target_end - $target_start);
} ## end else [ if ( exists $pair->{'indel'...})]
push( @result, $res );
my $res_ori = Bio::EnsEMBL::Mapper::Coordinate->new( $self_coord->{'id'},
$ori_start, $ori_end, $pair->{'ori'}*$strand,$cs, $rank);
push(@paired_result, { 'original' => $res_ori, 'mapped' => $res });
$last_used_pair = $pair;
$start = $self_coord->{'end'} + 1;
......@@ -404,6 +429,8 @@ sub map_coordinates {
if ( !defined $last_used_pair ) {
my $gap = Bio::EnsEMBL::Mapper::Gap->new( $start, $end );
push( @result, $gap );
push(@paired_result, { 'original' => $gap, 'mapped' => $gap });
} elsif ( $last_used_pair->{$from}->{'end'} < $end ) {
# gap at the end
......@@ -411,13 +438,21 @@ sub map_coordinates {
$last_used_pair->{$from}->{'end'} + 1,
$end, $rank );
push( @result, $gap );
push(@paired_result, { 'original' => $gap, 'mapped' => $gap });
}
if ( $strand == -1 ) {
@result = reverse(@result);
@paired_result = reverse(@paired_result);
}
if ($include_original_region){
return @paired_result;
}else{
return @result;
}
return @result;
} ## end sub map_coordinates
......
......@@ -248,15 +248,16 @@ sub _load_mapper {
sub cdna2genomic {
my ($self,$start,$end) = @_;
my ($self, $start, $end, $include_original_region, $cdna_coding_start) = @_;
if( !defined $end ) {
throw("Must call with start/end");
}
$cdna_coding_start = defined $cdna_coding_start ? $cdna_coding_start : 1;
my $mapper = $self->{'exon_coord_mapper'};
return $mapper->map_coordinates( 'cdna', $start, $end, 1, "cdna" );
return $mapper->map_coordinates('cdna', $start, $end, 1, "cdna", $include_original_region, $cdna_coding_start);
}
......@@ -305,6 +306,8 @@ sub genomic2cdna {
start position in cds coords
Arg [2] : int $end
end position in cds coords
Arg [3] boolean (0 or 1) $include_original_region
option to include original input coordinate region mappings in the result
Example : @genomic_coords = $transcript_mapper->cds2genomic(69, 306);
Description: Converts cds coordinates into genomic coordinates. The
coordinates returned are relative to the same slice that the
......@@ -318,7 +321,7 @@ sub genomic2cdna {
=cut
sub cds2genomic {
my ( $self, $start, $end ) = @_;
my ( $self, $start, $end, $include_original_region ) = @_;
if ( !( defined($start) && defined($end) ) ) {
throw("Must call with start and end");
......@@ -327,8 +330,18 @@ sub cds2genomic {
# Move start end into translate cDNA coordinates now.
$start = $start +( $self->{'cdna_coding_start'} - 1 ) ;
$end = $end + ( $self->{'cdna_coding_start'} - 1 );
return $self->cdna2genomic( $start, $end );
#Check if the start exceeds the cdna_coding_end, if yes, return
if($start > $self->{'cdna_coding_end'}){
return undef;
}
#Check if the end exceeds the cdna_coding_end, if yes, truncate it otherwise we will be including gaps
if($end > $self->{'cdna_coding_end'}){
$end = $self->{'cdna_coding_end'};
}
return $self->cdna2genomic( $start, $end, $include_original_region, $self->{'cdna_coding_start'} );
}
=head2 pep2genomic
......
......@@ -373,4 +373,269 @@ chr1 625359 1214016 1216330 1 2315 1
#
sub isgap { my ($obj) = @_; return !$obj->can ('strand') }
# Test map_coordinates routine with optional boolean argument to include original region coordinates or not in the returned result array
#=============For CDNA reverse
#Chromosome 20: 32,192,503-32,207,791 reverse strand (ENST00000246229)
# We know already what to expect of the mappings, so check if we are getting the right mappings back
# Note that the last mappings will be truncated based on user query
my @coords_mapped = (32207641,32207791,32201919,32202292,32197208,32197682);
my @coords_ori = (1,151,152,525,526,1000);
#TranscriptMapper while initializing loads the Mapper Pairs and is populated with from and to mappings
my $trmapper_reverse_strand = bless( {
'to_cs' => undef,
'_is_sorted' => 1,
'pair_count' => 3,
'from_cs' => undef,
'_pair_cdna' => {
'CDNA' => [
bless( {
'ori' => -1,
'to' => bless( {
'id' => 'genome',
'end' => 32207791,
'start' => 32207641
}, 'Bio::EnsEMBL::Mapper::Unit' ),
'from' => bless( {
'id' => 'cdna',
'end' => 151,
'start' => 1
}, 'Bio::EnsEMBL::Mapper::Unit' )
}, 'Bio::EnsEMBL::Mapper::Pair' ),
bless( {
'ori' => -1,
'to' => bless( {
'id' => 'genome',
'end' => 32202292,
'start' => 32201919
}, 'Bio::EnsEMBL::Mapper::Unit' ),
'from' => bless( {
'id' => 'cdna',
'end' => 525,
'start' => 152
}, 'Bio::EnsEMBL::Mapper::Unit' )
}, 'Bio::EnsEMBL::Mapper::Pair' ),
bless( {
'ori' => -1,
'to' => bless( {
'id' => 'genome',
'end' => 32197682,
'start' => 32192503
}, 'Bio::EnsEMBL::Mapper::Unit' ),
'from' => bless( {
'id' => 'cdna',
'end' => 5705,
'start' => 526
}, 'Bio::EnsEMBL::Mapper::Unit' )
}, 'Bio::EnsEMBL::Mapper::Pair' )
]
},
'to' => 'genomic',
'from' => 'cdna'
}, 'Bio::EnsEMBL::Mapper' );
my $pair_cdna = $trmapper_reverse_strand->{'_pair_cdna'};
ok(defined($pair_cdna), "_pair_cdna defined");
ok(defined($pair_cdna->{ 'CDNA' }), "CDNA defined" );
# Check if the difference in base count between 'to' start and end and 'from' start and end is equal
my $to_total = 0;
my $from_total = 0;
foreach my $mapper_pair(@{$pair_cdna->{ 'CDNA' }}){
my $to_start = $mapper_pair->{'to'}->{'start'};
my $to_end = $mapper_pair->{'to'}->{'end'};
my $to_diff = $to_end - $to_start + 1;
$to_total += $to_diff;
my $from_start = $mapper_pair->{'from'}->{'start'};
my $from_end = $mapper_pair->{'from'}->{'end'};
my $from_diff = $from_end - $from_start + 1;
$from_total += $from_diff;
ok($to_diff == $from_diff , "to (genome) and from (cdna) diff is equal ");
}
# Check if the total base count between to and from is equal
ok($to_total == $from_total, "Base counts between to and from mapper units are equal");
#id, start, end, strand, type, include_original_region (0)
my @mappings = $trmapper_reverse_strand->map_coordinates( "cdna", 1, 1000, 1, "cdna", 0 );
is(3, scalar(@mappings), "Got back 3 regions mapped");
#Test mappings without including the original for reverse strand
test_mappings(\@mappings, \@coords_mapped);
#id, start, end, strand, type, include_original_region (1)
my @mappings_include_original = $trmapper_reverse_strand->map_coordinates( "cdna", 1, 1000, 1, "cdna", 1, 1);
is(3, scalar(@mappings_include_original), "Got back 3 regions mapped");
my $mapper_coordinates = $mappings_include_original[0];
isnt($mapper_coordinates, "Bio::EnsEMBL::Mapper::Coordinate", "Not a Bio::EnsEMBL::Mapper::Coordinate");
isa_ok($mapper_coordinates, "HASH", "Got a HASH ref");
ok(exists $mapper_coordinates->{'original'}, "original mappings exists");
ok(exists $mapper_coordinates->{'mapped'}, "mapped mappings exists");
#Test mappings including the originals for forward strand
test_mappings_include_ori(\@mappings_include_original, \@coords_mapped, \@coords_ori);
#Test for CDNA forward strand
#Chromosome 8: 18,210,093-18,223,689 forward strand. (ENST00000307719)
my @coords_mapped_forward = (18210093,18210180,18219411,18219489,18222042,18222874);
my @coords_ori_forward = (1,88,89,167,168,1000);
my $trmapper_positive_strand = bless( {
'to_cs' => undef,
'_is_sorted' => 1,
'pair_count' => 3,
'from_cs' => undef,
'_pair_cdna' => {
'CDNA' => [
bless( {
'ori' => 1,
'to' => bless( {
'id' => 'genome',
'end' => 18210180,
'start' => 18210093
}, 'Bio::EnsEMBL::Mapper::Unit' ),
'from' => bless( {
'id' => 'cdna',
'end' => 88,
'start' => 1
}, 'Bio::EnsEMBL::Mapper::Unit' )
}, 'Bio::EnsEMBL::Mapper::Pair' ),
bless( {
'ori' => 1,
'to' => bless( {
'id' => 'genome',
'end' => 18219489,
'start' => 18219411
}, 'Bio::EnsEMBL::Mapper::Unit' ),
'from' => bless( {
'id' => 'cdna',
'end' => 167,
'start' => 89
}, 'Bio::EnsEMBL::Mapper::Unit' )
}, 'Bio::EnsEMBL::Mapper::Pair' ),
bless( {
'ori' => 1,
'to' => bless( {
'id' => 'genome',
'end' => 18223689,
'start' => 18222042
}, 'Bio::EnsEMBL::Mapper::Unit' ),
'from' => bless( {
'id' => 'cdna',
'end' => 1815,
'start' => 168
}, 'Bio::EnsEMBL::Mapper::Unit' )
}, 'Bio::EnsEMBL::Mapper::Pair' )
]
},
'to' => 'genomic',
'from' => 'cdna'
}, 'Bio::EnsEMBL::Mapper' );
$pair_cdna = $trmapper_positive_strand->{'_pair_cdna'};
ok(defined($pair_cdna), "_pair_cdna defined");
ok(defined($pair_cdna->{ 'CDNA' }), "CDNA defined" );
#id, start, end, strand, type, include_original_region, cdna start
@mappings = $trmapper_positive_strand->map_coordinates( "cdna", 1, 1000, 1, "cdna", 0 , 1);
is(3, scalar(@mappings), "Got back 3 regions mapped");
#Test mappings without including the original for forward strand
test_mappings(\@mappings, \@coords_mapped_forward);
@mappings_include_original = $trmapper_positive_strand->map_coordinates( "cdna", 1, 1000, 1, "cdna", 1 , 1);
is(3, scalar(@mappings_include_original), "Got back 3 regions mapped");
$mapper_coordinates = $mappings_include_original[0];
isnt($mapper_coordinates, "Bio::EnsEMBL::Mapper::Coordinate", "Not a Bio::EnsEMBL::Mapper::Coordinate");
isa_ok($mapper_coordinates, "HASH", "Got a HASH ref");
ok(exists $mapper_coordinates->{'original'}, "original mappings exists");
ok(exists $mapper_coordinates->{'mapped'}, "mapped mappings exists");
test_mappings_include_ori(\@mappings_include_original, \@coords_mapped_forward, \@coords_ori_forward);
#Tests for CDS reverse
#Chromosome 20: 32,192,503-32,207,791 reverse strand (ENST00000246229)
# We know already what to expect of the mappings, so check if we are getting the right mappings back
# Note that the last mappings should be truncated based on user query
# cdna coding start => 266, cdna coding end => 1756
my @coords_mapped_cds = (32201919,32202178,32196943,32197682);
my @coords_ori_cds = (1,260,261,1000);
#id, start, end, strand, type, include_original_region
@mappings = $trmapper_reverse_strand->map_coordinates( "cdna", 266, 1265, 1, "cdna", 0 );
is(2, scalar(@mappings), "Got back 2 regions mapped");
#Test mappings without including the original for reverse strand
#test_mappings(\@mappings, \@coords_mapped_cds);
@mappings_include_original = $trmapper_reverse_strand->map_coordinates( "cdna", 266, 1265, 1, "cdna", 1 );
#test_mappings_include_ori(\@mappings_include_original, \@coords_mapped_cds, \@coords_ori_cds);
#Tests for CDS forward
#Check for forward strand
#Chromosome 8: 18,210,093-18,223,689 forward strand. (ENST00000307719)
my @coords_mapped_forward_cds = (18222048,18222920);
my @coords_ori_forward_cds = (1,873);
#id, start, end, strand, type, include_original_region
@mappings = $trmapper_positive_strand->map_coordinates( "cdna", 174, 1046, 1, "cdna", 0, 174 );
is(1, scalar(@mappings), "Got back 1 regions mapped");
#Test mappings without including the original for forward strand
test_mappings(\@mappings, \@coords_mapped_forward_cds);
@mappings_include_original = $trmapper_positive_strand->map_coordinates( "cdna", 174, 1046, 1, "cdna", 1, 174 );
test_mappings_include_ori(\@mappings_include_original, \@coords_mapped_forward_cds, \@coords_ori_forward_cds);
#utility routines to check the expected and the actual returned mapping coordinates are the same
sub test_mappings{
my($mappings, $coords_mapped) = @_;
my $i=0;
foreach my $mapping(@$mappings){
isa_ok($mapping, "Bio::EnsEMBL::Mapper::Coordinate", "Got back Bio::EnsEMBL::Mapper::Coordinate");
ok(${$coords_mapped}[$i] == $mapping->{'start'}, "${$coords_mapped}[$i] == $mapping->{'start'} => Expected and Actual mappings for start ok");
ok(${$coords_mapped}[$i+1] == $mapping->{'end'}, "${$coords_mapped}[$i+1] == $mapping->{'end'} => Expected and Actual mappings for end ok");
$i += 2;
}
}
#utility routines to check the actual returned mapping coordinates are the same with original coordinate mappings included
sub test_mappings_include_ori{
my($mappings, $coords_mapped, $coords_ori) = @_;
#Test mapped
my $i=0;
foreach my $mapping(@$mappings){
my $mapped_unit = $mapping->{'mapped'};
isa_ok($mapped_unit, "Bio::EnsEMBL::Mapper::Coordinate", "Got back Bio::EnsEMBL::Mapper::Coordinate");
ok(${$coords_mapped}[$i] == $mapped_unit->{'start'}, "${$coords_mapped}[$i] == $mapped_unit->{'start'} => Expected and Actual mappings for start ok");
ok(${$coords_mapped}[$i+1] == $mapped_unit->{'end'}, "${$coords_mapped}[$i+1] == $mapped_unit->{'end'} => Expected and Actual mappings for end ok");
$i += 2;
}
#Test original
$i=0;
foreach my $mapping(@$mappings){
my $mapped_unit = $mapping->{'original'};
isa_ok($mapped_unit, "Bio::EnsEMBL::Mapper::Coordinate", "Got back Bio::EnsEMBL::Mapper::Coordinate");
ok(${$coords_ori}[$i] == $mapped_unit->{'start'}, "${$coords_ori}[$i] == $mapped_unit->{'start'} => Expected and Actual ori for start ok");
ok(${$coords_ori}[$i+1] == $mapped_unit->{'end'}, "${$coords_ori}[$i+1] == $mapped_unit->{'end'} => Expected and Actual ori for end ok");
$i += 2;
}
}
done_testing();
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment