diff --git a/modules/Bio/EnsEMBL/EMBLLOAD/Clone.pm b/modules/Bio/EnsEMBL/EMBLLOAD/Clone.pm index 4cf3d2a6be381ad9a8f5fed1993f4b068f20f521..890e8d68014b118ad86ab10d59ee726120a048a7 100755 --- a/modules/Bio/EnsEMBL/EMBLLOAD/Clone.pm +++ b/modules/Bio/EnsEMBL/EMBLLOAD/Clone.pm @@ -49,6 +49,8 @@ use Bio::Root::RootI; use vars qw(@ISA); use strict; use Bio::EnsEMBL::EMBLLOAD::Contig; +use Bio::EnsEMBL::DB::CloneI; + @ISA = qw(Bio::Root::RootI Bio::EnsEMBL::DB::CloneI); use Bio::EnsEMBL::Translation; diff --git a/modules/Bio/EnsEMBL/Mapper.pm b/modules/Bio/EnsEMBL/Mapper.pm new file mode 100644 index 0000000000000000000000000000000000000000..d845a9d42f89669ff99a88788b947f7e6f2dddf6 --- /dev/null +++ b/modules/Bio/EnsEMBL/Mapper.pm @@ -0,0 +1,410 @@ + + +# +# Ensembl module for Bio::EnsEMBL::Mapper +# +# Cared for by Ewan Birney <birney@ebi.ac.uk> +# +# Copyright Ewan Birney +# +# You may distribute this module under the same terms as perl itself + +# POD documentation - main docs before the code + +=head1 NAME + +Bio::EnsEMBL::Mapper - DESCRIPTION of Object + +=head1 SYNOPSIS + + + $map->add_map_coordinates( $contig_start, $contig_end, $contig_id, + $chr_start, $chr_end, $chr_name, $contig_ori ); + + + my @coordlist = $mapper->map_coordinates(2,5,-1,627012,"rawcontig"); + +Give standard usage here + +=head1 DESCRIPTION + +Describe the object here + +=head1 AUTHOR - Ewan Birney + +This modules is part of the Ensembl project http://www.ensembl.org + +Email birney@ebi.ac.uk + +Describe contact details here + +=head1 APPENDIX + +The rest of the documentation details each of the object methods. Internal methods are usually preceded with a _ + +=cut + + +# Let the code begin... + + +package Bio::EnsEMBL::Mapper; +use vars qw(@ISA); +use strict; + +# Object preamble - inherits from Bio::Root::RootI + +use Bio::Root::RootI; +use Bio::EnsEMBL::Mapper::Pair; +use Bio::EnsEMBL::Mapper::Unit; +use Bio::EnsEMBL::Mapper::Coordinate; +use Bio::EnsEMBL::Mapper::Gap; + +@ISA = qw(Bio::Root::RootI); + +# new() is written here + +sub new { + my($class,@args) = @_; + + my $to = shift @args; + my $from = shift @args; + + my $self = {}; + bless $self,$class; + + $self->{'_pair_hash_to'} = {}; + $self->{'_pair_hash_from'} = {}; + + $self->to($to); + $self->from($from); + +# set stuff in self from @args + return $self; +} + + +=head2 map_coordinates + + Title : map_coordinates + Usage : + Function: + Example : + Returns : + Args : + + +=cut + +sub map_coordinates{ + my ($self,$start,$end,$strand,$id,$type) = @_; + + if( !defined $type ) { + $self->throw("Must start,end,strand,id,type as coordinates"); + } + + my $self_func; + my $target_func; + my $hash; + + if( $type eq $self->to ) { + $self_func = \&Bio::EnsEMBL::Mapper::Pair::to; + $target_func = \&Bio::EnsEMBL::Mapper::Pair::from; + $hash = $self->{'_pair_hash_to'}; + } elsif ( $type eq $self->from ) { + $self_func = \&Bio::EnsEMBL::Mapper::Pair::from; + $target_func = \&Bio::EnsEMBL::Mapper::Pair::to; + $hash = $self->{'_pair_hash_from'}; + } else { + $self->throw("Type $type is neither to or from coordinate systems"); + } + + if( $self->_is_frozen == 0 ) { + $self->_freeze(); + } + + if( !defined $hash->{$id} ) { + # one big gap! + my $gap = Bio::EnsEMBL::Mapper::Gap->new(); + $gap->start($start); + $gap->end($end); + return $gap; + } + + my $last_used_pair; + my @result; + + foreach my $pair ( @{$hash->{$id}} ) { + + + + my $self_coord = &$self_func($pair); + my $target_coord = &$target_func($pair); + + # if we haven't even reached the start, move on + if( $self_coord->end < $start ) { + next; + } + + # if we have over run, break + if( $self_coord->start > $end ) { + last; + } + + + + if( $start < $self_coord->start ) { + # gap detected + my $gap = Bio::EnsEMBL::Mapper::Gap->new(); + $gap->start($start); + $gap->end($self_coord->start-1); + push(@result,$gap); + } + + my ($target_start,$target_end,$target_ori); + + # start is somewhere inside the region + if( $pair->ori == 1 ) { + $target_start = $target_coord->start + ($start - $self_coord->start); + } else { + $target_end = $target_coord->end - ($start - $self_coord->start); + } + + # either we are enveloping this map or not. If yes, then end + # point (self perspective) is determined solely by target. If not + # we need to adjust + + if( $end > $self_coord->end ) { + # enveloped + if( $pair->ori == 1 ) { + $target_end = $target_coord->end; + } else { + $target_start = $target_coord->start; + } + } else { + # need to adjust end + if( $pair->ori == 1 ) { + $target_end = $target_coord->start + ($end - $self_coord->start); + } else { + $target_start = $target_coord->end - ($end - $self_coord->start); + } + } + + my $res = Bio::EnsEMBL::Mapper::Coordinate->new(); + $res->start($target_start); + $res->end($target_end); + $res->strand($pair->ori * $strand); + $res->id($target_coord->id); + push(@result,$res); + + $last_used_pair = $pair; + $start = $self_coord->end+1; + } + + + if( !defined $last_used_pair ) { + my $gap = Bio::EnsEMBL::Mapper::Gap->new(); + $gap->start($start); + $gap->end($end); + push(@result,$gap); + + } elsif( &$self_func($last_used_pair)->end < $end ) { + # gap at the end + my $gap = Bio::EnsEMBL::Mapper::Gap->new(); + $gap->start(&$self_func($last_used_pair)->end+1); + $gap->end($end); + push(@result,$gap); + } + + if ( $strand == -1 ) { + @result = reverse ( @result); + } + + return @result; + +} + +=head2 add_map_coordinates + + Title : add_map_coordinates + Usage : + Function: + Example : + Returns : + Args : + + +=cut + +sub add_map_coordinates{ + my ($self,$contig_start,$contig_end,$contig_id,$chr_start,$chr_end,$chr_name,$contig_ori) = @_; + + if( $contig_start !~ /\d+/ || $chr_start !~ /\d+/ ) { + $self->throw("Not doable - $contig_start as start or $chr_start as start?"); + } + + if( ($contig_end - $contig_start) != ($chr_end - $chr_start) ) { + $self->throw("Cannot deal with mis-lengthed mappings so far"); + } + + my $pair = Bio::EnsEMBL::Mapper::Pair->new(); + + my $from = Bio::EnsEMBL::Mapper::Unit->new(); + $from->start($contig_start); + $from->end($contig_end); + $from->id($contig_id); + + my $to = Bio::EnsEMBL::Mapper::Unit->new(); + $to->start($chr_start); + $to->end($chr_end); + $to->id($chr_name); + + $pair->to($to); + $pair->from($from); + + $pair->ori($contig_ori); + + # place into hash on both ids + + if( !defined $self->{'_pair_hash_to'}->{$chr_name} ) { + $self->{'_pair_hash_to'}->{$chr_name} = []; + } + push(@{$self->{'_pair_hash_to'}->{$chr_name}},$pair); + + if( !defined $self->{'_pair_hash_from'}->{$contig_id} ) { + $self->{'_pair_hash_from'}->{$contig_id} = []; + } + push(@{$self->{'_pair_hash_from'}->{$contig_id}},$pair); + + $self->_is_frozen(0); +} + +=head2 to + + Title : to + Usage : $obj->to($newval) + Function: + Example : + Returns : value of to + Args : newvalue (optional) + + +=cut + +sub to{ + my ($self,$value) = @_; + if( defined $value) { + $self->{'to'} = $value; + } + return $self->{'to'}; + +} + +=head2 from + + Title : from + Usage : $obj->from($newval) + Function: + Example : + Returns : value of from + Args : newvalue (optional) + + +=cut + +sub from{ + my ($self,$value) = @_; + if( defined $value) { + $self->{'from'} = $value; + } + return $self->{'from'}; + +} + + +=head2 _dump + + Title : _dump + Usage : + Function: + Example : + Returns : + Args : + + +=cut + +sub _dump{ + my ($self,$fh) = @_; + + if( !defined $fh ) { + $fh = \*STDERR; + } + + foreach my $id ( keys %{$self->{'_pair_hash_from'}} ) { + print $fh "From Hash $id\n"; + foreach my $pair ( @{$self->{'_pair_hash_from'}->{$id}} ) { + print $fh " ",$pair->from->start," ",$pair->from->end,":",$pair->to->start," ",$pair->to->end," ",$pair->to->id,"\n"; + } + } + +} + + +=head2 _freeze + + Title : _freeze + Usage : + Function: + Example : + Returns : + Args : + + +=cut + +sub _freeze{ + my ($self) = @_; + + foreach my $id ( keys %{$self->{'_pair_hash_from'}} ) { + @{$self->{'_pair_hash_from'}->{$id}} = sort { $a->from->start <=> $b->from->start } @{$self->{'_pair_hash_from'}->{$id}}; + } + + foreach my $id ( keys %{$self->{'_pair_hash_to'}} ) { + @{$self->{'_pair_hash_to'}->{$id}} = sort { $a->to->start <=> $b->to->start } @{$self->{'_pair_hash_to'}->{$id}}; + } + + $self->_is_frozen(1); + +} + + + +=head2 _is_frozen + + Title : _is_frozen + Usage : $obj->_is_frozen($newval) + Function: + Example : + Returns : value of _is_frozen + Args : newvalue (optional) + + +=cut + +sub _is_frozen{ + my ($self,$value) = @_; + if( defined $value) { + $self->{'_is_frozen'} = $value; + } + return $self->{'_is_frozen'}; + +} + + + + + + + + diff --git a/modules/Bio/EnsEMBL/Mapper/Coordinate.pm b/modules/Bio/EnsEMBL/Mapper/Coordinate.pm new file mode 100644 index 0000000000000000000000000000000000000000..b2c17900b3e3ce396cd8e0fd1a106f441ced9d5b --- /dev/null +++ b/modules/Bio/EnsEMBL/Mapper/Coordinate.pm @@ -0,0 +1,143 @@ + +# +# Ensembl module for Bio::EnsEMBL::MapperCoordinate +# +# Cared for by Ewan Birney <birney@ebi.ac.uk> +# +# Copyright GRL and EBI +# +# You may distribute this module under the same terms as perl itself + +# POD documentation - main docs before the code + +=head1 NAME + +Bio::EnsEMBL::Mapper::Coordinate - DESCRIPTION of Object + +=head1 SYNOPSIS + +Give standard usage here + +=head1 DESCRIPTION + +Describe the object here + +=head1 CONTACT + +Ensembl - ensembl-dev@ebi.ac.uk + +=head1 APPENDIX + +The rest of the documentation details each of the object methods. Internal methods are usually preceded with a _ + +=cut + + +# Let the code begin... + + +package Bio::EnsEMBL::Mapper::Coordinate; +use vars qw(@ISA); +use strict; + +# Object preamble - inheriets from Bio::Root::RootI + +use Bio::Root::RootI; + + +@ISA = qw(Bio::Root::RootI); + +sub new { + my($class,@args) = @_; + + my $self = {}; + bless $self,$class; + + return $self; +} + +=head2 start + + Title : start + Usage : $obj->start($newval) + Function: + Returns : value of start + Args : newvalue (optional) + + +=cut + +sub start{ + my $obj = shift; + if( @_ ) { + my $value = shift; + $obj->{'start'} = $value; + } + return $obj->{'start'}; + +} + +=head2 end + + Title : end + Usage : $obj->end($newval) + Function: + Returns : value of end + Args : newvalue (optional) + + +=cut + +sub end{ + my $obj = shift; + if( @_ ) { + my $value = shift; + $obj->{'end'} = $value; + } + return $obj->{'end'}; + +} + +=head2 strand + + Title : strand + Usage : $obj->strand($newval) + Function: + Returns : value of strand + Args : newvalue (optional) + + +=cut + +sub strand{ + my $obj = shift; + if( @_ ) { + my $value = shift; + $obj->{'strand'} = $value; + } + return $obj->{'strand'}; + +} + +=head2 id + + Title : id + Usage : $obj->id($newval) + Function: + Example : + Returns : value of id + Args : newvalue (optional) + + +=cut + +sub id{ + my ($self,$value) = @_; + if( defined $value) { + $self->{'id'} = $value; + } + return $self->{'id'}; + +} + +1; diff --git a/modules/Bio/EnsEMBL/Mapper/Gap.pm b/modules/Bio/EnsEMBL/Mapper/Gap.pm new file mode 100644 index 0000000000000000000000000000000000000000..9ee3615e37a5dd65d7b475bd15188ee7e49469a3 --- /dev/null +++ b/modules/Bio/EnsEMBL/Mapper/Gap.pm @@ -0,0 +1,108 @@ + + +# +# Ensembl module for Bio::EnsEMBL::Mapper::Gap +# +# Cared for by Ewan Birney <birney@ebi.ac.uk> +# +# Copyright Ewan Birney +# +# You may distribute this module under the same terms as perl itself + +# POD documentation - main docs before the code + +=head1 NAME + +Bio::EnsEMBL::Mapper::Gap - DESCRIPTION of Object + +=head1 SYNOPSIS + +Give standard usage here + +=head1 DESCRIPTION + +Describe the object here + +=head1 AUTHOR - Ewan Birney + +This modules is part of the Ensembl project http://www.ensembl.org + +Email birney@ebi.ac.uk + +Describe contact details here + +=head1 APPENDIX + +The rest of the documentation details each of the object methods. Internal methods are usually preceded with a _ + +=cut + + +# Let the code begin... + + +package Bio::EnsEMBL::Mapper::Gap; +use vars qw(@ISA); +use strict; + +# Object preamble - inherits from Bio::Root::RootI + +use Bio::Root::RootI; + +@ISA = qw(Bio::Root::RootI); + +# new() is written here + +sub new { + my($class,@args) = @_; + + my $self = {}; + bless $self,$class; + +# set stuff in self from @args + return $self; +} + +=head2 start + + Title : start + Usage : $obj->start($newval) + Function: + Example : + Returns : value of start + Args : newvalue (optional) + + +=cut + +sub start{ + my ($self,$value) = @_; + if( defined $value) { + $self->{'start'} = $value; + } + return $self->{'start'}; + +} + +=head2 end + + Title : end + Usage : $obj->end($newval) + Function: + Example : + Returns : value of end + Args : newvalue (optional) + + +=cut + +sub end{ + my ($self,$value) = @_; + if( defined $value) { + $self->{'end'} = $value; + } + return $self->{'end'}; + +} + +1; diff --git a/modules/Bio/EnsEMBL/Mapper/Pair.pm b/modules/Bio/EnsEMBL/Mapper/Pair.pm new file mode 100644 index 0000000000000000000000000000000000000000..10e6da10a27300fece42a72e9317034e8d97c7a4 --- /dev/null +++ b/modules/Bio/EnsEMBL/Mapper/Pair.pm @@ -0,0 +1,128 @@ + +# +# Ensembl module for Bio::EnsEMBL::Mapper::Pair +# +# Cared for by Ewan Birney <birney@ebi.ac.uk> +# +# Copyright Ewan Birney +# +# You may distribute this module under the same terms as perl itself + +# POD documentation - main docs before the code + +=head1 NAME + +Bio::EnsEMBL::Mapper::Pair - DESCRIPTION of Object + +=head1 SYNOPSIS + +Give standard usage here + +=head1 DESCRIPTION + +Describe the object here + +=head1 AUTHOR - Ewan Birney + +This modules is part of the Ensembl project http://www.ensembl.org + +Email birney@ebi.ac.uk + +Describe contact details here + +=head1 APPENDIX + +The rest of the documentation details each of the object methods. Internal methods are usually preceded with a _ + +=cut + + +# Let the code begin... + + +package Bio::EnsEMBL::Mapper::Pair; +use vars qw(@ISA); +use strict; + +# Object preamble - inherits from Bio::Root::RootI + +use Bio::Root::RootI; + +@ISA = qw(Bio::Root::RootI); + +# new() is written here + +sub new { + my($class,@args) = @_; + + my $self = {}; + bless $self,$class; + +# set stuff in self from @args + return $self; +} + +=head2 to + + Title : to + Usage : $obj->to($newval) + Function: + Example : + Returns : value of to + Args : newvalue (optional) + + +=cut + +sub to{ + my ($self,$value) = @_; + if( defined $value) { + $self->{'to'} = $value; + } + return $self->{'to'}; + +} + +=head2 from + + Title : from + Usage : $obj->from($newval) + Function: + Example : + Returns : value of from + Args : newvalue (optional) + + +=cut + +sub from{ + my ($self,$value) = @_; + if( defined $value) { + $self->{'from'} = $value; + } + return $self->{'from'}; + +} + +=head2 ori + + Title : ori + Usage : $obj->ori($newval) + Function: + Example : + Returns : value of ori + Args : newvalue (optional) + + +=cut + +sub ori{ + my ($self,$value) = @_; + if( defined $value) { + $self->{'ori'} = $value; + } + return $self->{'ori'}; + +} + +1; diff --git a/modules/Bio/EnsEMBL/Mapper/Unit.pm b/modules/Bio/EnsEMBL/Mapper/Unit.pm new file mode 100644 index 0000000000000000000000000000000000000000..754d0f113773d02a5d67cba313a0b35dfe76142b --- /dev/null +++ b/modules/Bio/EnsEMBL/Mapper/Unit.pm @@ -0,0 +1,129 @@ + +# +# Ensembl module for Bio::EnsEMBL::Mapper::Unit +# +# Cared for by Ewan Birney <birney@ebi.ac.uk> +# +# Copyright Ewan Birney +# +# You may distribute this module under the same terms as perl itself + +# POD documentation - main docs before the code + +=head1 NAME + +Bio::EnsEMBL::Mapper::Unit - One side of a map pair + +=head1 SYNOPSIS + +Give standard usage here + +=head1 DESCRIPTION + +Describe the object here + +=head1 AUTHOR - Ewan Birney + +This modules is part of the Ensembl project http://www.ensembl.org + +Email birney@ebi.ac.uk + +Describe contact details here + +=head1 APPENDIX + +The rest of the documentation details each of the object methods. Internal methods are usually preceded with a _ + +=cut + + +# Let the code begin... + + +package Bio::EnsEMBL::Mapper::Unit; +use vars qw(@ISA); +use strict; + +# Object preamble - inherits from Bio::Root::RootI + +use Bio::Root::RootI; + +@ISA = qw(Bio::Root::RootI); + +# new() is written here + +sub new { + my($class,@args) = @_; + + my $self = {}; + bless $self,$class; + +# set stuff in self from @args + return $self; +} + +=head2 id + + Title : id + Usage : $obj->id($newval) + Function: + Example : + Returns : value of id + Args : newvalue (optional) + + +=cut + +sub id{ + my ($self,$value) = @_; + if( defined $value) { + $self->{'id'} = $value; + } + return $self->{'id'}; + +} + +=head2 start + + Title : start + Usage : $obj->start($newval) + Function: + Example : + Returns : value of start + Args : newvalue (optional) + + +=cut + +sub start{ + my ($self,$value) = @_; + if( defined $value) { + $self->{'start'} = $value; + } + return $self->{'start'}; + +} + +=head2 end + + Title : end + Usage : $obj->end($newval) + Function: + Example : + Returns : value of end + Args : newvalue (optional) + + +=cut + +sub end{ + my ($self,$value) = @_; + if( defined $value) { + $self->{'end'} = $value; + } + return $self->{'end'}; + +} + + +1; diff --git a/modules/t/mapper.t b/modules/t/mapper.t index e96e32876dc17dd8179a5e1a5cb155251ab0f76c..5ee3bb02bfd521b3500cdfeb749767b0aa7a93a7 100644 --- a/modules/t/mapper.t +++ b/modules/t/mapper.t @@ -21,7 +21,7 @@ ## We start with some black magic to print on failure. -BEGIN { $| = 1; print "1..4\n"; +BEGIN { $| = 1; print "1..6\n"; use vars qw($loaded); } END {print "not ok 1\n" unless $loaded;} @@ -30,13 +30,16 @@ use lib 't'; $loaded = 1; print "ok 1\n"; # 1st test passes. - + +use Bio::EnsEMBL::Mapper; # testing the Bio::EnsEMBL::Mapper module -$map = Bio::EnsEMBL::Mapper->new( "virtualcontig", "rawcontig" ); -load_sgp_dump( $map ); +$mapper = Bio::EnsEMBL::Mapper->new( "virtualcontig", "rawcontig" ); +load_sgp_dump( $mapper ); +$mapper->_freeze; +#$mapper->_dump; # loading done successfully print "ok 2\n"; @@ -50,37 +53,45 @@ if( @coordlist == 1 ) { print "not ok 3\n"; } -if( $coordlist[0]->{'start'} == 2 && - $coordlist[0]->{'end' } == 5 && - $coordlist[0]->{'strand' } == -1 ) { +if( $coordlist[0]->start == 2 && + $coordlist[0]->end == 5 && + $coordlist[0]->strand == -1 ) { print "ok 4\n"; } else { print "not ok 4\n"; - print STDERR %$coordlist[0]; } # now a split coord -@coordlist = $map->map_coordinates( 383700, 444000, 1, "chr1", "virtualcontig" ); +@coordlist = $mapper->map_coordinates( 383700, 444000, 1, "chr1", "virtualcontig" ); # should be ( 56072, 56092, -1, 314696 ) # (about:-) ( 126, 59773, -1, 341 ) # ( 5332, 5963, -1, 315843) -if( @coordlist = 3 ) { +if( scalar(@coordlist) == 3 ) { print "ok 5\n"; } else { print "not ok 5\n"; } +if( $coordlist[0]->start == 31917 && + $coordlist[0]->end == 31937 && + $coordlist[0]->id == 314696 && + $coordlist[0]->strand == -1 ) { + print "ok 6\n"; +} else { + print "not ok 6\n"; +} + sub load_sgp_dump { my $map = shift; +#chr_name raw_id chr_start chr_end raw_start raw_end raw_ori my @sgp_dump = split ( /\n/, qq { -chr_name raw_id chr_start chr_end raw_start raw_end raw_ori chr1 627012 1 31276 1 31276 1 chr1 627010 31377 42949 72250 83822 -1 chr1 2768 42950 180950 251 138251 1 @@ -198,3 +209,4 @@ chr1 625359 1214016 1216330 1 2315 1 } +