Commit ea807e30 authored by Graham McVicker's avatar Graham McVicker
Browse files

Changes so negative strand slices will work

parent 05a408ed
......@@ -429,10 +429,7 @@ sub _parse_features {
my $query_unit = $self->_query_unit();
my $hit_unit = $self->_hit_unit();
my $verbose = 0;
if($features->[0]->hseqname eq 'CE25688'){
$verbose = 1;
}
if (ref($features) ne "ARRAY") {
$self->throw("features must be an array reference not a [" .
ref($features) . "]");
......@@ -450,10 +447,7 @@ sub _parse_features {
} else {
@f = sort { $b->start <=> $a->start} @$features;
}
foreach my $f (@f) {
#print STDERR "SOMETHING INFORMATIVE " . $f->gffstring . "\n";
}
#print STDERR "\n\n";
#print STDERR $f[0]->gffstring."\n";
my $hstrand = $f[0]->hstrand;
my $name = $f[0]->seqname;
......@@ -510,7 +504,6 @@ sub _parse_features {
#
# Sanity checks
#
#print STDERR "supporting feature to convert ".$f->gffstring."\n" if($verbose);
if (!$f->isa("Bio::EnsEMBL::FeaturePair")) {
$self->throw("Array element [$f] is not a Bio::EnsEMBL::FeaturePair");
}
......@@ -543,7 +536,7 @@ sub _parse_features {
my $start1 = $f->start; #source sequence alignment start
my $start2 = $f->hstart(); #hit sequence alignment start
# print STDERR "start1 ".$start1." start2 ".$start2."\n" if($verbose);
#
# More sanity checking
#
......@@ -566,7 +559,7 @@ sub _parse_features {
my $length = ($f->end - $f->start + 1); #length of source seq alignment
my $hlength = ($f->hend - $f->hstart + 1); #length of hit seq alignment
# print STDERR "length ".$length." hlength ".$hlength."\n" if($verbose);
# using multiplication to avoid rounding errors, hence the
# switch from query to hit for the ratios
......@@ -596,7 +589,7 @@ sub _parse_features {
}
my $hlengthfactor = ($query_unit/$hit_unit);
# print STDERR "hlengthfactor ".$hlengthfactor."\n" if($verbose);
# if( $query_unit == 1 && $hit_unit == 3 ) {
# $hlengthfactor = (1/3);
# }
......@@ -614,7 +607,6 @@ sub _parse_features {
my $insertion_flag = 0;
if( $strand == 1 ) {
if( ( defined $prev1 ) && ( $f->start > $prev1 + 1 )) {
#print STDERR "there is an insertion\n" if($verbose);
#there is an insertion
$insertion_flag = 1;
my $gap = $f->start - $prev1 - 1;
......@@ -622,7 +614,6 @@ sub _parse_features {
$gap = ""; # no need for a number if gap length is 1
}
$string .= "$gap"."I";
#print STDERR "cigar stands at ".$string."\n" if($verbose);
}
#shift our position in the source seq alignment
......@@ -638,7 +629,6 @@ sub _parse_features {
$gap = ""; # no need for a number if gap length is 1
}
$string .= "$gap"."I";
#print STDERR "cigar stands at ".$string."\n" if($verbose);
}
#shift our position in the source seq alignment
......@@ -652,7 +642,6 @@ sub _parse_features {
#
if( $hstrand == 1 ) {
if(( defined $prev2 ) && ( $f->hstart() > $prev2 + 1 )) {
#print STDERR "there is a deletion\n" if($verbose);
#there is a deletion
my $gap = $f->hstart - $prev2 - 1;
my $gap2 = int( $gap * $hlengthfactor + 0.05 );
......@@ -661,7 +650,7 @@ sub _parse_features {
$gap2 = ""; # no need for a number if gap length is 1
}
$string .= "$gap2"."D";
#print STDERR "cigar stands at ".$string."\n" if($verbose);
#sanity check, Should not be an insertion and deletion
if($insertion_flag) {
$self->warn("Should not be an deletion and insertion on the " .
......@@ -673,7 +662,6 @@ sub _parse_features {
} else {
if( ( defined $prev2 ) && ( $f->hend() + 1 < $prev2 )) {
#print STDERR "there is a deletion\n" if($verbose);
#there is a deletion
my $gap = $prev2 - $f->hend - 1;
my $gap2 = int( $gap * $hlengthfactor + 0.05 );
......@@ -682,7 +670,6 @@ sub _parse_features {
$gap2 = ""; # no need for a number if gap length is 1
}
$string .= "$gap2"."D";
#print STDERR "cigar stands at ".$string."\n" if($verbose);
#sanity check, Should not be an insertion and deletion
if($insertion_flag) {
$self->throw("Should not be an deletion and insertion on the " .
......@@ -700,7 +687,7 @@ sub _parse_features {
$matchlength = "";
}
$string .= $matchlength."M";
# print STDERR "cigar stands at ".$string."\n" if($verbose);
#print STDERR "finished with this feature\n\n";
}
# print STDERR "creating align feature start ".$f1start." end ".$f1end." strand ".$strand." score ".$score." percent id ".$percent." pvalue ".$pvalue." seqname ".$name." phase ".$phase." analysis ".$analysis." hstart ".$f2start," hend ".$f2end." hstrand ".$hstrand." hid ".$hname."\n";
......@@ -737,9 +724,6 @@ sub _parse_features {
$self->feature1($feature1);
$self->feature2($feature2);
$self->cigar_string($string);
#print STDERR "finished ".$self->gffstring."\t ".$self->cigar_string."\n\n" if($verbose);
#print STDERR "\n\n";
}
......@@ -749,14 +733,13 @@ sub _parse_features {
sub _transform_to_RawContig{
my ($self) = @_;
#print STDERR "transforming to raw contig coord\n\n";
if(!$self->contig){
$self->throw("can't transform coordinates of ".$self." without some sort of contig defined");
$self->throw("can't transform coordinates of " . $self .
" without some sort of contig defined");
}
#my $mapper = $self->contig->adaptor->db->get_AssemblyMapperAdaptor->fetch_by_type( $self->contig()->assembly_type() );
#print STDERR "transforming align feature to rawcontig \n\n";
my $rcAdaptor = $self->contig->adaptor()->db()->get_RawContigAdaptor();
#my $global_start = $self->contig->chr_start();
my @out;
my @features = $self->_parse_cigar();
......@@ -832,28 +815,42 @@ sub _query_unit {
sub _transform_feature_to_RawContig{
my($self, $feature) = @_;
my $verbose = 0;
#$verbose = 1 if($feature->hseqname eq 'CE25688');
#print STDERR "transforming ".$feature->gffstring."\n" if($verbose);
my $slice = $self->contig;
if(!$self->contig){
$self->throw("can't transform coordinates of ".$self." without some sort of contig defined");
unless($slice){
$self->throw("can't transform coordinates of $self without some " .
"sort of contig defined");
}
my $mapper = $self->contig->adaptor->db->get_AssemblyMapperAdaptor->fetch_by_type( $self->contig()->assembly_type() );
my $asma = $slice->adaptor->db->get_AssemblyMapperAdaptor;
my $mapper = $asma->fetch_by_type( $slice->assembly_type );
my $rcAdaptor = $self->contig->adaptor()->db()->get_RawContigAdaptor();
my $global_start = $self->contig->chr_start();
my $rcAdaptor = $slice->adaptor()->db()->get_RawContigAdaptor();
my @out;
#first convert the feature's slice coords to assembly coords
my ($start, $end, $strand);
if($slice->strand == 1) {
$start = $slice->chr_start + $feature->start - 1;
$end = $slice->chr_start + $feature->end - 1;
$strand = $feature->strand;
} else {
$start = $slice->chr_end - $feature->end + 1;
$end = $slice->chr_end - $feature->start + 1;
$strand = $feature->strand * -1;
}
#now convert the feature's assembly coords to raw contig coords
my @mapped = $mapper->map_coordinates_to_rawcontig
(
$self->contig()->chr_name(),
$feature->start()+$global_start-1,
$feature->end()+$global_start-1,
$feature->strand()*$self->contig()->strand()
$slice->chr_name(),
$start,
$end,
$strand
);
if( ! @mapped ) {
unless( @mapped ) {
$self->throw( "couldn't map ".$self."\n" );
return $self;
}
......@@ -899,10 +896,8 @@ sub _transform_feature_to_RawContig{
}
#print "hit start ".$hit_start." hit end ".$hit_end."\n";
my $rawContig = $rcAdaptor->fetch_by_dbID( $mapped[$i]->id() );
my $f1 = new Bio::EnsEMBL::SeqFeature();
my $f2 = new Bio::EnsEMBL::SeqFeature();
my $new_feature = Bio::EnsEMBL::FeaturePair->new(-feature1=>$f1,
-feature2=>$f2);
my $new_feature = Bio::EnsEMBL::FeaturePair->new();
$new_feature->start($mapped[$i]->start);
$new_feature->end($mapped[$i]->end);
......@@ -911,11 +906,12 @@ sub _transform_feature_to_RawContig{
$new_feature->score($feature->score);
$new_feature->percent_id($feature->percent_id);
$new_feature->p_value($feature->p_value);
$new_feature->hstart($hit_start);
$new_feature->hend($hit_end);
$new_feature->hstrand($feature->hstrand);
$new_feature->hseqname($feature->hseqname);
#$new_feature->hscore($feature->score);
$new_feature->analysis($feature->analysis);
$new_feature->contig($rawContig);
#print STDERR "FEATURE: ",join( " ", ( $new_feature->start(), $new_feature->end(), $new_feature->seqname,
......@@ -934,10 +930,8 @@ sub _transform_feature_to_RawContig{
return;
}
my $rawContig = $rcAdaptor->fetch_by_dbID( $mapped[0]->id() );
my $f1 = new Bio::EnsEMBL::SeqFeature();
my $f2 = new Bio::EnsEMBL::SeqFeature();
my $new_feature = Bio::EnsEMBL::FeaturePair->new(-feature1=>$f1,
-feature2=>$f2);
my $new_feature = Bio::EnsEMBL::FeaturePair->new();
$new_feature->start($mapped[0]->start);
$new_feature->end($mapped[0]->end);
$new_feature->strand($mapped[0]->strand);
......@@ -945,11 +939,12 @@ sub _transform_feature_to_RawContig{
$new_feature->score($feature->score);
$new_feature->percent_id($feature->percent_id);
$new_feature->p_value($feature->p_value);
$new_feature->hstart($feature->hstart);
$new_feature->hend($feature->hend);
$new_feature->hstrand($feature->hstrand);
$new_feature->hseqname($feature->hseqname);
#$new_feature->hscore($feature->score);
$new_feature->analysis($feature->analysis);
$new_feature->contig($rawContig);
......
......@@ -377,12 +377,11 @@ sub fetch_all_by_Slice_constraint {
return $self->{'_slice_feature_cache'}{$key} = $features;
}
#remapping has not been done, we have to do our own
#remapping has not been done, we have to do our own conversion from
# raw contig coords to slice coords
my @out = ();
#convert the features to slice coordinates from raw contig coordinates
my ($feat_start, $feat_end, $feat_strand);
foreach my $f (@$features) {
......@@ -401,7 +400,7 @@ sub fetch_all_by_Slice_constraint {
#shift the feature start, end and strand in one call
if($slice_strand == -1) {
$f->move( $slice_end - $end + 1, $slice_end - $start + 1, -$strand );
$f->move( $slice_end - $end + 1, $slice_end - $start + 1, $strand * -1 );
} else {
$f->move( $start - $slice_start + 1, $end - $slice_start + 1, $strand );
}
......
......@@ -225,7 +225,7 @@ sub _objs_from_sth {
if($slice_strand == -1) {
$feat_start = $slice_end - $end + 1;
$feat_end = $slice_end - $start + 1;
$feat_strand = -$strand;
$feat_strand = $strand * -1;
} else {
$feat_start = $start - $slice_start + 1;
$feat_end = $end - $slice_start + 1;
......
......@@ -187,7 +187,7 @@ sub _objs_from_sth {
if($slice_strand == -1) {
$feat_start = $slice_end - $end + 1;
$feat_end = $slice_end - $start + 1;
$feat_strand = -$strand;
$feat_strand = $strand * -1;
} else {
$feat_start = $start - $slice_start + 1;
$feat_end = $end - $slice_start + 1;
......
......@@ -271,18 +271,35 @@ sub _transform_between_Slices {
"to chr " . $to_slice->chr_name());
}
#create a copy of the old exon
my $new_exon = new Bio::EnsEMBL::Exon;
%$new_exon = %$self;
my $shift = $from_slice->chr_start() - $to_slice->chr_start();
#unset the new exons supporting features
$new_exon->{'_supporting_evidence'} = [];
#shift the start and end of the exon accordingly
#negative coordinates are permitted, as are coords past slice boundary
$new_exon->start($self->start() + $shift);
$new_exon->end($self->end() + $shift);
#calculate the exons position in the assembly
my ($exon_chr_start, $exon_chr_end, $exon_chr_strand);
if($from_slice->strand == 1) {
$exon_chr_start = $self->start + $from_slice->chr_start - 1;
$exon_chr_end = $self->end + $from_slice->chr_start - 1;
$exon_chr_strand = $self->strand;
} else {
$exon_chr_start = $from_slice->chr_end - $self->end + 1;
$exon_chr_end = $from_slice->chr_end - $self->start + 1;
$exon_chr_strand = $self->strand * -1;
}
#now calculate the exons position on the new slice
if($to_slice->strand == 1) {
$new_exon->start($exon_chr_start - $to_slice->chr_start + 1);
$new_exon->end ($exon_chr_end - $to_slice->chr_start + 1);
$new_exon->strand($exon_chr_strand);
} else {
$new_exon->start($to_slice->chr_end - $exon_chr_end + 1);
$new_exon->end ($to_slice->chr_end - $exon_chr_start + 1);
$new_exon->strand($exon_chr_strand * -1);
}
$new_exon->contig($to_slice);
......
......@@ -1036,7 +1036,6 @@ sub transform {
}
else {
# transform to raw_contig coords from Slice coords
return $self->_transform_to_RawContig();
}
......@@ -1103,11 +1102,16 @@ sub _transform_to_Slice {
}
# mapped coords are on chromosome - need to convert to slice
my $global_start = $slice->chr_start;
if($slice->strand == 1) {
$self->start ($mapped[0]->start - $slice->chr_start + 1);
$self->end ($mapped[0]->end - $slice->chr_start + 1);
$self->strand ($mapped[0]->strand);
} else {
$self->start ($slice->end - $mapped[0]->end + 1);
$self->end ($slice->end - $mapped[0]->start + 1);
$self->strand ($mapped[0]->strand * -1);
}
$self->start ($mapped[0]->start - $global_start + 1);
$self->end ($mapped[0]->end - $global_start + 1);
$self->strand ($mapped[0]->strand);
$self->seqname($mapped[0]->id);
#set the contig to the slice
......@@ -1118,22 +1122,43 @@ sub _transform_to_Slice {
sub _transform_between_Slices {
my ($self, $slice) = @_;
my ($self, $to_slice) = @_;
my $from_slice = $self->contig;
$self->throw("New contig [$slice] is not a Bio::EnsEMBL::Slice")
unless $slice->isa("Bio::EnsEMBL::Slice");
$self->throw("New contig [$to_slice] is not a Bio::EnsEMBL::Slice")
unless $to_slice->isa("Bio::EnsEMBL::Slice");
if ((my $c1 = $self->contig->chr_name) ne (my $c2 = $slice->chr_name)) {
if ((my $c1 = $from_slice->chr_name) ne (my $c2 = $to_slice->chr_name)) {
$self->warn("Can't transform between chromosomes: $c1 and $c2");
return;
}
my $shift = $slice->chr_start - $self->contig->chr_start;
my($start, $end, $strand);
$self->start ($self->start - $shift);
$self->end ($self->end - $shift);
$self->strand($self->strand * $slice->strand);
$self->contig($slice);
#first convert to assembly coords
if($from_slice->strand == 1) {
$start = $from_slice->chr_start + $self->start - 1;
$end = $from_slice->chr_start + $self->end - 1;
$strand = $self->strand;
} else {
$start = $from_slice->chr_end - $self->end + 1;
$end = $from_slice->chr_end - $self->start + 1;
$strand = $self->strand;
}
#now convert to the other slice's coords
if($to_slice->strand == 1) {
$self->start ($start - $to_slice->chr_start + 1);
$self->end ($end - $to_slice->chr_start + 1);
$self->strand($strand);
} else {
$self->start ($to_slice->chr_end - $end + 1);
$self->end ($to_slice->chr_end - $start + 1);
$self->strand($strand * -1);
}
$self->contig($to_slice);
return $self;
}
......@@ -1145,20 +1170,32 @@ sub _transform_to_RawContig {
$self->throw("can't transform coordinates of $self without a contig defined")
unless $self->contig;
my $dbh = $self->contig->adaptor->db;
my $slice = $self->contig;
my $dbh = $slice->adaptor->db;
my $mapper = $dbh->get_AssemblyMapperAdaptor->
fetch_by_type($self->contig->assembly_type);
my $mapper =
$dbh->get_AssemblyMapperAdaptor->fetch_by_type($slice->assembly_type);
my $rca = $dbh->get_RawContigAdaptor;
# need to convert to chromosomal coords before mapping
my $global_start = $self->contig->chr_start;
#first convert the features coordinates to assembly coordinates
my($start, $end, $strand);
if($slice->strand == 1) {
$start = $slice->chr_start + $self->start - 1;
$end = $slice->chr_start + $self->end - 1;
$strand = $self->strand;
} else {
$start = $slice->chr_end - $self->end + 1;
$end = $slice->chr_end - $self->start + 1;
$strand = $self->strand * -1;
}
#convert the assembly coordinates to RawContig coordinates
my @mapped = $mapper->map_coordinates_to_rawcontig(
$self->contig->chr_name,
$self->start + $global_start - 1,
$self->end + $global_start - 1,
$self->strand * $self->contig->strand
$slice->chr_name,
$start,
$end,
$strand
);
unless (@mapped) {
......@@ -1214,7 +1251,7 @@ sub _transform_to_RawContig {
}
unless ($self->is_splittable) {
print STDERR "Feature spans >1 raw contig - can't split\n";
$self->warn("Feature spans >1 raw contig - can't split\n");
return;
}
......
......@@ -262,6 +262,8 @@ sub invert {
my $strand = $self->strand();
$self->strand(-$strand);
return $self;
}
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment