Commit be44bac6 authored by Graham McVicker's avatar Graham McVicker
Browse files

Exons attached supporting features now tranformed when exons transformed

parent 15c16c6e
......@@ -191,63 +191,6 @@ sub ungapped_features {
}
=head2 transform
Arg [1] : Bio::EnsEMBL::Slice $slice
Example : none
Description: if argument is given, transforms this feature into the slice
coordinate system, invalidating this one.
if no argument is given, transforms this feature into raw contig
coordinates, invalidating this one.
The process can produce more than one feature so we return an array.
Returntype : list of Bio::EnsEMBL::BaseAlignFeature
Exceptions : none
Caller : general
=cut
sub transform{
my ($self, $slice) = @_;
#print "transforming ".$self." to ".$slice." coords\n";
if( ! defined $slice ) {
#Since slice arg is not defined - we want raw contig coords
if(( defined $self->contig ) &&
( $self->contig->isa( "Bio::EnsEMBL::RawContig" )) ) {
# print STDERR "BaseAlignFeature::transform, you are already apparently in rawcontig coords so why try to transform to them\n";
#we are already in rawcontig coords, nothing needs to be done
return $self;
} else {
#transform to raw_contig coords from Slice coords
my @array = $self->_transform_to_rawcontig();
# print "transform to rawcontig has returned ".@array." features\n";
# foreach my $a(@array){
# print "feature is ".$a."\n";
# }
return @array;
}
}
if( defined $self->contig ) {
if($self->contig->isa( "Bio::EnsEMBL::RawContig" )) {
#transform to slice coords from raw contig coords
return $self->_transform_to_slice( $slice );
} elsif($self->contig->isa( "Bio::EnsEMBL::Slice" )) {
#transform to slice coords from other slice coords
return $self->_transform_between_slices( $slice );
} else {
#Unknown contig type - throw an exception
return $self->throw("Exon's 'contig' is of unknown type "
. $self->contig() . " - cannot transform to Slice coords");
}
} else {
#Can't convert to slice coords without a contig to work with
return $self->throw("Exon's contig is not defined - cannot transform to " .
"Slice coords");
}
}
=head2 dbID
Arg [1] : int $dbID
......@@ -796,22 +739,16 @@ sub _parse_features {
sub _transform_to_slice{
my ($self, $slice ) = @_;
$self->throw( "implented soon :-)" );
}
sub _transform_to_rawcontig{
sub _transform_to_RawContig {
my ($self) = @_;
#print STDERR "transforming to raw contig coord\n\n";
if(!$self->contig){
$self->throw("can't transform coordinates of ".$self." without some sort of contig defined");
$self->throw("can't transform coordinates of " . $self .
" without some sort of contig defined");
}
#my $mapper = $self->contig->adaptor->db->get_AssemblyMapperAdaptor->fetch_by_type( $self->contig()->assembly_type() );
my $rcAdaptor = $self->adaptor()->db()->get_RawContigAdaptor();
my $rcAdaptor = $self->contig->adaptor()->db()->get_RawContigAdaptor();
#my $global_start = $self->contig->chr_start();
my @out;
......@@ -820,7 +757,7 @@ sub _transform_to_rawcontig{
my %rc_features;
foreach my $f(@features){
my @new_features = $self->_transform_feature_to_rawcontig($f);
my @new_features = $self->_transform_feature_to_RawContig($f);
push(@mapped_features, @new_features);
}
......@@ -839,6 +776,7 @@ sub _transform_to_rawcontig{
$outputf->score( $self->score() );
$outputf->percent_id( $self->percent_id() );
$outputf->p_value( $self->p_value() );
$outputf->contig($rcAdaptor->fetch_by_dbID($contig_id));
push(@out, $outputf);
}
return @out;
......@@ -847,14 +785,6 @@ sub _transform_to_rawcontig{
sub _transform_between_slices {
my ( $self, $to_slice ) = @_;
}
=head2 _hit_unit
Args : none
......@@ -890,7 +820,7 @@ sub _query_unit {
}
sub _transform_feature_to_rawcontig{
sub _transform_feature_to_RawContig{
my($self, $feature) = @_;
my $verbose = 0;
......@@ -903,7 +833,7 @@ sub _transform_feature_to_rawcontig{
}
my $mapper = $self->contig->adaptor->db->get_AssemblyMapperAdaptor->fetch_by_type( $self->contig()->assembly_type() );
my $rcAdaptor = $self->adaptor()->db()->get_RawContigAdaptor();
my $rcAdaptor = $self->contig->adaptor()->db()->get_RawContigAdaptor();
my $global_start = $self->contig->chr_start();
my @out;
my @mapped = $mapper->map_coordinates_to_rawcontig
......@@ -974,7 +904,7 @@ sub _transform_feature_to_rawcontig{
$new_feature->hseqname($feature->hseqname);
#$new_feature->hscore($feature->score);
$new_feature->analysis($feature->analysis);
$new_feature->attach_seq($rawContig);
$new_feature->contig($rawContig);
#print STDERR "FEATURE: ",join( " ", ( $new_feature->start(), $new_feature->end(), $new_feature->seqname,
# $new_feature->contig(), $new_feature->hstart(), $new_feature->hend() )),"\n";
#print STDERR "split feature ".$new_feature->gffstring."\n";
......@@ -1008,17 +938,11 @@ sub _transform_feature_to_rawcontig{
$new_feature->hseqname($feature->hseqname);
#$new_feature->hscore($feature->score);
$new_feature->analysis($feature->analysis);
$new_feature->attach_seq($rawContig);
$new_feature->contig($rawContig);
push(@out, $new_feature);
}
foreach my $sf(@out){
#print STDERR "gff ".$sf->gffstring."\n";
}
return @out;
}
1;
......@@ -39,24 +39,29 @@ use vars qw(@ISA);
Arg [1] : Bio::EnsEMBL::Exon $exon
The exon to fetch supporting features for
Arg [2] : (optional)boolean $sticky_component_flag
Indicates $exon is a component exon to a sticky exon
Example : @supporting = $supporting_feature_adaptor->fetch_by_Exon($exon);
Description: Retrieves supporting features (evidence) for a given exon.
Returntype : list of Bio::EnsEMBL::BaseAlignFeatures
Exceptions : none
Returntype : list of Bio::EnsEMBL::BaseAlignFeatures in the same coordinate
system as the $exon argument
Exceptions : warning if $exon is not in the database (i.e. dbID not defined)
throw if a retrieved supporting feature is of unknown type
Caller : Bio::EnsEMBL::Exon
=cut
sub fetch_by_Exon {
my ( $self, $exon ) = @_;
my ( $self, $exon, $sticky_component_flag ) = @_;
my @out;
# if exon is sticky, get supporting from components
if( $exon->isa( 'Bio::EnsEMBL::StickyExon' )) {
my @component_exons = $exon->each_component_exon();
for my $component_exon ( @component_exons ) {
$self->fetch_evidence_by_Exon( $component_exon );
foreach my $component_exon ( $exon->each_component_Exon() ) {
push @out, $self->fetch_by_Exon( $component_exon, 1 );
}
return;
return @out;
}
unless($exon->dbID) {
......@@ -66,8 +71,8 @@ sub fetch_by_Exon {
}
my $sth = $self->prepare("SELECT feature_type, feature_id
FROM supporting_feature
my $sth = $self->prepare("SELECT sf.feature_type, sf.feature_id
FROM supporting_feature sf
WHERE exon_id = ?");
$sth->execute($exon->dbID());
......@@ -75,16 +80,29 @@ sub fetch_by_Exon {
my $prot_adp = $self->db->get_ProteinAlignFeatureAdaptor;
my $dna_adp = $self->db->get_DnaAlignFeatureAdaptor;
my @out;
my $feature;
while(my ($type, $feature_id) = $sth->fetchrow){
if($type eq 'protein_align_feature'){
push @out, $prot_adp->fetch_by_dbID($feature_id);
$feature = $prot_adp->fetch_by_dbID($feature_id);
}elsif($type eq 'dna_align_feature'){
push @out, $dna_adp->fetch_by_dbID($feature_id);
$feature = $dna_adp->fetch_by_dbID($feature_id);
}else {
$self->throw("Unknown feature type [$type]\n");
}
#we might have to convert the features coordinate system
unless($feature->contig->name() eq $exon->contig->name()) {
if($exon->contig()->isa("Bio::EnsEMBL::Slice")) {
#tranform to slice coords
$feature->transform($exon->contig());
} else {
#this feature is on the wrong contig
#this is okay only if this is a sticky component exon
next;
}
}
push @out, $feature;
}
return @out;
}
......
......@@ -268,12 +268,14 @@ sub _transform_between_Slices {
"to chr " . $to_slice->chr_name());
}
my $new_exon = new Bio::EnsEMBL::Exon();
my $new_exon = new Bio::EnsEMBL::Exon;
%$new_exon = %$self;
my $shift = $from_slice->chr_start() - $to_slice->chr_start();
#unset the new exons supporting features
$new_exon->{'_supporting_evidence'} = [];
#shift the start and end of the exon accordingly
#negative coordinates are permitted, as are coords past slice boundary
$new_exon->start($self->start() + $shift);
......@@ -281,6 +283,16 @@ sub _transform_between_Slices {
$new_exon->contig($to_slice);
#copy the attached supporting features and transform them
my @feats;
foreach my $sf ($self->get_all_supporting_features()) {
#my $f = $sf->new();
#%$f = %$sf;
###(mcvicker) this would be better if the feature was copied
push @feats, $sf->transform($to_slice);
}
$new_exon->add_supporting_features(@feats);
return $new_exon;
}
......@@ -315,10 +327,9 @@ sub _transform_to_Slice {
$self->strand()
);
# exons should always transform so in theory no error check
# necessary
# actually we could have exons inside and outside the Slice because of db design
# and the query that produces them
# exons should always transform so in theory no error check necessary
# actually we could have exons inside and outside the Slice
# because of db design and the query that produces them
if( ! @mapped ) {
$self->throw( "Exon couldnt map" );
}
......@@ -337,8 +348,11 @@ sub _transform_to_Slice {
%$slice = %{$sa->fetch_by_chr_name( $mapped[0]->id() )};
}
my $newexon = Bio::EnsEMBL::Exon->new();
my $newexon = new Bio::EnsEMBL::Exon();
%$newexon = %$self;
#unset supporting features of new exon
$newexon->{'_supporting_evidence'} = [];
if ($slice->strand == 1) {
$newexon->start( $mapped[0]->start() - $slice->chr_start() + 1);
......@@ -351,6 +365,16 @@ sub _transform_to_Slice {
$newexon->strand( $mapped[0]->strand() * $slice->strand() );
$newexon->contig( $slice );
#copy the attached supporting features and transform them
my @feats;
foreach my $sf ($self->get_all_supporting_features()) {
#my $f = $sf->new();
#%$f = %$sf;
#(mcvicker) this would be better if the feature was copied
push @feats, $sf->transform($slice);
}
$newexon->add_supporting_features(@feats);
return $newexon;
}
......@@ -372,15 +396,14 @@ sub _transform_to_Slice {
sub _transform_to_RawContig {
my $self = shift;
my $mapper = $self->contig()->adaptor->db->get_AssemblyMapperAdaptor->fetch_by_type
( $self->contig()->assembly_type() );
my $asma = $self->contig()->adaptor()->db->get_AssemblyMapperAdaptor();
my $mapper = $asma->fetch_by_type( $self->contig()->assembly_type() );
my $rcAdaptor = $self->adaptor()->db()->get_RawContigAdaptor();
my $slice_chr_start = $self->contig->chr_start();
my $slice_chr_end = $self->contig->chr_end();
$self->_transform_features_to_rawcontig();
my ($exon_chr_start,$exon_chr_end);
if ($self->contig()->strand() == 1) {
......@@ -404,10 +427,21 @@ sub _transform_to_RawContig {
return $self;
}
#transform the supporting features to raw contig coords (hashed on contig)
my %sf_hash;
foreach my $sf ($self->get_all_supporting_features()) {
foreach my $mapped_feat ($sf->transform()) {
unless(exists $sf_hash{$mapped_feat->contig->name}) {
$sf_hash{$mapped_feat->contig->name} = [];
}
push @{$sf_hash{$mapped_feat->contig->name}}, $mapped_feat;
}
}
if( scalar( @mapped ) > 1 ) {
# sticky exons
# bjeacchh
my $stickyExon = Bio::EnsEMBL::StickyExon->new();
$stickyExon->phase( $self->phase() );
$stickyExon->adaptor( $self->adaptor() );
......@@ -433,6 +467,13 @@ sub _transform_to_RawContig {
$componentExon->end_phase($self->end_phase);
$componentExon->dbID( $self->dbID() );
$componentExon->adaptor( $self->adaptor() );
#add the supporting features on this contig to the component exon
if(exists $sf_hash{$rawContig->name()}) {
$componentExon->add_supporting_features(@{$sf_hash{$rawContig->name()}});
}
$stickyExon->add_component_Exon( $componentExon );
$sticky_length += ( $mapped[$i]->end() - $mapped[$i]->start() + 1 );
}
......@@ -462,43 +503,25 @@ sub _transform_to_RawContig {
" lies on a gap cannot be mapped\n");
}
my $rawContig = $rcAdaptor->fetch_by_dbID( $mapped[0]->id() );
$self->start( $mapped[0]->start() );
$self->end( $mapped[0]->end() );
$self->strand( $mapped[0]->strand() );
# attaching seq ?
$self->contig( $rawContig );
return $self;
}
}
my $new_exon = new Bio::EnsEMBL::Exon();
#copy this exon
%$new_exon = %$self;
# this should definatly happen inside the features ...
sub _transform_features_to_rawcontig {
my ( $self ) = @_;
#unset supporting evidence of new exon
$new_exon->{'_supporting_evidence'} = undef;
if( ! defined $self->{'_supporting_evidence'} ) {
return;
}
#print STDERR "TRANSFORMING SUPPORTING FEATURES\n";
my @supporting_features = $self->each_Supporting_Feature;
my @remapped_sf;
foreach my $sf(@supporting_features) {
#print "transforming ".$sf."\n";
my @new_sf = $sf->transform();
#print STDERR "have ".@new_sf." supporting features from transform\n";
#foreach my $f(@new_sf){
# print "have ".$f." from transformation\n";
#}
push(@remapped_sf, @new_sf);
$new_exon->start( $mapped[0]->start() );
$new_exon->end( $mapped[0]->end() );
$new_exon->strand( $mapped[0]->strand() );
# attach raw contig
$new_exon->contig( $rawContig );
#replace old supporting feats with transformed supporting feats
$new_exon->add_supporting_features($sf_hash{$rawContig->name()});
return $new_exon;
}
$self->{'_supporting_evidence'} = undef;
foreach my $f(@remapped_sf){
# print "adding ".$f." to supporting feature\n";
$self->add_Supporting_Feature($f);
}
}
......@@ -913,7 +936,9 @@ sub _genscan_peptide{
get_all_supporting_features call will not retrieve supporting
features from the database.
Returntype : none
Exceptions : thrown if any of the features are not SeqFeatureIs
Exceptions : throw if any of the features are not SeqFeatureIs
throw if any of the features are not in the same coordinate
system as the exon
Caller : general
=cut
......@@ -923,11 +948,19 @@ sub add_supporting_features {
$self->{_supporting_evidence} = []
unless defined($self->{_supporting_evidence});
# check whether this feature object has been added already
FEATURE: foreach my $feature (@features) {
$self->throw("Supporting feat [$feature] not a Bio::EnsEMBL::SeqFeatureI")
unless($feature && $feature->isa("Bio::EnsEMBL::SeqFeatureI"));
FEATURE: foreach my $feature (@features) {
unless($feature && $feature->isa("Bio::EnsEMBL::SeqFeatureI")) {
$self->throw("Supporting feat [$feature] not a " .
"Bio::EnsEMBL::SeqFeatureI");
}
unless($self->contig()->name() eq $feature->contig->name()) {
$self->throw("Supporting feat not in same coord system as exon\n" .
"exon is attached to [".$self->contig->name()."]\n" .
"feat is attached to [".$feature->contig->name()."]");
}
foreach my $added_feature ( @{ $self->{_supporting_evidence} } ){
# compare objects
......@@ -936,7 +969,7 @@ sub add_supporting_features {
next FEATURE;
}
}
#no duplicate was found, add the feature
push(@{$self->{_supporting_evidence}},$feature);
}
......
......@@ -1077,8 +1077,8 @@ sub _transform_to_Slice {
my $dbh = $self->contig->adaptor->db;
my $mapper = $dbh->get_AssemblyMapperAdaptor->
fetch_by_type($slice->assembly_type);
my $mapper =
$dbh->get_AssemblyMapperAdaptor->fetch_by_type($slice->assembly_type);
my $rca = $dbh->get_RawContigAdaptor;
my @mapped = $mapper->map_coordinates_to_assembly(
......@@ -1093,7 +1093,8 @@ sub _transform_to_Slice {
}
unless (@mapped == 1) {
$self->throw("$self should only map to one chromosome - something bad has happened ...");
$self->throw("$self should only map to one chromosome - " .
"something bad has happened ...");
}
if ($mapped[0]->isa("Bio::EnsEMBL::Mapper::Gap")) {
......@@ -1109,6 +1110,9 @@ sub _transform_to_Slice {
$self->strand ($mapped[0]->strand);
$self->seqname($mapped[0]->id);
#set the contig to the slice
$self->contig($slice);
return $self;
}
......@@ -1175,7 +1179,7 @@ sub _transform_to_RawContig {
$self->end ($mapped[0]->end);
$self->strand ($mapped[0]->strand);
$self->seqname ($mapped[0]->id);
$self->attach_seq($rca->fetch_by_dbID($mapped[0]->id));
$self->contig($rca->fetch_by_dbID($mapped[0]->id));
return $self;
}
......@@ -1199,11 +1203,11 @@ sub _transform_to_RawContig {
# case where only one RawContig maps
if (@coords == 1) {
$self->start ($coords[0]->start);
$self->end ($coords[0]->end);
$self->strand ($coords[0]->strand);
$self->seqname ($coords[0]->id);
$self->attach_seq($rca->fetch_by_dbID($coords[0]->id));
$self->start ($coords[0]->start);
$self->end ($coords[0]->end);
$self->strand ($coords[0]->strand);
$self->seqname($coords[0]->id);
$self->contig ($rca->fetch_by_dbID($coords[0]->id));
$self->warn("Feature [$self] truncated as lies partially on a gap");
return $self;
......@@ -1226,10 +1230,11 @@ sub _transform_to_RawContig {
my $feat = $obj->new;
$feat->start ($map->start);
$feat->end ($map->end);
$feat->strand ($map->strand);
$feat->attach_seq($rca->fetch_by_dbID($map->id));
$feat->start ($map->start);
$feat->end ($map->end);
$feat->strand ($map->strand);
$feat->contig ($rca->fetch_by_dbID($map->id));
$feat->adaptor($self->adaptor) if $self->adaptor();
push @out, $feat;
}
......
......@@ -458,7 +458,17 @@ sub transform {
$newexon->adaptor($component_exons[0]->adaptor);
$newexon->contig( $slice );
$newexon->add_supporting_features(@supporting_features);
#copy each of the supporting features and transform them
my @feats;
foreach my $sf (@supporting_features) {
#my $f;
#%$f = %$sf;
#(mcvicker) this would be better as a copy
push @feats, $sf->transform($slice);
}
$newexon->add_supporting_features(@feats);
$newexon->phase( $composite_exon_phase );
#SMJS Hack
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment