Skip to content
Snippets Groups Projects
Commit 403064f4 authored by Andreas Kusalananda Kähäri's avatar Andreas Kusalananda Kähäri
Browse files

Mark up places where penalties are given. Also fix potential bug in

TranscriptScoreBuilder.pm
parent df01fdcd
No related branches found
No related tags found
No related merge requests found
......@@ -645,57 +645,67 @@ sub parse_exonerate_results {
sub non_mapped_transcript_rescore {
my $self = shift;
my $matrix = shift;
my $self = shift;
my $matrix = shift;
my $transcript_mappings = shift;
# argument checks
unless ($matrix and
$matrix->isa('Bio::EnsEMBL::IdMapping::ScoredMappingMatrix')) {
throw('Need a Bio::EnsEMBL::IdMapping::ScoredMappingMatrix of exons.');
unless ($matrix
and $matrix->isa('Bio::EnsEMBL::IdMapping::ScoredMappingMatrix') )
{
throw(
'Need a Bio::EnsEMBL::IdMapping::ScoredMappingMatrix of exons.');
}
unless ($transcript_mappings and
$transcript_mappings->isa('Bio::EnsEMBL::IdMapping::MappingList')) {
throw('Need a Bio::EnsEMBL::IdMapping::MappingList of transcripts.');
unless ( $transcript_mappings
and
$transcript_mappings->isa('Bio::EnsEMBL::IdMapping::MappingList') )
{
throw(
'Need a Bio::EnsEMBL::IdMapping::MappingList of transcripts.');
}
# create of lookup hash of mapped source transcripts to target transcripts
my %transcript_lookup = map { $_->source => $_->target }
# create of lookup hash of mapped source transcripts to target
# transcripts
my %transcript_lookup =
map { $_->source => $_->target }
@{ $transcript_mappings->get_all_Entries };
my $i = 0;
foreach my $entry (@{ $matrix->get_all_Entries }) {
foreach my $entry ( @{ $matrix->get_all_Entries } ) {
my @source_transcripts = @{ $self->cache->get_by_key(
'transcripts_by_exon_id', 'source', $entry->source) };
my @target_transcripts = @{ $self->cache->get_by_key(
'transcripts_by_exon_id', 'target', $entry->target) };
my @source_transcripts = @{
$self->cache->get_by_key( 'transcripts_by_exon_id', 'source',
$entry->source ) };
my @target_transcripts = @{
$self->cache->get_by_key( 'transcripts_by_exon_id', 'target',
$entry->target ) };
my $found_mapped = 0;
TR:
TR:
foreach my $source_tr (@source_transcripts) {
foreach my $target_tr (@target_transcripts) {
my $mapped_target = $transcript_lookup{$source_tr->id};
my $mapped_target = $transcript_lookup{ $source_tr->id };
if ($mapped_target and ($mapped_target == $target_tr->id)) {
if ( $mapped_target and ( $mapped_target == $target_tr->id ) ) {
$found_mapped = 1;
last TR;
}
}
}
unless ($found_mapped) {
$matrix->set_score($entry->source, $entry->target, ($entry->score * 0.8));
# PENALTY: The exon stable ID is on a new transcript.
$matrix->set_score( $entry->source(), $entry->target(),
0.8*$entry->score() );
$i++;
}
}
$self->logger->debug("Scored exons in non-mapped transcripts: $i\n", 1);
}
} ## end foreach my $entry ( @{ $matrix...})
$self->logger->debug( "Scored exons in non-mapped transcripts: $i\n",
1 );
} ## end sub non_mapped_transcript_rescore
1;
......@@ -413,9 +413,10 @@ sub biotype_gene_rescore {
$entry->target );
if ( $source_gene->biotype() ne $target_gene->biotype() ) {
#$self->logger->debug("biotype ".$entry->to_string."\n");
# PENALTY: The gene stable ID is now on a gene with a different
# biotype.
$matrix->set_score( $entry->source(), $entry->target(),
( $entry->score()*0.25 ) );
0.25*$entry->score() );
$i++;
}
}
......
......@@ -164,50 +164,54 @@ sub create_shrinked_matrix {
=cut
sub internal_id_rescore {
my $self = shift;
my $self = shift;
my $matrix = shift;
unless ($matrix and
$matrix->isa('Bio::EnsEMBL::IdMapping::ScoredMappingMatrix')) {
unless ($matrix
and $matrix->isa('Bio::EnsEMBL::IdMapping::ScoredMappingMatrix') )
{
throw('Need a Bio::EnsEMBL::IdMapping::ScoredMappingMatrix.');
}
my $i = 0;
foreach my $source (@{ $matrix->get_all_sources }) {
my @entries = sort { $b <=> $a }
@{ $matrix->get_Entries_for_source($source) };
foreach my $source ( @{ $matrix->get_all_sources } ) {
my @entries =
sort { $b <=> $a } @{ $matrix->get_Entries_for_source($source) };
# nothing to do if we only have one mapping
next unless (scalar(@entries) > 1);
next unless ( scalar(@entries) > 1 );
# only penalise if mappings are ambiguous
next unless ($entries[0]->score == $entries[1]->score);
next unless ( $entries[0]->score == $entries[1]->score );
# only penalise if one source id == target id where score == best score
# only penalise if one source id == target id where score == best
# score
my $ambiguous = 0;
foreach my $e (@entries) {
if ($e->target == $source and $e->score == $entries[0]) {
if ( $e->target == $source and $e->score == $entries[0] ) {
$ambiguous = 1;
}
}
next unless ($ambiguous);
# now penalise those where source id != target id and score == best score
# now penalise those where source id != target id and score == best
# score
foreach my $e (@entries) {
if ($e->target != $source and $e->score == $entries[0]) {
$matrix->set_score($source, $e->target, ($e->score * 0.8));
if ( $e->target != $source and $e->score == $entries[0] ) {
# PENALTY: This stable ID is not any longer on the same object.
$matrix->set_score( $source, $e->target(), 0.8*$e->score() );
$i++;
}
}
}
$self->logger->debug("Scored entries with internal ID mismatch: $i\n", 1);
}
} ## end foreach my $source ( @{ $matrix...})
$self->logger->debug("Scored entries with internal ID mismatch: $i\n",
1 );
} ## end sub internal_id_rescore
=head2 log_matrix_stats
......
......@@ -355,82 +355,102 @@ sub score_matrix_from_flag_matrix {
sub different_translation_rescore {
my $self = shift;
my $self = shift;
my $matrix = shift;
unless ($matrix and
$matrix->isa('Bio::EnsEMBL::IdMapping::ScoredMappingMatrix')) {
unless ($matrix
and $matrix->isa('Bio::EnsEMBL::IdMapping::ScoredMappingMatrix') )
{
throw('Need a Bio::EnsEMBL::IdMapping::ScoredMappingMatrix.');
}
my $i = 0;
foreach my $entry (sort { $b->score <=> $a->score }
@{ $matrix->get_all_Entries }) {
foreach my $entry ( sort { $b->score <=> $a->score }
@{ $matrix->get_all_Entries } )
{
# we only do this for perfect matches, i.e. transcript score == 1
last if ($entry->score < 1);
last if ( $entry->score < 1 );
my $source_tl = $self->cache->get_by_key('transcripts_by_id',
'source', $entry->source)->translation;
my $target_tl = $self->cache->get_by_key('transcripts_by_id',
'target', $entry->target)->translation;
my $source_tl =
$self->cache->get_by_key( 'transcripts_by_id', 'source',
$entry->source )->translation;
my $target_tl =
$self->cache->get_by_key( 'transcripts_by_id', 'target',
$entry->target )->translation;
# no penalty if both transcripts have no translation
next if (!$source_tl and !$target_tl);
if (!$source_tl or !$target_tl or
($source_tl->seq ne $target_tl->seq)) {
# set score to a value less than 1
$matrix->set_score($entry->source, $entry->target, 0.98);
next if ( !$source_tl and !$target_tl );
if ( !$source_tl
or !$target_tl
or ( $source_tl->seq ne $target_tl->seq ) )
{
# PENALTY: The transcript stable ID is now on a transcript with a
# different translation.
$matrix->set_score( $entry->source(), $entry->target(),
0.98*$entry->score() );
$i++;
}
}
$self->logger->debug("Non-perfect translations on perfect transcripts: $i\n", 1);
}
} ## end foreach my $entry ( sort { ...})
$self->logger->debug(
"Non-perfect translations on perfect transcripts: $i\n",
1 );
} ## end sub different_translation_rescore
sub non_mapped_gene_rescore {
my $self = shift;
my $matrix = shift;
my $self = shift;
my $matrix = shift;
my $gene_mappings = shift;
# argument checks
unless ($matrix and
$matrix->isa('Bio::EnsEMBL::IdMapping::ScoredMappingMatrix')) {
throw('Need a transcript Bio::EnsEMBL::IdMapping::ScoredMappingMatrix.');
unless ($matrix
and $matrix->isa('Bio::EnsEMBL::IdMapping::ScoredMappingMatrix') )
{
throw(
'Need a transcript Bio::EnsEMBL::IdMapping::ScoredMappingMatrix.'
);
}
unless ($gene_mappings and
$gene_mappings->isa('Bio::EnsEMBL::IdMapping::MappingList')) {
unless ( $gene_mappings
and $gene_mappings->isa('Bio::EnsEMBL::IdMapping::MappingList') )
{
throw('Need a gene Bio::EnsEMBL::IdMapping::MappingList.');
}
# create of lookup hash of mapped source genes to target genes
my %gene_lookup = map { $_->source => $_->target }
my %gene_lookup =
map { $_->source => $_->target }
@{ $gene_mappings->get_all_Entries };
my $i = 0;
foreach my $entry (@{ $matrix->get_all_Entries }) {
foreach my $entry ( @{ $matrix->get_all_Entries } ) {
my $source_gene = $self->cache->get_by_key('genes_by_transcript_id',
'source', $entry->source);
my $target_gene = $self->cache->get_by_key('genes_by_transcript_id',
'target', $entry->target);
my $source_gene =
$self->cache->get_by_key( 'genes_by_transcript_id', 'source',
$entry->source );
my $target_gene =
$self->cache->get_by_key( 'genes_by_transcript_id', 'target',
$entry->target );
my $mapped_target = $gene_lookup{$source_gene->id};
my $mapped_target = $gene_lookup{ $source_gene->id };
if (!$mapped_target or ($mapped_target != $target_gene->id)) {
$matrix->set_score($entry->source, $entry->target, ($entry->score * 0.8));
if ( !$mapped_target or ( $mapped_target != $target_gene->id ) ) {
# PENALTY: The transcript stable ID is now on a different gene.
$matrix->set_score( $entry->source(), $entry->target(),
0.8*$entry->score() );
$i++;
}
}
$self->logger->debug("Scored transcripts in non-mapped genes: $i\n", 1);
}
$self->logger->debug( "Scored transcripts in non-mapped genes: $i\n",
1 );
} ## end sub non_mapped_gene_rescore
1;
......
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment