diff --git a/modules/Bio/EnsEMBL/IdMapping/TranscriptScoreBuilder.pm b/modules/Bio/EnsEMBL/IdMapping/TranscriptScoreBuilder.pm
index 5a4aadfcbe4c4d9ea7830833ddc0457d39d65279..a1a8d234e0de41caae3ba47551ac06211ae147f6 100644
--- a/modules/Bio/EnsEMBL/IdMapping/TranscriptScoreBuilder.pm
+++ b/modules/Bio/EnsEMBL/IdMapping/TranscriptScoreBuilder.pm
@@ -317,20 +317,37 @@ sub score_matrix_from_flag_matrix {
       }
       else {
 
+        my $source_transcript_biotype_group = $self->get_biotype_group($source_transcript->biotype());
+        my $target_transcript_biotype_group = $self->get_biotype_group($target_transcript->biotype());
+
 =cut
         # debug
         $self->logger->info($source_transcript->id.":".$target_transcript->id.
                 " source score: $source_transcript_score".
                 " source length: $source_transcript_length".
+                " source biotype:" . $source_transcript->biotype() .
+                " source group: $source_transcript_biotype_group".
                 " target score: $target_transcript_score".
+                " target biotype:" . $target_transcript->biotype() .
+                " target group: $target_transcript_biotype_group".
                 " target length: $target_transcript_length\n");
 =cut
 
-        # everything is fine, add score to matrix
         my $transcript_score =
           ($source_transcript_score + $target_transcript_score) /
           ($source_transcript_length + $target_transcript_length);
 
+## Add penalty if biotypes are different
+        if ($source_transcript->biotype() ne $target_transcript->biotype()) {
+          $transcript_score = $transcript_score * 0.9;
+        }
+
+## Add penalty if biotype groups are different
+        if ($source_transcript_biotype_group ne $target_transcript_biotype_group) {
+          $transcript_score = $transcript_score * 0.8;
+        }
+
+        # everything is fine, add score to matrix
         if ($transcript_score > $transcript_score_threshold) {
           $matrix->add_score($source_transcript->id, $target_transcript->id,
                              $transcript_score);
@@ -493,6 +510,47 @@ sub non_mapped_gene_rescore {
                                1 );
 } ## end sub non_mapped_gene_rescore
 
+
+# Look up the biotype group of a transcript biotype in the production
+# database's biotype table (rows that are current, object_type 'transcript'
+# and whose db_type contains 'core').
+#
+# Arg [1]    : String $biotype - transcript biotype name
+# Returntype : first value returned by the query, or undef when the biotype
+#              is undefined or no value comes back; callers comparing the
+#              result with eq/ne should allow for undef
+#
+# Results (including misses) are memoised per process, so each distinct
+# biotype is queried at most once instead of on every call.
+# NOTE(review): the memo is keyed on the biotype name only; this assumes a
+# single production database per process - confirm.
+{
+  my %biotype_group_for;
+
+  sub get_biotype_group {
+    my ($self, $biotype) = @_;
+
+    # NULL never matches "name = ?", so answer without a query. The explicit
+    # undef keeps the single-scalar return shape of the lookup path below.
+    return undef unless defined $biotype;
+
+    if (!exists $biotype_group_for{$biotype}) {
+      my $helper = $self->cache->get_production_DBAdaptor()->dbc()->sql_helper();
+
+      my $sql = q{
+     SELECT biotype_group
+     FROM biotype
+     WHERE object_type = 'transcript'
+     AND is_current = 1
+     AND name = ?
+     AND db_type like '%core%' };
+      my $result = $helper->execute_simple(-SQL => $sql, -PARAMS => [$biotype]);
+
+      # Store even an undef result so unknown biotypes are not re-queried.
+      $biotype_group_for{$biotype} = $result->[0];
+    }
+
+    return $biotype_group_for{$biotype};
+  }
+}
+
 1;
 
 