From 3c79026df457118d3bf2c38bf007b8967f299b76 Mon Sep 17 00:00:00 2001 From: Magali Ruffier <mr6@ebi.ac.uk> Date: Fri, 12 Dec 2014 09:17:49 +0000 Subject: [PATCH] Revert "ENSCORESW-1116: first attempt at storing some form of scoring in the overlap mappings" overlap score is not identity score, we get the identity from the alignments instead This reverts commit 26152269f07dc8090d6f67b686b196100bf2f541. --- .../XrefParser/RefSeqCoordinateParser.pm | 48 +------------------ 1 file changed, 2 insertions(+), 46 deletions(-) diff --git a/misc-scripts/xref_mapping/XrefParser/RefSeqCoordinateParser.pm b/misc-scripts/xref_mapping/XrefParser/RefSeqCoordinateParser.pm index fdee63dbc6..7e44052000 100644 --- a/misc-scripts/xref_mapping/XrefParser/RefSeqCoordinateParser.pm +++ b/misc-scripts/xref_mapping/XrefParser/RefSeqCoordinateParser.pm @@ -213,10 +213,6 @@ sub run_script { foreach my $transcript_of (sort { $a->start() <=> $b->start() } @$transcripts_of) { my %transcript_result; my %tl_transcript_result; - my %transcript_to_dbid; - my %translation_to_dbid; - my %query_overlap; - my %target_overlap; my $id = $transcript_of->stable_id(); if ($id =~ /^XM_/) { next; } my $exons_of = $transcript_of->get_all_Exons(); @@ -248,16 +244,11 @@ sub run_script { my $exon_match = 0; my $tl_exons = $transcript->get_all_translateable_Exons(); my $tl_exon_match = 0; - my $overall_overlap = 0; - my $overall_overlap_of = 0; - my $tl_overall_overlap = 0; - my $tl_overall_overlap_of = 0; foreach my $exon (@$exons) { my $start = $exon->seq_region_start(); my $end = $exon->seq_region_end(); my $overlap = $rr1->overlap_size('exon', $start, $end); - $overall_overlap += $overlap; $exon_match += $overlap/($end - $start + 1); $rr2->check_and_register('exon', $start, $end); } @@ -266,7 +257,6 @@ sub run_script { my $tl_start = $tl_exon->seq_region_start(); my $tl_end = $tl_exon->seq_region_end(); my $tl_overlap = $rr3->overlap_size('exon', $tl_start, $tl_end); - $tl_overall_overlap += $tl_overlap; 
$tl_exon_match += $tl_overlap/($tl_end - $tl_start + 1); $rr4->check_and_register('exon', $tl_start, $tl_end); } @@ -274,12 +264,11 @@ sub run_script { my $exon_match_of = 0; my $tl_exon_match_of = 0; -# Look for overlap between the two sets of exons +# Look for overlap between the two sets of exons foreach my $exon_of (@$exons_of) { my $start_of = $exon_of->seq_region_start(); my $end_of = $exon_of->seq_region_end(); my $overlap_of = $rr2->overlap_size('exon', $start_of, $end_of); - $overall_overlap_of += $overlap_of; $exon_match_of += $overlap_of/($end_of - $start_of + 1); } @@ -287,7 +276,6 @@ sub run_script { my $tl_start_of = $tl_exon_of->seq_region_start(); my $tl_end_of = $tl_exon_of->seq_region_end(); my $tl_overlap_of = $rr4->overlap_size('exon', $tl_start_of, $tl_end_of); - $tl_overall_overlap_of += $tl_overlap_of; $tl_exon_match_of += $tl_overlap_of/($tl_end_of - $tl_start_of + 1); } @@ -296,19 +284,10 @@ sub run_script { my $tl_score = 0; if (scalar(@$tl_exons_of) > 0) { $tl_score = ( ($tl_exon_match_of + $tl_exon_match)) / (scalar(@$tl_exons_of) + scalar(@$tl_exons) ); - if ($transcript->translation) { - my $tl_stable_id = $transcript->translation->stable_id; - $translation_to_dbid{$tl_stable_id} = $transcript->translation->stable_id; - $query_overlap{$tl_stable_id} = $tl_overall_overlap_of/$transcript_of->translation->length(); - $target_overlap{$tl_stable_id} = $tl_overall_overlap/$transcript->translation->length(); - } } if ($transcript->biotype eq $transcript_of->biotype) { $transcript_result{$transcript->stable_id} = $score; - $transcript_to_dbid{$transcript->stable_id} = $transcript->dbID; $tl_transcript_result{$transcript->stable_id} = $tl_score; - $query_overlap{$transcript->stable_id} = $overall_overlap_of/$transcript_of->length() * 100; - $target_overlap{$transcript->stable_id} = $overall_overlap/$transcript->length() * 100; } } @@ -350,17 +329,6 @@ sub run_script { species_id => $species_id, info_type => 'DIRECT' }); 
$self->add_direct_xref($xref_id, $best_id, "Transcript", ""); - my $ensembl_id = $transcript_to_dbid{$best_id}; - my $object_xref_id = $self->add_object_xref({ - xref_id => $xref_id, - ensembl_id => $ensembl_id, - object_type => 'Transcript'}); -## Add 'identity_xref' to store the overlap values - $self->add_identity_xref({ - object_xref_id => $object_xref_id, - query_identity => $query_overlap{$best_id}, - target_identity => $target_overlap{$best_id}, - score => $best_score }); # Also store refseq protein as direct xref for ensembl translation, if translation exists my $ta_of = $otherf_dba->get_TranscriptAdaptor(); @@ -372,7 +340,6 @@ sub run_script { if (defined $tl && defined $tl_of) { if ($tl_of->seq eq $tl->seq) { ($acc, $version) = split(/\./, $tl_of->stable_id()); - my $tl_stable_id = $tl->stable_id; my $tl_xref_id = $self->add_xref({ acc => $acc, version => $version, label => $acc, @@ -380,18 +347,7 @@ sub run_script { source_id => $peptide_source_id, species_id => $species_id, info_type => 'DIRECT' }); - $self->add_direct_xref($tl_xref_id, $tl_stable_id, "Translation", ""); - my $tl_ensembl_id = $translation_to_dbid{$tl_stable_id}; - my $tl_object_xref_id = $self->add_object_xref({ - xref_id => $tl_xref_id, - ensembl_id => $tl_ensembl_id, - object_type => 'Translation'}); -## Add 'identity_xref' to store the overlap values - $self->add_identity_xref({ - object_xref_id => $tl_object_xref_id, - query_identity => $query_overlap{$tl_stable_id}, - target_identity => $target_overlap{$tl_stable_id}, - score => $best_score }); + $self->add_direct_xref($tl_xref_id, $tl->stable_id(), "Translation", ""); } } } -- GitLab