From d7ceb81493491d445311a69a421114b405ad441a Mon Sep 17 00:00:00 2001
From: Glenn Proctor <gp1@sanger.ac.uk>
Date: Tue, 8 Feb 2005 08:46:40 +0000
Subject: [PATCH] Improved DB handling in CCDS dumping. Seems to work now.

---
Notes (free text in the format-patch notes area; not part of the commit):

  What the patch changes in dump_direct_xrefs:
    * Before the direct-xref query is prepared, a hash
      %transcript_stable_id_to_translation_stable_id is filled from a
      three-table join (translation, translation_stable_id,
      transcript_stable_id) run through $self->dbi().
    * The SELECT that was previously prepared and executed for each
      stable ID ("SELECT ${type}_id FROM ${type}_stable_id WHERE
      stable_id=...") is removed. The internal ID is now read from the
      in-memory structure returned by build_stable_id_to_internal_id_hash(),
      indexed as ->{$type}->{$ensembl_stable_id}.
    * When $source_id equals the value returned by
      get_source_id_from_source_name($self->xref(), "CCDS"), $type is
      forced to 'translation' and the transcript stable ID is replaced by
      the translation stable ID from the lookup hash. A warning is issued
      when no translation is found.

  What the patch adds:
    * sub build_stable_id_to_internal_id_hash: for each of 'gene',
      'transcript' and 'translation' it reads every (${type}_id, stable_id)
      row from ${type}_stable_id via $self->dbi() and returns a reference
      to a hash of the form {type}{stable_id} = internal_id.

  NOTE(review): the CCDS comparison calls get_source_id_from_source_name()
  inside what appears to be per-row code (the enclosing loop is outside
  the hunks shown); presumably the result is constant and could be
  computed once - confirm against the full file.
  NOTE(review): after the "Can't find translation" warning, execution
  continues and the cache lookup is made with the false/undefined
  stable ID.
  NOTE(review): the statement handles created in
  build_stable_id_to_internal_id_hash are not finish()ed, unlike
  $trans_sth in dump_direct_xrefs.
  NOTE(review): line breaks and indentation below were reconstructed from
  a whitespace-collapsed copy of this patch; the placement of blank
  context lines and the leading whitespace are best-effort and may not
  match the original byte-for-byte.

 .../xref_mapping/XrefMapper/BasicMapper.pm    | 59 +++++++++++++++++--
 1 file changed, 53 insertions(+), 6 deletions(-)

diff --git a/misc-scripts/xref_mapping/XrefMapper/BasicMapper.pm b/misc-scripts/xref_mapping/XrefMapper/BasicMapper.pm
index 45a1c5090f..b46d5b1e9d 100644
--- a/misc-scripts/xref_mapping/XrefMapper/BasicMapper.pm
+++ b/misc-scripts/xref_mapping/XrefMapper/BasicMapper.pm
@@ -905,6 +905,21 @@ sub dump_direct_xrefs {
 
   open (XREF, ">>" . $self->dir() . "/xref.txt");
   open (OBJECT_XREF, ">>" . $self->dir() . "/object_xref.txt");
+  # Will need to look up translation stable ID from transcript stable ID, build hash table
+  print "Building transcript stable ID -> translation stable ID lookup table\n";
+  my %transcript_stable_id_to_translation_stable_id;
+  my $trans_sth = $self->dbi()->prepare("SELECT tss.stable_id as transcript, tls.stable_id AS translation FROM translation tl, translation_stable_id tls, transcript_stable_id tss WHERE tss.transcript_id=tl.transcript_id AND tl.translation_id=tls.translation_id");
+  $trans_sth->execute();
+  my ($transcript_stable_id, $translation_stable_id);
+  $trans_sth->bind_columns(\$transcript_stable_id, \$translation_stable_id);
+  while ($trans_sth->fetch()) {
+    $transcript_stable_id_to_translation_stable_id{$transcript_stable_id} = $translation_stable_id;
+  }
+  $trans_sth->finish();
+
+  # Will need lookup tables for gene/transcript/translation stable ID to internal ID
+  my $stable_id_to_internal_id = $self->build_stable_id_to_internal_id_hash();
+
   # SQL / statement handle for getting all direct xrefs
   my $xref_sql = "SELECT dx.general_xref_id, dx.ensembl_stable_id, dx.type, dx.linkage_xref, x.accession, x.version, x.label, x.description, x.source_id, x.species_id FROM direct_xref dx, xref x WHERE dx.general_xref_id=x.xref_id";
   my $xref_sth = $self->xref()->dbi()->prepare($xref_sql);
@@ -919,12 +934,17 @@ sub dump_direct_xrefs {
     my $external_db_id = $source_to_external_db{$source_id};
     if ($external_db_id) {
 
-      # Look up Ensembl internal ID from stable ID. No joins, so quick.
-      my $core_sql = "SELECT ${type}_id FROM ${type}_stable_id WHERE stable_id=\'$ensembl_stable_id\'" ;
-      my $core_sth = $self->dbi()->prepare($core_sql);
-      $core_sth->execute();
-      my @row = $core_sth->fetchrow_array();
-      my $ensembl_internal_id = $row[0];
+      # In the case of CCDS xrefs, direct_xref is to transcript but we want
+      # the mapping in the core db to be to the *translation*
+      if ($source_id == get_source_id_from_source_name($self->xref(), "CCDS")) {
+        $type = 'translation';
+        my $tmp_esid = $ensembl_stable_id;
+        $ensembl_stable_id = $transcript_stable_id_to_translation_stable_id{$tmp_esid};
+        warn "Can't find translation for transcript $tmp_esid" if (!$ensembl_stable_id);
+        #print "CCDS: transcript $tmp_esid -> translation $ensembl_stable_id\n";
+      }
+
+      my $ensembl_internal_id = $stable_id_to_internal_id->{$type}->{$ensembl_stable_id};
       if ($ensembl_internal_id) {
 
         if (!$xrefs_written{$xref_id}) {
@@ -976,6 +996,33 @@ sub dump_interpro {
 
 }
 
+sub build_stable_id_to_internal_id_hash {
+
+  my $self = shift;
+
+  my %stable_id_to_internal_id;
+
+  foreach my $type ('gene', 'transcript', 'translation') { # Add exon here if required
+
+    print "Caching stable ID -> internal ID links for ${type}s\n";
+
+    my $core_sql = "SELECT ${type}_id, stable_id FROM ${type}_stable_id" ;
+    my $sth = $self->dbi()->prepare($core_sql);
+    $sth->execute();
+    my ($internal_id, $stable_id);
+    $sth->bind_columns(\$internal_id, \$stable_id);
+
+    while ($sth->fetch) {
+
+      $stable_id_to_internal_id{$type}{$stable_id} = $internal_id;
+
+    }
+
+  }
+
+  return \%stable_id_to_internal_id;
+
+}
 
 sub get_ensembl_object_type {
 
-- 
GitLab