From b0b66b4dab0941e40fe6141106bf5fbab70c7092 Mon Sep 17 00:00:00 2001 From: Ian Longden <ianl@sanger.ac.uk> Date: Wed, 8 Dec 2004 11:18:04 +0000 Subject: [PATCH] source_id removed from synonym, dependent_xref and primary_xref; this information is available via the xref. linkage_source_id added to dependent_xref. create_xrefs now returns an arrayref --- .../xref_mapping/XrefParser/BaseParser.pm | 46 +++++++++++-------- .../XrefParser/RefSeqGPFFParser.pm | 7 ++- .../xref_mapping/XrefParser/RefSeqParser.pm | 2 +- .../xref_mapping/XrefParser/UniProtParser.pm | 6 ++- misc-scripts/xref_mapping/sql/table.sql | 4 +- 5 files changed, 39 insertions(+), 26 deletions(-) diff --git a/misc-scripts/xref_mapping/XrefParser/BaseParser.pm b/misc-scripts/xref_mapping/XrefParser/BaseParser.pm index 9b9fc46bc8..145c320ff6 100644 --- a/misc-scripts/xref_mapping/XrefParser/BaseParser.pm +++ b/misc-scripts/xref_mapping/XrefParser/BaseParser.pm @@ -278,21 +278,24 @@ sub get_valid_codes{ sub upload_xref_object_graphs { - my ($self, @xrefs) = @_; + my ($self, $rxrefs) = @_; + my $dbi = dbi(); - if ($#xrefs > -1) { +# print "count = ".$#$rxrefs."\n"; + + if ($#$rxrefs > -1) { # remove all existing xrefs with same source ID(s) - delete_by_source(\@xrefs); + delete_by_source($rxrefs); # upload new ones print "Uploading xrefs\n"; my $xref_sth = $dbi->prepare("INSERT INTO xref (accession,version,label,description,source_id,species_id) VALUES(?,?,?,?,?,?)"); - my $pri_insert_sth = $dbi->prepare("INSERT INTO primary_xref VALUES(?,?,?,?,?)"); + my $pri_insert_sth = $dbi->prepare("INSERT INTO primary_xref VALUES(?,?,?,?)"); my $pri_update_sth = $dbi->prepare("UPDATE primary_xref SET sequence=? WHERE xref_id=?"); - my $syn_sth = $dbi->prepare("INSERT INTO synonym VALUES(?,?,?)"); + my $syn_sth = $dbi->prepare("INSERT INTO synonym VALUES(?,?)"); my $dep_sth = $dbi->prepare("INSERT INTO dependent_xref VALUES(?,?,?,?)"); my $xref_update_label_sth = $dbi->prepare("UPDATE xref SET label=? 
WHERE xref_id=?"); my $xref_update_descr_sth = $dbi->prepare("UPDATE xref SET description=? WHERE xref_id=?"); @@ -300,7 +303,7 @@ sub upload_xref_object_graphs { local $xref_sth->{RaiseError}; # disable error handling here as we'll do it ourselves local $xref_sth->{PrintError}; - foreach my $xref (@xrefs) { + foreach my $xref (@{$rxrefs}) { my $xref_id; # Create entry in xref table and note ID if(! $xref_sth->execute($xref->{ACCESSION}, @@ -332,23 +335,26 @@ sub upload_xref_object_graphs { $pri_insert_sth->execute($xref_id, $xref->{SEQUENCE}, $xref->{SEQUENCE_TYPE}, - $xref->{STATUS}, - $xref->{SOURCE_ID}) || die $dbi->errstr; + $xref->{STATUS}) || die $dbi->errstr; } # if there are synonyms, create xrefs for them and entries in the synonym table foreach my $syn (@{$xref->{SYNONYMS}}) { - $xref_sth->execute($syn, - "", - "", - "", - $xref->{SOURCE_ID}, - $xref->{SPECIES_ID}); - - my $syn_xref_id = insert_or_select($xref_sth, $dbi->err, $syn, $xref->{SOURCE_ID}); - - $syn_sth->execute($xref_id, $syn_xref_id, $xref->{SOURCE_ID} ) || die $dbi->errstr; + my $syn_xref_id = get_xref($syn,$xref->{SOURCE_ID}); + if(!defined($syn_xref_id)){ + $xref_sth->execute($syn, + "", + "", + "", + $xref->{SOURCE_ID}, + $xref->{SPECIES_ID}); + $syn_xref_id = get_xref($syn,$xref->{SOURCE_ID}); + } + if(!defined($syn_xref_id)){ + print STDERR $xref->{ACCESSION}."\n$syn\n"; + } + $syn_sth->execute($xref_id, $syn_xref_id ) || die "$dbi->errstr \n $xref_id\n $syn_xref_id\n"; } # foreach syn @@ -370,9 +376,9 @@ sub upload_xref_object_graphs { print STDERR "dbi\t$dbi->err \n$dep{ACCESSION} \n $dep{SOURCE_ID} \n"; } if(!defined($dep_xref_id)){ - print STDERR "$dep{ACCESSION} \n $dep{SOURCE_ID} \n".$dbi->err."\n"; + print STDERR "acc = $dep{ACCESSION} \nlink = $dep{LINKAGE_SOURCE_ID} \n".$dbi->err."\n"; } - $dep_sth->execute($xref_id, $dep_xref_id, $dep{LINKAGE_ANNOTATION}, $dep{SOURCE_ID} ) || die $dbi->errstr; + $dep_sth->execute($xref_id, $dep_xref_id, $dep{LINKAGE_ANNOTATION}, 
$dep{LINKAGE_SOURCE_ID} ) || die $dbi->errstr; # TODO linkage anntation? } # foreach dep diff --git a/misc-scripts/xref_mapping/XrefParser/RefSeqGPFFParser.pm b/misc-scripts/xref_mapping/XrefParser/RefSeqGPFFParser.pm index bfc69b0881..8a677b9541 100644 --- a/misc-scripts/xref_mapping/XrefParser/RefSeqGPFFParser.pm +++ b/misc-scripts/xref_mapping/XrefParser/RefSeqGPFFParser.pm @@ -141,24 +141,28 @@ sub create_xrefs { foreach my $ll (@LocusIDline) { my %dep; $dep{SOURCE_ID} = $dependent_sources{LocusLink}; + $dep{LINKAGE_SOURCE_ID} = $source_id; $dep{ACCESSION} = $ll; push @{$xref->{DEPENDENT_XREFS}}, \%dep; } foreach my $mim (@mimline) { my %dep; $dep{SOURCE_ID} = $dependent_sources{MIM}; + $dep{LINKAGE_SOURCE_ID} = $source_id; $dep{ACCESSION} = $mim; push @{$xref->{DEPENDENT_XREFS}}, \%dep; } foreach my $med (@medline) { my %dep; $dep{SOURCE_ID} = $dependent_sources{MEDLINE}; + $dep{LINKAGE_SOURCE_ID} = $source_id; $dep{ACCESSION} = $med; push @{$xref->{DEPENDENT_XREFS}}, \%dep; } foreach my $pub (@pubmed) { my %dep; $dep{SOURCE_ID} = $dependent_sources{PUBMED}; + $dep{LINKAGE_SOURCE_ID} = $source_id; $dep{ACCESSION} = $pub; push @{$xref->{DEPENDENT_XREFS}}, \%dep; } @@ -169,6 +173,7 @@ sub create_xrefs { if($mrna){ my %mrna_dep; $mrna_dep{SOURCE_ID} = $source_id; # source is still RefSeq + $mrna_dep{LINKAGE_SOURCE_ID} = $source_id; my ($mrna_acc,$mrna_ver) = split (/\./,$mrna); $mrna_dep{ACCESSION} = $mrna_acc; @@ -185,7 +190,7 @@ sub create_xrefs { print "Read " . scalar(@xrefs) ." xrefs from $file\n"; - return @xrefs; + return \@xrefs; } diff --git a/misc-scripts/xref_mapping/XrefParser/RefSeqParser.pm b/misc-scripts/xref_mapping/XrefParser/RefSeqParser.pm index 7ea1d3f5a9..83f3eb3f72 100644 --- a/misc-scripts/xref_mapping/XrefParser/RefSeqParser.pm +++ b/misc-scripts/xref_mapping/XrefParser/RefSeqParser.pm @@ -121,7 +121,7 @@ sub create_xrefs { print "Read " . scalar(@xrefs) ." 
xrefs from $file\n"; - return @xrefs; + return \@xrefs; } diff --git a/misc-scripts/xref_mapping/XrefParser/UniProtParser.pm b/misc-scripts/xref_mapping/XrefParser/UniProtParser.pm index 78449c9f27..90e90d2968 100644 --- a/misc-scripts/xref_mapping/XrefParser/UniProtParser.pm +++ b/misc-scripts/xref_mapping/XrefParser/UniProtParser.pm @@ -195,6 +195,7 @@ sub create_xrefs { # create dependent xref structure & store it my %dep; $dep{SOURCE_NAME} = $source; + $dep{LINKAGE_SOURCE_ID} = $xref->{SOURCE_ID}; $dep{SOURCE_ID} = $dependent_sources{$source}; $dep{ACCESSION} = $acc; push @{$xref->{DEPENDENT_XREFS}}, \%dep; # array of hashrefs @@ -205,6 +206,7 @@ sub create_xrefs { my %dep2; $dep2{SOURCE_NAME} = $source; $dep2{SOURCE_ID} = $dependent_sources{protein_id}; + $dep2{LINKAGE_SOURCE_ID} = $xref->{SOURCE_ID}; $dep2{ACCESSION} = $protein_id; push @{$xref->{DEPENDENT_XREFS}}, \%dep2; # array of hashrefs } @@ -219,6 +221,7 @@ sub create_xrefs { my %medline_dep; $medline_dep{SOURCE_ID} = $dependent_sources{PUBMED}; + $medline_dep{LINKAGE_SOURCE_ID} = $xref->{SOURCE_ID}; $medline_dep{ACCESSION} = $medline; push @{$xref->{DEPENDENT_XREFS}}, \%medline_dep; @@ -229,6 +232,7 @@ sub create_xrefs { my %pubmed_dep; $pubmed_dep{SOURCE_ID} = $dependent_sources{PUBMED}; + $pubmed_dep{LINKAGE_SOURCE_ID} = $xref->{SOURCE_ID}; $pubmed_dep{ACCESSION} = $pubmed; push @{$xref->{DEPENDENT_XREFS}}, \%pubmed_dep; @@ -242,7 +246,7 @@ sub create_xrefs { print "Read $num_sp SwissProt xrefs and $num_sptr SPTrEMBL xrefs from $file\n"; - return @xrefs; + return \@xrefs; #TODO - currently include records from other species - filter on OX line?? 
} diff --git a/misc-scripts/xref_mapping/sql/table.sql b/misc-scripts/xref_mapping/sql/table.sql index df13b89a24..105e7d6a01 100755 --- a/misc-scripts/xref_mapping/sql/table.sql +++ b/misc-scripts/xref_mapping/sql/table.sql @@ -28,7 +28,6 @@ CREATE TABLE primary_xref ( sequence mediumtext, sequence_type enum('dna','peptide'), status enum('experimental','predicted'), - source_id int unsigned not null, PRIMARY KEY (xref_id) @@ -41,7 +40,7 @@ CREATE TABLE dependent_xref ( master_xref_id int unsigned not null, dependent_xref_id int unsigned not null, linkage_annotation varchar(255), - source_id int unsigned not null, + linkage_source_id int unsigned not null, KEY master_idx(master_xref_id), KEY dependent_idx(dependent_xref_id) @@ -54,7 +53,6 @@ CREATE TABLE synonym ( xref_id int unsigned not null, synonym_xref_id int unsigned not null, - source_id int unsigned not null, KEY xref_idx(xref_id) -- GitLab