diff --git a/misc-scripts/xref_mapping/XrefParser/BaseParser.pm b/misc-scripts/xref_mapping/XrefParser/BaseParser.pm index 9b9fc46bc8a768697e6bd22904972e6e6d7c9151..145c320ff653290a883ce654efb43df61bf2a0d2 100644 --- a/misc-scripts/xref_mapping/XrefParser/BaseParser.pm +++ b/misc-scripts/xref_mapping/XrefParser/BaseParser.pm @@ -278,21 +278,24 @@ sub get_valid_codes{ sub upload_xref_object_graphs { - my ($self, @xrefs) = @_; + my ($self, $rxrefs) = @_; + my $dbi = dbi(); - if ($#xrefs > -1) { +# print "count = ".$#$rxrefs."\n"; + + if ($#$rxrefs > -1) { # remove all existing xrefs with same source ID(s) - delete_by_source(\@xrefs); + delete_by_source($rxrefs); # upload new ones print "Uploading xrefs\n"; my $xref_sth = $dbi->prepare("INSERT INTO xref (accession,version,label,description,source_id,species_id) VALUES(?,?,?,?,?,?)"); - my $pri_insert_sth = $dbi->prepare("INSERT INTO primary_xref VALUES(?,?,?,?,?)"); + my $pri_insert_sth = $dbi->prepare("INSERT INTO primary_xref VALUES(?,?,?,?)"); my $pri_update_sth = $dbi->prepare("UPDATE primary_xref SET sequence=? WHERE xref_id=?"); - my $syn_sth = $dbi->prepare("INSERT INTO synonym VALUES(?,?,?)"); + my $syn_sth = $dbi->prepare("INSERT INTO synonym VALUES(?,?)"); my $dep_sth = $dbi->prepare("INSERT INTO dependent_xref VALUES(?,?,?,?)"); my $xref_update_label_sth = $dbi->prepare("UPDATE xref SET label=? WHERE xref_id=?"); my $xref_update_descr_sth = $dbi->prepare("UPDATE xref SET description=? WHERE xref_id=?"); @@ -300,7 +303,7 @@ sub upload_xref_object_graphs { local $xref_sth->{RaiseError}; # disable error handling here as we'll do it ourselves local $xref_sth->{PrintError}; - foreach my $xref (@xrefs) { + foreach my $xref (@{$rxrefs}) { my $xref_id; # Create entry in xref table and note ID if(! $xref_sth->execute($xref->{ACCESSION}, @@ -332,23 +335,26 @@ sub upload_xref_object_graphs { $pri_insert_sth->execute($xref_id, $xref->{SEQUENCE}, $xref->{SEQUENCE_TYPE}, - $xref->{STATUS}, - $xref->{SOURCE_ID}) || die $dbi->errstr; + $xref->{STATUS}) || die $dbi->errstr; } # if there are synonyms, create xrefs for them and entries in the synonym table foreach my $syn (@{$xref->{SYNONYMS}}) { - $xref_sth->execute($syn, - "", - "", - "", - $xref->{SOURCE_ID}, - $xref->{SPECIES_ID}); - - my $syn_xref_id = insert_or_select($xref_sth, $dbi->err, $syn, $xref->{SOURCE_ID}); - - $syn_sth->execute($xref_id, $syn_xref_id, $xref->{SOURCE_ID} ) || die $dbi->errstr; + my $syn_xref_id = get_xref($syn,$xref->{SOURCE_ID}); + if(!defined($syn_xref_id)){ + $xref_sth->execute($syn, + "", + "", + "", + $xref->{SOURCE_ID}, + $xref->{SPECIES_ID}); + $syn_xref_id = get_xref($syn,$xref->{SOURCE_ID}); + } + if(!defined($syn_xref_id)){ + print STDERR $xref->{ACCESSION}."\n$syn\n"; + } + $syn_sth->execute($xref_id, $syn_xref_id ) || die "$dbi->errstr \n $xref_id\n $syn_xref_id\n"; } # foreach syn @@ -370,9 +376,9 @@ sub upload_xref_object_graphs { print STDERR "dbi\t$dbi->err \n$dep{ACCESSION} \n $dep{SOURCE_ID} \n"; } if(!defined($dep_xref_id)){ - print STDERR "$dep{ACCESSION} \n $dep{SOURCE_ID} \n".$dbi->err."\n"; + print STDERR "acc = $dep{ACCESSION} \nlink = $dep{LINKAGE_SOURCE_ID} \n".$dbi->err."\n"; } - $dep_sth->execute($xref_id, $dep_xref_id, $dep{LINKAGE_ANNOTATION}, $dep{SOURCE_ID} ) || die $dbi->errstr; + $dep_sth->execute($xref_id, $dep_xref_id, $dep{LINKAGE_ANNOTATION}, $dep{LINKAGE_SOURCE_ID} ) || die $dbi->errstr; # TODO linkage anntation? } # foreach dep diff --git a/misc-scripts/xref_mapping/XrefParser/RefSeqGPFFParser.pm b/misc-scripts/xref_mapping/XrefParser/RefSeqGPFFParser.pm index bfc69b08812bedeb265e0745641c9fc5feed96cd..8a677b95416e38c804bd42bdf886efd69085ba30 100644 --- a/misc-scripts/xref_mapping/XrefParser/RefSeqGPFFParser.pm +++ b/misc-scripts/xref_mapping/XrefParser/RefSeqGPFFParser.pm @@ -141,24 +141,28 @@ sub create_xrefs { foreach my $ll (@LocusIDline) { my %dep; $dep{SOURCE_ID} = $dependent_sources{LocusLink}; + $dep{LINKAGE_SOURCE_ID} = $source_id; $dep{ACCESSION} = $ll; push @{$xref->{DEPENDENT_XREFS}}, \%dep; } foreach my $mim (@mimline) { my %dep; $dep{SOURCE_ID} = $dependent_sources{MIM}; + $dep{LINKAGE_SOURCE_ID} = $source_id; $dep{ACCESSION} = $mim; push @{$xref->{DEPENDENT_XREFS}}, \%dep; } foreach my $med (@medline) { my %dep; $dep{SOURCE_ID} = $dependent_sources{MEDLINE}; + $dep{LINKAGE_SOURCE_ID} = $source_id; $dep{ACCESSION} = $med; push @{$xref->{DEPENDENT_XREFS}}, \%dep; } foreach my $pub (@pubmed) { my %dep; $dep{SOURCE_ID} = $dependent_sources{PUBMED}; + $dep{LINKAGE_SOURCE_ID} = $source_id; $dep{ACCESSION} = $pub; push @{$xref->{DEPENDENT_XREFS}}, \%dep; } @@ -169,6 +173,7 @@ sub create_xrefs { if($mrna){ my %mrna_dep; $mrna_dep{SOURCE_ID} = $source_id; # source is still RefSeq + $mrna_dep{LINKAGE_SOURCE_ID} = $source_id; my ($mrna_acc,$mrna_ver) = split (/\./,$mrna); $mrna_dep{ACCESSION} = $mrna_acc; @@ -185,7 +190,7 @@ sub create_xrefs { print "Read " . scalar(@xrefs) ." xrefs from $file\n"; - return @xrefs; + return \@xrefs; } diff --git a/misc-scripts/xref_mapping/XrefParser/RefSeqParser.pm b/misc-scripts/xref_mapping/XrefParser/RefSeqParser.pm index 7ea1d3f5a97a6a3a5acc66e325c62bd3f5e64673..83f3eb3f72a5e7496e84dbe59f606860748d22cd 100644 --- a/misc-scripts/xref_mapping/XrefParser/RefSeqParser.pm +++ b/misc-scripts/xref_mapping/XrefParser/RefSeqParser.pm @@ -121,7 +121,7 @@ sub create_xrefs { print "Read " . scalar(@xrefs) ." xrefs from $file\n"; - return @xrefs; + return \@xrefs; } diff --git a/misc-scripts/xref_mapping/XrefParser/UniProtParser.pm b/misc-scripts/xref_mapping/XrefParser/UniProtParser.pm index 78449c9f271f01315a7567e2d4d601d0911ae324..90e90d296870d510f4c011ce9de9a03e147e5f11 100644 --- a/misc-scripts/xref_mapping/XrefParser/UniProtParser.pm +++ b/misc-scripts/xref_mapping/XrefParser/UniProtParser.pm @@ -195,6 +195,7 @@ sub create_xrefs { # create dependent xref structure & store it my %dep; $dep{SOURCE_NAME} = $source; + $dep{LINKAGE_SOURCE_ID} = $xref->{SOURCE_ID}; $dep{SOURCE_ID} = $dependent_sources{$source}; $dep{ACCESSION} = $acc; push @{$xref->{DEPENDENT_XREFS}}, \%dep; # array of hashrefs @@ -205,6 +206,7 @@ sub create_xrefs { my %dep2; $dep2{SOURCE_NAME} = $source; $dep2{SOURCE_ID} = $dependent_sources{protein_id}; + $dep2{LINKAGE_SOURCE_ID} = $xref->{SOURCE_ID}; $dep2{ACCESSION} = $protein_id; push @{$xref->{DEPENDENT_XREFS}}, \%dep2; # array of hashrefs } @@ -219,6 +221,7 @@ sub create_xrefs { my %medline_dep; $medline_dep{SOURCE_ID} = $dependent_sources{PUBMED}; + $medline_dep{LINKAGE_SOURCE_ID} = $xref->{SOURCE_ID}; $medline_dep{ACCESSION} = $medline; push @{$xref->{DEPENDENT_XREFS}}, \%medline_dep; @@ -229,6 +232,7 @@ sub create_xrefs { my %pubmed_dep; $pubmed_dep{SOURCE_ID} = $dependent_sources{PUBMED}; + $pubmed_dep{LINKAGE_SOURCE_ID} = $xref->{SOURCE_ID}; $pubmed_dep{ACCESSION} = $pubmed; push @{$xref->{DEPENDENT_XREFS}}, \%pubmed_dep; @@ -242,7 +246,7 @@ sub create_xrefs { print "Read $num_sp SwissProt xrefs and $num_sptr SPTrEMBL xrefs from $file\n"; - return @xrefs; + return \@xrefs; #TODO - currently include records from other species - filter on OX line?? } diff --git a/misc-scripts/xref_mapping/sql/table.sql b/misc-scripts/xref_mapping/sql/table.sql index df13b89a24e8532744892254c2f62231a4be79d5..105e7d6a0113a5e16c57dce5e2cc78d2c802fe0f 100755 --- a/misc-scripts/xref_mapping/sql/table.sql +++ b/misc-scripts/xref_mapping/sql/table.sql @@ -28,7 +28,6 @@ CREATE TABLE primary_xref ( sequence mediumtext, sequence_type enum('dna','peptide'), status enum('experimental','predicted'), - source_id int unsigned not null, PRIMARY KEY (xref_id) @@ -41,7 +40,7 @@ CREATE TABLE dependent_xref ( master_xref_id int unsigned not null, dependent_xref_id int unsigned not null, linkage_annotation varchar(255), - source_id int unsigned not null, + linkage_source_id int unsigned not null, KEY master_idx(master_xref_id), KEY dependent_idx(dependent_xref_id) @@ -54,7 +53,6 @@ CREATE TABLE synonym ( xref_id int unsigned not null, synonym_xref_id int unsigned not null, - source_id int unsigned not null, KEY xref_idx(xref_id)