Skip to content
Snippets Groups Projects
Commit 7ef13ad5 authored by Ian Longden
Browse files

version now added to xref so that accession has no version info in it

parent 2505f2c2
No related branches found
No related tags found
No related merge requests found
......@@ -282,27 +282,34 @@ sub upload_xrefs {
# upload new ones
print "Uploading xrefs\n";
my $xref_sth = $dbi->prepare("INSERT INTO xref (accession,label,description,source_id,species_id) VALUES(?,?,?,?,?)");
my $xref_sth = $dbi->prepare("INSERT INTO xref (accession,version,label,description,source_id,species_id) VALUES(?,?,?,?,?,?)");
my $pri_insert_sth = $dbi->prepare("INSERT INTO primary_xref VALUES(?,?,?,?,?)");
my $pri_update_sth = $dbi->prepare("UPDATE primary_xref SET sequence=? WHERE xref_id=?");
my $syn_sth = $dbi->prepare("INSERT INTO synonym VALUES(?,?,?)");
my $dep_sth = $dbi->prepare("INSERT INTO dependent_xref VALUES(?,?,?,?)");
my $xref_update_label_sth = $dbi->prepare("UPDATE xref SET label=? WHERE xref_id=?");
my $xref_update_descr_sth = $dbi->prepare("UPDATE xref SET description=? WHERE xref_id=?");
local $xref_sth->{RaiseError}; # disable error handling here as we'll do it ourselves
local $xref_sth->{PrintError};
foreach my $xref (@xrefs) {
my $xref_id;
# Create entry in xref table and note ID
$xref_sth->execute($xref->{ACCESSION},
if(! $xref_sth->execute($xref->{ACCESSION},
$xref->{VERSION},
$xref->{LABEL},
$xref->{DESCRIPTION},
$xref->{SOURCE_ID},
$xref->{SPECIES_ID});
$xref->{SPECIES_ID})){
$xref_id = insert_or_select($xref_sth, $dbi->err, $xref->{ACCESSION}, $xref->{SOURCE_ID});
$xref_update_label_sth->execute($xref->{LABEL},$xref_id) if (defined($xref->{LABEL}));
$xref_update_descr_sth->execute($xref->{DESCRIPTION},$xref_id,) if (defined($xref->{DESCRIPTION}));
}
else{
$xref_id = insert_or_select($xref_sth, $dbi->err, $xref->{ACCESSION}, $xref->{SOURCE_ID});
}
# If there was an error, an xref with the same acc & source already exists.
# If so, find its ID, otherwise get ID of xref just inserted
my $xref_id = insert_or_select($xref_sth, $dbi->err, $xref->{ACCESSION}, $xref->{SOURCE_ID});
# create entry in primary_xref table with sequence; if this is a "cumulative"
# entry it may already exist, and require an UPDATE rather than an INSERT
......@@ -326,6 +333,7 @@ sub upload_xrefs {
foreach my $syn (@{$xref->{SYNONYMS}}) {
$xref_sth->execute($syn,
"",
"",
"",
$xref->{SOURCE_ID},
......@@ -343,6 +351,7 @@ sub upload_xrefs {
my %dep = %$depref;
$xref_sth->execute($dep{ACCESSION},
$dep{VERSION},
$dep{LABEL},
"",
$dep{SOURCE_ID},
......@@ -701,16 +710,16 @@ sub get_xref{
}
sub add_to_xrefs{
my ($self,$master_xref,$acc,$label,$linkage,$source_id,$species_id) = @_;
my ($self,$master_xref,$acc,$version,$label,$linkage,$source_id,$species_id) = @_;
if(!defined($add_xref_sth)){
$add_xref_sth = dbi->prepare("INSERT INTO xref (accession,label,description,source_id,species_id) VALUES(?,?,?,?,?)");
$add_xref_sth = dbi->prepare("INSERT INTO xref (accession,version,label,description,source_id,species_id) VALUES(?,?,?,?,?,?)");
$add_dependent_xref_sth = dbi->prepare("INSERT INTO dependent_xref VALUES(?,?,?,?)");
}
my $dependent_id = get_xref($acc, $source_id);
if(!defined($dependent_id)){
$add_xref_sth->execute($acc,$label,"",$source_id,$species_id) || die "$acc\t$label\t\t$source_id\t$species_id\n";
$add_xref_sth->execute($acc,$version,$label,"",$source_id,$species_id) || die "$acc\t$label\t\t$source_id\t$species_id\n";
}
$dependent_id = get_xref($acc, $source_id);
$add_dependent_xref_sth->execute($master_xref, $dependent_id, $linkage, $source_id)|| die "$master_xref\t$dependent_id\t$linkage\t$source_id";
......
......@@ -60,13 +60,13 @@ sub run {
}
elsif($array[0] =~ /RefSeq/){
if($refseq{$array[1]}){
XrefParser::BaseParser->add_to_xrefs($refseq{$array[1]},$array[4],$array[4],$array[6],$source_id,$species_id);
XrefParser::BaseParser->add_to_xrefs($refseq{$array[1]},$array[4],'',$array[4],$array[6],$source_id,$species_id);
# print "$array[1]\tSPTR\t$array[4]\tGO\t$array[6]\t$array[9]\tXREF\n";
}
}
elsif($array[0] =~ /UniProt/){
if($swiss{$array[1]}){
XrefParser::BaseParser->add_to_xrefs($swiss{$array[1]},$array[4],$array[4],$array[6],$source_id,$species_id);
XrefParser::BaseParser->add_to_xrefs($swiss{$array[1]},$array[4],'',$array[4],$array[6],$source_id,$species_id);
}
}
else{
......
......@@ -88,7 +88,7 @@ sub run {
$mismatch++;
}
else{
XrefParser::BaseParser->add_to_xrefs($master,$hgnc,$hugo{hgnc},"",$source_id,$species_id,$count);
XrefParser::BaseParser->add_to_xrefs($master,$hgnc,'',$hugo{hgnc},"",$source_id,$species_id,$count);
$count++;
}
# print "$array[1]\tSPTR\t$hgnc\tHUGO\t$hugo_id{$hgnc}\t$hugo_syn{$hgnc}\tXREF\n";
......@@ -101,7 +101,7 @@ sub run {
$mismatch++;
}
else{
XrefParser::BaseParser->add_to_xrefs($master,$hgnc,$hugo{hgnc},"",$source_id,$species_id);
XrefParser::BaseParser->add_to_xrefs($master,$hgnc,'',$hugo{hgnc},"",$source_id,$species_id);
$count++;
}
}
......
......@@ -59,7 +59,7 @@ sub run {
my @sp = split(/\s/,$sps);
foreach my $value (@sp){
if(defined($value) and $value and defined($swiss{$value})){
XrefParser::BaseParser->add_to_xrefs($swiss{$value},$key,$label,"",$source_id,$species_id);
XrefParser::BaseParser->add_to_xrefs($swiss{$value},$key,'',$label,"",$source_id,$species_id);
$count++;
}
elsif(defined($value) and $value and defined($refseq{$value})){
......
......@@ -64,7 +64,7 @@ sub run {
$xref=$refseq{$array[0]} if defined($refseq{$array[0]});
$xref=$genbank{$array[0]} if defined($genbank{$array[0]});
if(defined($xref)){
XrefParser::BaseParser->add_to_xrefs($xref,"RGD:".$array[2],$array[1],"",$source_id,$species_id);
XrefParser::BaseParser->add_to_xrefs($xref,"RGD:".$array[2],"",$array[1],"",$source_id,$species_id);
$count++;
}
else{
......
......@@ -63,6 +63,7 @@ sub create_xrefs {
local $/ = "\/\/\n";
my $ian_count=0;
while (<REFSEQ>) {
my $xref;
......@@ -76,11 +77,12 @@ sub create_xrefs {
$species =~ s/\s+/_/g;
$species =~ s/\n//g;
my $species_id = $name2species_id{$species};
# skip xrefs for species that aren't in the species table
if (defined $species_id) {
my ($acc) = $entry =~ /ACCESSION\s+(\S+)/;
my ($ver) = $entry =~ /VERSION\s+(\S+)/;
my ($description) = $entry =~ /DEFINITION\s+([^[]*)/s;
print $entry if (length($description) == 0);
$description =~ s/\n//g;
......@@ -97,7 +99,15 @@ sub create_xrefs {
$parsed_seq =~ s/\/\///g; # remove trailing end-of-record character
$parsed_seq =~ s/\s//g; # remove whitespace
my ($acc_no_ver,$ver) = split (/\./,$ver);
$xref->{ACCESSION} = $acc;
if($acc eq $acc_no_ver){
$xref->{VERSION} = $ver;
}
else{
print "$acc NE $acc_no_ver\n";
}
$xref->{LABEL} = $acc;
$xref->{DESCRIPTION} = $description;
$xref->{SOURCE_ID} = $source_id;
......@@ -143,7 +153,10 @@ sub create_xrefs {
my %mrna_dep;
$mrna_dep{SOURCE_ID} = $source_id; # source is still RefSeq
$mrna_dep{ACCESSION} = $mrna;
my ($mrna_acc,$mrna_ver) = split (/\./,$mrna);
$mrna_dep{ACCESSION} = $mrna_acc;
$mrna_dep{VERSION} = $mrna_ver;
push @{$xref->{DEPENDENT_XREFS}}, \%mrna_dep;
push @xrefs, $xref;
......
......@@ -97,7 +97,9 @@ sub create_xrefs {
# skip xrefs for species that aren't in the species table
if (defined $species_id) {
$xref->{ACCESSION} = $acc;
my ($acc_no_ver,$ver) = split (/\./,$acc);
$xref->{ACCESSION} = $acc_no_ver;
$xref->{VERSION} = $ver;
$xref->{LABEL} = $acc;
$xref->{DESCRIPTION} = $description;
$xref->{SOURCE_ID} = $source_id;
......
......@@ -42,6 +42,8 @@ INSERT INTO source VALUES (1070, 'GO', 1, 'Y',2);
INSERT INTO source VALUES (1080, 'MarkerSymbol', 1, 'Y',2);
INSERT INTO source VALUES (1090, 'HUGO', 1, 'Y',2);
INSERT INTO source VALUES (1200, 'RGD', 1, 'Y',2);
INSERT INTO source VALUES (1300, 'Interpro', 1, 'Y', 2);
################################################################################
......@@ -69,8 +71,8 @@ INSERT INTO source_url (source_id, species_id, url, checksum, file_modified_date
## HUGO
INSERT INTO source_url (source_id, species_id, url, checksum, file_modified_date, upload_date, parser) VALUES (1090, 1,'http://www.gene.ucl.ac.uk/public-files/nomen/ens4.txt http://www.gene.ucl.ac.uk/public-files/nomen/ens1.txt', '', now(), now(), "HUGOParser");
## Interpro
INSERT INTO source_url (source_id, species_id, url, checksum, file_modified_date, upload_date, parser) VALUES (1300, 1,' ftp://ftp.ebi.ac.uk/pub/databases/interpro/short_name.dat ftp://ftp.ebi.ac.uk/pub/databases/interpro/protein2interpro.dat.gz', '', now(), now(), "InterproParser");
###MOUSE
......
......@@ -9,6 +9,7 @@ CREATE TABLE xref (
xref_id int unsigned not null auto_increment,
accession varchar(255) not null,
version int unsigned,
label varchar(255),
description varchar(255),
source_id int unsigned not null,
......
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment