Commit f19f979b authored by Ian Longden
Browse files

keep uniprot and refseq matches separately and then take the best at the end

parent 69c000ce
......@@ -76,23 +76,23 @@ sub run {
# Use the RefSeq if available as this is manually curated
# If no RefSeq, use the Swissprot instead
my $master;
if ($array[6]) { # RefSeq
$master = $refseq{$array[6]};
my $seen=0;
if ($array[6]) { # RefSeq
$refseq_count++;
} elsif ($array[5]) { # Uniprot
$master = $swiss{$array[5]};
XrefParser::BaseParser->add_to_xrefs($array[6], $array[0], '', $array[1], $array[2], "", $source_id, $species_id);
$seen = 1;
}
if ($array[5]) { # Uniprot
XrefParser::BaseParser->add_to_xrefs($array[5], $array[0], '', $array[1], $array[2], "", ($source_id+1), $species_id);
$swiss_count++;
$seen=1;
}
if (!$master) {
if (!$seen) {
$mismatch++;
next;
}
#print $array[5] ." " . $array[6] . " " . $master . " " . $swiss_count . " " . $refseq_count . " " . $mismatch . "\n";
XrefParser::BaseParser->add_to_xrefs($master, $array[0], '', $array[1], $array[2], "", $source_id, $species_id);
if (defined($array[3])) { # dead name, add to synonym
my @array2 = split(',\s*', $array[3]);
......
......@@ -73,7 +73,7 @@ sub run {
$line_count++;
if(!defined($seen{$hgnc})){
$seen{$hgnc} = 1;
my $key = "CCDS".$hgnc;
my $key = "CCDS".$ccds;
if(defined($ensembl_stable_id{$key})){
my $xref_id = $self->add_xref($hgnc, $version{$hgnc} , $label{$hgnc}||$hgnc ,
$description{$hgnc}, $source_id, $species_id);
......
......@@ -79,8 +79,9 @@ INSERT INTO source VALUES (1080, 'MarkerSymbol', 1, 'Y',30, 1);
# 1090 needs 1091 to be loaded first to obtain descriptions etc.
INSERT INTO source VALUES (1090, 'HUGO', 1, 'Y',30, 1);
INSERT INTO source VALUES (1091, 'HUGO', 1, 'Y',29, 3);
INSERT INTO source VALUES (1092, 'HUGO', 1, 'Y',51, 2);
INSERT INTO source VALUES (1091, 'HUGO', 1, 'Y',29, 3); # use refseq
INSERT INTO source VALUES (1092, 'HUGO', 1, 'N',29, 4); # use uniprot
INSERT INTO source VALUES (1093, 'HUGO', 1, 'Y',51, 2);
INSERT INTO source VALUES (1110, 'EntrezGene', 1, 'N', 30, 1);
......@@ -297,9 +298,9 @@ INSERT INTO source_url (source_id, species_id, url, checksum, file_modified_date
INSERT INTO source_url (source_id, species_id, url, checksum, file_modified_date, upload_date, parser) VALUES (1090, 9606,'LOCAL:HUGO/HUGO_TO_ENSG', '', now(), now(), "HUGO_ENSGParser");
## HUGO
INSERT INTO source_url (source_id, species_id, url, checksum, file_modified_date, upload_date, parser) VALUES (1092, 9606,'LOCAL:HUGO/CCDS_TO_HUGO', '', now(), now(), "HUGO_CCDSParser");
INSERT INTO source_url (source_id, species_id, url, checksum, file_modified_date, upload_date, parser) VALUES (1093, 9606,'LOCAL:HUGO/CCDS_TO_HUGO', '', now(), now(), "HUGO_CCDSParser");
# lower priority HUGO data
# lower priority HUGO data 1091-> refseq 1092->uniprot
INSERT INTO source_url (source_id, species_id, url, checksum, file_modified_date, upload_date, parser) VALUES (1091, 9606,'http://www.gene.ucl.ac.uk/cgi-bin/nomenclature/gdlw.pl?title=Genew+output+data&col=gd_hgnc_id&col=gd_app_sym&col=gd_app_name&col=gd_prev_sym&col=gd_aliases&col=md_prot_id&col=gd_pub_refseq_ids&status=Approved&status=Approved+Non-Human&status_opt=3&=on&where=&order_by=gd_hgnc_id&limit=&format=text&submit=submit&.cgifields=&.cgifields=status&.cgifields=chr', '', now(), now(), "HUGOParser");
## Interpro
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment