diff --git a/misc-scripts/xref_mapping/XrefParser/VGNCParser.pm b/misc-scripts/xref_mapping/XrefParser/VGNCParser.pm index 8fa54f522be553f4992684d7cebd3010f7ef64e8..cd3f395622db6521921b7b24d3c1062691eedabc 100644 --- a/misc-scripts/xref_mapping/XrefParser/VGNCParser.pm +++ b/misc-scripts/xref_mapping/XrefParser/VGNCParser.pm @@ -23,7 +23,7 @@ use strict; use warnings; use File::Basename; use Carp; -use base qw( XrefParser::BaseParser ); +use base qw( XrefParser::HGNCParser); sub run { @@ -50,6 +50,10 @@ sub run { } my $source_name = $self->get_source_name_for_source_id($source_id); + # Create a hash of all valid taxon_ids for this species + my %species2tax = $self->species_id2taxonomy(); + my @tax_ids = @{$species2tax{$species_id}}; + my %taxonomy2species_id = map{ $_=>$species_id } @tax_ids; # Skip header $file_io->getline(); @@ -58,15 +62,19 @@ sub run { chomp; my @array = split /\t/x, $_; - my $acc = $array[0]; - my $symbol = $array[1]; - my $name = $array[2]; + my $taxon_id = $array[0]; + my $acc = $array[1]; + my $symbol = $array[2]; + my $name = $array[3]; + my $id = $array[20]; + my $previous_symbols = $array[9]; + my $synonyms = $array[11]; + $previous_symbols =~ s/"//g; + $synonyms =~ s/"//g; + + unless (exists ($taxonomy2species_id{$taxon_id})) { next; } - # - # Direct Ensembl mappings - # - my $id = $array[9]; if ($id){ # Ensembl direct xref $self->add_to_direct_xrefs({ stable_id => $id, type => 'gene', @@ -76,6 +84,12 @@ sub run { source_id => $source_id, species_id => $species_id} ); + $self->add_synonyms_for_hgnc( {source_id => $source_id, + name => $acc, + species_id => $species_id, + dead => $previous_symbols, + alias => $synonyms}); + $count++; } } diff --git a/misc-scripts/xref_mapping/xref_config.ini b/misc-scripts/xref_mapping/xref_config.ini index c4add4db7a4c16d5aade96695f602659afe9499d..6f922d537e1f42914616319ec37e5b056a17dd10 100644 --- a/misc-scripts/xref_mapping/xref_config.ini +++ b/misc-scripts/xref_mapping/xref_config.ini @@ -1045,7 +1045,7 @@ priority = 1 prio_descr = parser = VGNCParser release_uri = -data_uri = ftp://ftp.ebi.ac.uk/pub/databases/genenames/vgnc/ensembl/VGNC_to_Ensembl_mapping.txt +data_uri = ftp://ftp.ebi.ac.uk/pub/databases/genenames/vgnc/tsv/vgnc_gene_set_All.txt.gz [source VGNC::bos_taurus] # Used by bos_taurus @@ -1056,7 +1056,7 @@ priority = 1 prio_descr = parser = VGNCParser release_uri = -data_uri = ftp://ftp.ebi.ac.uk/pub/databases/genenames/vgnc/ensembl/VGNC_to_Ensembl_mapping.txt +data_uri = ftp://ftp.ebi.ac.uk/pub/databases/genenames/vgnc/tsv/vgnc_gene_set_All.txt.gz [source VGNC::canis_familiaris] # Used by canis_familiaris @@ -1067,7 +1067,7 @@ priority = 1 prio_descr = parser = VGNCParser release_uri = -data_uri = ftp://ftp.ebi.ac.uk/pub/databases/genenames/vgnc/ensembl/VGNC_to_Ensembl_mapping.txt +data_uri = ftp://ftp.ebi.ac.uk/pub/databases/genenames/vgnc/tsv/vgnc_gene_set_All.txt.gz [source VGNC::equus_caballus] # Used by equus_caballus @@ -1078,7 +1078,7 @@ priority = 1 prio_descr = parser = VGNCParser release_uri = -data_uri = ftp://ftp.ebi.ac.uk/pub/databases/genenames/vgnc/ensembl/VGNC_to_Ensembl_mapping.txt +data_uri = ftp://ftp.ebi.ac.uk/pub/databases/genenames/vgnc/tsv/vgnc_gene_set_All.txt.gz [source VGNC::MULTI#01] name = VGNC