Commit 4695066f authored by Glenn Proctor
Browse files

Now store external synonyms as additional fields rather than cross references.

parent 0fd869b8
...@@ -56,7 +56,7 @@ my @indexes = split ',', $ind; ...@@ -56,7 +56,7 @@ my @indexes = split ',', $ind;
@indexes = map { /dump(\w+)/ ? $1 : () } keys %ebi_search_dump:: @indexes = map { /dump(\w+)/ ? $1 : () } keys %ebi_search_dump::
if $ind eq 'ALL'; if $ind eq 'ALL';
warn Dumper \@indexes; #warn Dumper \@indexes;
my $dbHash = get_databases(); my $dbHash = get_databases();
#warn Dumper $dbHash; #warn Dumper $dbHash;
...@@ -353,7 +353,7 @@ sub familyLineXML { ...@@ -353,7 +353,7 @@ sub familyLineXML {
} }
sub dumpGene { sub dumpGene {
warn "in dumpGene";
my ( $dbspecies, $conf ) = @_; my ( $dbspecies, $conf ) = @_;
foreach my $DB ( 'core', 'otherfeatures', 'vega' ) { foreach my $DB ( 'core', 'otherfeatures', 'vega' ) {
...@@ -404,8 +404,9 @@ warn "in dumpGene"; ...@@ -404,8 +404,9 @@ warn "in dumpGene";
$xrefs{$type}{ $_->[0] }{ $_->[3] }{ $_->[1] } = 1 if $_->[1]; $xrefs{$type}{ $_->[0] }{ $_->[3] }{ $_->[1] } = 1 if $_->[1];
$xrefs{$type}{ $_->[0] }{ $_->[3] }{ $_->[2] } = 1 if $_->[2]; $xrefs{$type}{ $_->[0] }{ $_->[3] }{ $_->[2] } = 1 if $_->[2];
$xrefs{$type}{ $_->[0] }{ $_->[3] . "_synonym" }{ $_->[4] } = 1 if $_->[4]; $xrefs{$type}{ $_->[0] }{ $_->[3] . "_synonym" }{ $_->[4] } = 1 if $_->[4];
$xrefs_desc{$type}{ $_->[0] }{ $_->[5] } = 1 if $_->[5]; $xrefs_desc{$type}{ $_->[0] }{ $_->[5] } = 1 if $_->[5];
} }
warn "XREF $type query..."; warn "XREF $type query...";
...@@ -542,7 +543,7 @@ warn "in dumpGene"; ...@@ -542,7 +543,7 @@ warn "in dumpGene";
$old{'external_identifiers'}{$db}{$K} = 1; $old{'external_identifiers'}{$db}{$K} = 1;
} }
} }
} }
else { else {
$old{'transcript_stable_ids'}{$transcript_stable_id} = 1; $old{'transcript_stable_ids'}{$transcript_stable_id} = 1;
...@@ -636,30 +637,52 @@ sub geneLineXML { ...@@ -636,30 +637,52 @@ sub geneLineXML {
<name>$gene_id $altid</name> <name>$gene_id $altid</name>
<description>$description</description>}; <description>$description</description>};
my $synonyms = "";
my $cross_references = qq{ my $cross_references = qq{
<cross_references>}; <cross_references>};
# for some types of xref, merge the subtypes into the larger type # for some types of xref, merge the subtypes into the larger type
# e.g. Uniprot/SWISSPROT and Uniprot/TREMBL become just Uniprot # e.g. Uniprot/SWISSPROT and Uniprot/TREMBL become just Uniprot
# synonyms are stored as additional fields rather than cross references
foreach my $ext_db_name ( keys %$external_identifiers ) { foreach my $ext_db_name ( keys %$external_identifiers ) {
if ($ext_db_name =~ /(Uniprot|GO|Interpro|Medline|Sequence_Publications|EMBL)/) { if ($ext_db_name =~ /(Uniprot|GO|Interpro|Medline|Sequence_Publications|EMBL)/) {
my $matched_db_name = $1; my $matched_db_name = $1;
# synonyms
if ($ext_db_name =~ /_synonym/) { if ($ext_db_name =~ /_synonym/) {
$matched_db_name .= "_synonym";
} map { $synonyms .= qq{
map { $cross_references .= qq{ <field name="${matched_db_name}_synonym">$_</field>}; } keys %{ $external_identifiers->{$ext_db_name} }
<ref dbname="$matched_db_name" dbkey="$_"/>}; } keys %{ $external_identifiers->{$ext_db_name} }
} else { # non-synonyms
map { $cross_references .= qq{
<ref dbname="$matched_db_name" dbkey="$_"/>}; } keys %{ $external_identifiers->{$ext_db_name} }
}
} else { } else {
foreach my $key (keys %{ $external_identifiers->{$ext_db_name} }) { foreach my $key (keys %{ $external_identifiers->{$ext_db_name} }) {
$key =~ s/</&lt;/g; $key =~ s/</&lt;/g;
$key =~ s/>/&gt;/g; $key =~ s/>/&gt;/g;
$key =~ s/&/&amp;/g; $key =~ s/&/&amp;/g;
$ext_db_name =~s/^Ens.*/ENSEMBL/; $ext_db_name =~s/^Ens.*/ENSEMBL/;
$cross_references .= qq{
<ref dbname="$ext_db_name" dbkey="$key"/>}; if ($ext_db_name =~ /_synonym/) {
$synonyms .= qq{
<field name="$ext_db_name">$key"</field>};
} else {
$cross_references .= qq{
<ref dbname="$ext_db_name" dbkey="$key"/>};
}
} }
} }
...@@ -686,41 +709,42 @@ sub geneLineXML { ...@@ -686,41 +709,42 @@ sub geneLineXML {
<field name="transcript_count">$transcript_count</field> } <field name="transcript_count">$transcript_count</field> }
. ( . (
join "", join "",
( (
map { map {
qq{ qq{
<field name="transcript">$_</field>} <field name="transcript">$_</field>}
} keys %$transcripts } keys %$transcripts
) )
) )
. qq{ <field name="exon_count">$exon_count</field> } . qq{ <field name="exon_count">$exon_count</field> }
. ( . (
join "", join "",
( (
map { map {
qq{ qq{
<field name="exon">$_</field>} <field name="exon">$_</field>}
} keys %$exons } keys %$exons
) )
) )
. ( . (
join "", join "",
( (
map { map {
qq{ qq{
<field name="peptide">$_</field>} <field name="peptide">$_</field>}
} keys %$peptides } keys %$peptides
) )
) )
. $synonyms
. qq{ . qq{
</additional_fields> </additional_fields>
}; };
$counter->(); $counter->();
return $xml . $cross_references . $additional_fields . '</entry>'; return $xml . $cross_references . $additional_fields . '</entry>';
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment