diff --git a/misc-scripts/xref_mapping/XrefMapper/BasicMapper.pm b/misc-scripts/xref_mapping/XrefMapper/BasicMapper.pm index e192056cb863c44fd4716e88fa8fbe1d6a668f3c..8dee16dae7f62b08a9f471adf9c05bb7e3567dda 100644 --- a/misc-scripts/xref_mapping/XrefMapper/BasicMapper.pm +++ b/misc-scripts/xref_mapping/XrefMapper/BasicMapper.pm @@ -926,6 +926,10 @@ sub dump_core_xrefs { # execute several queries with a max of 200 entries in each IN clause - more efficient my $batch_size = 200; + # keep track of what xref_ids have been written to prevent duplicates; e.g. several dependent + # xrefs may be dependent on the same master xref. + my %xrefs_written; + while(@xref_ids) { my @ids; @@ -950,20 +954,17 @@ sub dump_core_xrefs { my ($xref_id, $accession, $version, $label, $description, $source_id, $species_id, $master_xref_id); $xref_sth->bind_columns(\$xref_id, \$accession, \$version, \$label, \$description, \$source_id, \$species_id); - # keep track of what xref_ids have been written to prevent duplicates; e.g. several dependent - # xrefs my be dependent on the same master xref. - my %xrefs_written; - # note the xref_id we write to the file is NOT the one we've just read # from the internal xref database as the ID may already exist in the # core database so we add on $xref_id_offset while ($xref_sth->fetch()) { - my $external_db_id = $source_to_external_db{$source_id}; - print XREF ($xref_id+$xref_id_offset) . "\t" . $external_db_id . "\t" . $accession . "\t" . $label . "\t" . $version . "\t" . $description . "\n"; - $xrefs_written{$xref_id} = 1; - $source_ids{$source_id} = $source_id; - + if (!$xrefs_written{$xref_id}) { + my $external_db_id = $source_to_external_db{$source_id}; + print XREF ($xref_id+$xref_id_offset) . "\t" . $external_db_id . "\t" . $accession . "\t" . $label . "\t" . $version . "\t" . $description . "\n"; + $xrefs_written{$xref_id} = 1; + $source_ids{$source_id} = $source_id; + } } # Now get the dependent xrefs for each of these xrefs and write them as well