diff --git a/misc-scripts/xref_mapping/XrefMapper/eukaryota.pm b/misc-scripts/xref_mapping/XrefMapper/eukaryota.pm index 57f2115fe06ffca6aaa859aba81c795d0c974391..5819eb68f969d2182976b268de01f7fee6a0982d 100644 --- a/misc-scripts/xref_mapping/XrefMapper/eukaryota.pm +++ b/misc-scripts/xref_mapping/XrefMapper/eukaryota.pm @@ -20,7 +20,6 @@ sub set_methods{ sub transcript_display_xref_sources { my $self = shift; - my $fullmode = shift; print STDERR "getting the list of external_dbs for assigning gene names from eukaryota.pm\n"; @@ -46,16 +45,32 @@ sub transcript_display_xref_sources { my %ignore; - # Both methods - - if(!$fullmode){ - $ignore{"EntrezGene"}= 'FROM:RefSeq_[pd][en][pa].*_predicted'; - } - else{ - $ignore{"EntrezGene"} = 'select ox.object_xref_id from object_xref ox, dependent_xref dx, source s1, xref x1, source s2, xref x2 where ox.object_xref_id = dx.object_xref_id and dx.dependent_xref_id = x1.xref_id and x1.source_id = s1.source_id and s1.name = "EntrezGene" and x2.xref_id = dx.master_xref_id and x2.source_id = s2.source_id and (s2.name like "Refseq_dna_predicted" or s2.name like "RefSeq_peptide_predicted") and ox.ox_status = "DUMP_OUT"'; - - } + #don't use EntrezGene labels dependent on predicted RefSeqs + + $ignore{'EntrezGene'} =<<IEG; +SELECT DISTINCT ox.object_xref_id + FROM object_xref ox, dependent_xref dx, + xref xmas, xref xdep, + source smas, source sdep + WHERE ox.xref_id = dx.dependent_xref_id AND + dx.dependent_xref_id = xdep.xref_id AND + dx.master_xref_id = xmas.xref_id AND + xmas.source_id = smas.source_id AND + xdep.source_id = sdep.source_id AND + smas.name like "Refseq%predicted" AND + sdep.name like "EntrezGene" AND + ox.ox_status = "DUMP_OUT" +IEG + + #don't use labels starting with LOC + + $ignore{'LOC_prefix'} =<<LOCP; +SELECT object_xref_id + FROM object_xref JOIN xref USING(xref_id) JOIN source USING(source_id) + WHERE ox_status = 'DUMP_OUT' AND label REGEXP '^LOC[[:digit:]]+' +LOCP + return [\@list,\%ignore]; }