Skip to content
Snippets Groups Projects
Commit c5e35f30 authored by Glenn Proctor
Browse files

Added species-specific regexps for gene description filtering.

parent 83d499f7
No related branches found
No related tags found
No related merge requests found
......@@ -6,10 +6,43 @@ use vars '@ISA';
# Inherit from XrefMapper::BasicMapper: methods not defined in this file are
# looked up in that class.
@ISA = qw{ XrefMapper::BasicMapper };
# Returns a reference to an array with a single entry of the form
#   [ method_name, [ species_a, species_b ] ]
# here: [ "ExonerateGappedBest1", [ "mus_musculus", "*" ] ].
#
# NOTE(review): presumably the first element names the mapping method and the
# inner pair selects the species it applies to ("*" looking like a wildcard) --
# confirm against XrefMapper::BasicMapper, which is not visible here.
#
# The sub header used to appear twice in a row (old and new spelling of the
# same line), which left an unclosed block; only one definition is kept.
sub get_set_lists {

  return [["ExonerateGappedBest1", ["mus_musculus","*"]]];

}
# Returns the list of regular-expression source strings used for gene
# description filtering in this species-specific mapper. The patterns are
# returned as plain strings (not compiled with qr//), in the order below.
#
# All literal text in the patterns is upper case.
# NOTE(review): presumably the caller either upper-cases descriptions or
# matches case-insensitively, and applies the patterns one after another --
# confirm against the code that consumes this list. If they are applied in
# order, 'HYPOTHETICAL PROTEIN,' and 'HYPOTHETICAL PROTEIN \S+[\.;]' look
# already covered by the earlier '\(*HYPOTHETICAL\s+.*'; left untouched here.
#
# The list is returned with "return ( ... )" on purpose so that behaviour in
# both list and scalar context stays exactly as before.
sub gene_description_filter_regexps {

  return (

    # RIKEN clone identifiers and library boilerplate.
    '\(?[0-9A-Z]{10}RIK PROTEIN\)?[ \.]',
    'RIKEN CDNA [0-9A-Z]{10}[ \.;]',
    '.*RIKEN FULL-LENGTH ENRICHED LIBRARY.*PRODUCT:',
    '.*RIKEN FULL-LENGTH ENRICHED LIBRARY.*',

    # Hypothetical / unknown products.
    '\(*HYPOTHETICAL\s+.*',
    '^UNKNOWN\s+.*',

    # cDNA and MGC clone references.
    'CDNA SEQUENCE\s?,? [A-Z]+\d+[ \.;]',
    'CLONE MGC:\d+[ \.;]',
    ' MGC:\s*\d+[ \.;]',

    'HYPOTHETICAL PROTEIN,',
    'HYPOTHETICAL PROTEIN \S+[\.;]',
    'DNA SEGMENT, CHR.*',
    'PROTEIN \S+ HOMOLOG\.?',

    # "SIMILAR TO ..." phrases.
    '^SIMILAR TO GENE.*',
    'SIMILAR TO PUTATIVE[ \.]',
    '^SIMILAR TO HYPOTHETICAL.*',
    'SIMILAR TO (KIAA|LOC|RIKEN).*',
    'SIMILAR TO GENBANK ACCESSION NUMBER\s+\S+',
    'SIMILAR TO\s+$',

    # Expressed sequence / EST accessions.
    'EXPRESSED SEQUENCE [A-Z]+\d+[ \.;]',
    'EST [A-Z]+\d+[ \.;]',

    # Bare qualifiers such as (FRAGMENT), (GENE), (PRECURSOR).
    '^\s*\(FRAGMENT\)\.?\s*$',
    '^\s*\(?GENE\)?\.?;?\s*$',
    '\s*\(?GENE\)?\.?;?',
    '\s*\(?PRECURSOR\)?\.?;?',

    # Empty or numeric-only parentheses and whitespace-only remnants.
    '^\s*\(\s*\)\s*$',
    '^\s*\(\d*\)\s*[ \.]$',
    '^\s+\(?\s*$',

  );

}
# A file loaded with require/use must end by evaluating to a true value.
1;
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment