diff --git a/misc-scripts/gene_description/Fugu_rubripes.regexps b/misc-scripts/gene_description/Fugu_rubripes.regexps new file mode 100644 index 0000000000000000000000000000000000000000..d27b6ee77304414408025761d066b9409aa5ce7c --- /dev/null +++ b/misc-scripts/gene_description/Fugu_rubripes.regexps @@ -0,0 +1,6 @@ +# regexps used to filter out useless descriptions for Fugu rubripes +# add more as appropriate; lines beginning with # are supposed to be comments + +^HYPOTHETICAL\s+PROTEIN\.? +^\s*\(?FRAGMENT\)?\.?\s* +^SIMILAR TO HUMAN CDNA KIAA\d+\.? \ No newline at end of file diff --git a/misc-scripts/gene_description/Rattus_norvegicus.regexps b/misc-scripts/gene_description/Rattus_norvegicus.regexps new file mode 100644 index 0000000000000000000000000000000000000000..3c0d8af10886757457f95a393f9746502375a8ce --- /dev/null +++ b/misc-scripts/gene_description/Rattus_norvegicus.regexps @@ -0,0 +1,33 @@ +# regexps used to filter out useless descriptions for Rattus norvegicus +# add more as appropriate; lines beginning with # are supposed to be comments + +^\(CLONE REM\d+\) ORF \(FRAGMENT\)\.* +^ORF\s*\d+\s+PROTEIN\.* +\(?[0-9A-Z]{10}RIK PROTEIN\)?[ \.] +RIKEN CDNA [0-9A-Z]{10}[ \.;] +.*RIKEN FULL-LENGTH ENRICHED LIBRARY.*PRODUCT: +.*RIKEN FULL-LENGTH ENRICHED LIBRARY.* +\(*HYPOTHETICAL\s+.* +^UNKNOWN\s+.* +CDNA SEQUENCE\s?,? [A-Z]+\d+[ \.;] +CLONE MGC:\d+[ \.;] +MGC:\s*\d+[ \.;] +HYPOTHETICAL PROTEIN, +HYPOTHETICAL PROTEIN \S+[\.;] +DNA SEGMENT, CHR.* +PROTEIN \S+ HOMOLOG\.? +^SIMILAR TO GENE.* +SIMILAR TO PUTATIVE[ \.] +^SIMILAR TO HYPOTHETICAL.* +SIMILAR TO (KIAA|LOC|RIKEN).* +SIMILAR TO GENBANK ACCESSION NUMBER\s+\S+ +SIMILAR TO\s+$ +EXPRESSED SEQUENCE [A-Z]+\d+[ \.;] +EST [A-Z]+\d+[ \.;] +^\s*\(FRAGMENT\)\.?\s*$ +^\s*\(?GENE\)?\.?;?\s*$ +\s*\(?GENE\)?\.?;? +\s*\(?PRECURSOR\)?\.?;? +^\s*\(\s*\)\s*$ +^\s*\(\d*\)\s*[ \.]$ +^\s+\(?\s*$