Skip to content
Snippets Groups Projects
Commit b7127833 authored by Santiago Sanchez Fragoso's avatar Santiago Sanchez Fragoso
Browse files

add build_gb script

parent 4d946760
No related branches found
No related tags found
No related merge requests found
{
"name": "emeraldbgc",
"version": "0.2.3",
"description": "SMBGC detection tool",
"author": "Santiago Sanchez Fragoso",
"author_email": "fragoso@ebi.ac.uk",
"url": "https://gitlab.ebi.ac.uk/fragoso/emerald",
"license": "Apache License, Version 2.0"
}
......@@ -49,7 +49,7 @@ def main(args=None):
dest="ip_file",
default=None,
type=str,
help="Optional, preprocessed InterProScan GFF3 output file. Requires a GBK file as SEQUENCE_FILE. The GBK must have CDS as features, and \"protein_id\" matching the ids in the InterProScan file",
help="Optional, preprocessed InterProScan GFF3 output file. Requires a GBK file as SEQUENCE_FILE. The GBK must have CDS as features, and \"protein_id\" matching the ids in the InterProScan file. The GBK file can be build with emerald_build_gb tool",
metavar="FILE",
)
parser.add_argument(
......
#!/usr/bin/env python3
# Copyright 2021 EMBL - European Bioinformatics Institute
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import argparse
import glob
import os
import sys

from Bio import SeqIO
from Bio.SeqFeature import FeatureLocation, SeqFeature
from Bio.SeqRecord import SeqRecord
def _parse_prodigal_header(description):
    """Return ``(start, end, strand)`` parsed from a Prodigal FASTA header.

    The header is expected to look like
    ``<id> # <begin> # <end> # <strand> # <attributes>``. One is subtracted
    from ``begin`` so that ``(start, end)`` is a 0-based, end-exclusive
    interval as used by Biopython locations.

    Raises:
        ValueError: if the header has too few fields or a coordinate/strand
            field is not an integer.
    """
    fields = description.split()
    if len(fields) < 7:
        raise ValueError(
            "Not a Prodigal protein header (expected "
            f"'<id> # <start> # <end> # <strand> # ...'): {description!r}"
        )
    return int(fields[2]) - 1, int(fields[4]), int(fields[6])


def main(args=None):
    """Build a GenBank file from a nucleotide FASTA and Prodigal proteins.

    Every protein in the Prodigal amino-acid FASTA becomes a ``CDS`` feature
    (with ``translation`` and ``protein_id`` qualifiers) on the contig whose
    id is the protein id without its trailing ``_<n>`` suffix. One GenBank
    record is written per contig that has at least one protein; contigs
    without proteins are not written.

    Args:
        args: list of command-line arguments; ``None`` makes argparse read
            ``sys.argv[1:]``.

    Raises:
        ValueError: if a protein header cannot be parsed, or a protein refers
            to a contig that is absent from the nucleotide FASTA.
    """
    parser = argparse.ArgumentParser(description="build_gb. Tool to build genbank format files ")
    parser.add_argument(
        "-n",
        dest="nuc_f",
        default=None,
        type=str,
        help="FASTA file with nucleotide sequence(s)",
        metavar="FILE",
        required=True,
    )
    parser.add_argument(
        "-a",
        dest="pro_f",
        default=None,
        type=str,
        help="prodigal output FASTA file with aminoacids sequences",
        metavar="FILE",
        required=True,
    )
    parser.add_argument(
        "-o",
        dest="out",
        default=None,
        type=str,
        help="output genebank format file",
        metavar="FILE",
        required=True,
    )
    args = parser.parse_args(args)

    # Context managers make sure both input handles are closed.
    with open(args.nuc_f) as nuc_handle:
        contig_seqs = {rec.id: rec.seq for rec in SeqIO.parse(nuc_handle, "fasta")}

    features_by_contig = {}
    with open(args.pro_f) as pro_handle:
        for protein in SeqIO.parse(pro_handle, "fasta"):
            start, end, strand = _parse_prodigal_header(protein.description)
            feature = SeqFeature(
                # The strand must be passed on, otherwise reverse-strand genes
                # are written as forward features instead of complement(...).
                FeatureLocation(start, end, strand=strand),
                type="CDS",
                qualifiers={
                    # Drop the stop-codon marker from the translation.
                    "translation": str(protein.seq).replace("*", ""),
                    "protein_id": str(protein.id),
                },
            )
            # Contig id = protein id minus the trailing "_<n>" gene index.
            contig = protein.id.rpartition("_")[0]
            features_by_contig.setdefault(contig, []).append(feature)

    records = []
    for contig, features in features_by_contig.items():
        sequence = contig_seqs.get(contig)
        if sequence is None:
            # Fail early with a clear message instead of building a record
            # without a sequence.
            raise ValueError(
                f"Contig {contig!r} referenced in {args.pro_f} "
                f"was not found in {args.nuc_f}"
            )
        records.append(
            SeqRecord(
                sequence,
                id=contig,
                name=contig,
                description=contig,
                features=features,
                annotations={"molecule_type": "DNA"},
            )
        )

    with open(args.out, 'w') as out_handle:
        SeqIO.write(records, out_handle, 'genbank')
    print(f"Done!.. outfile :{args.out}")
if __name__ == "__main__":
    # Script entry point: with no explicit argument list, main() passes None
    # to argparse, which then reads sys.argv[1:] itself.
    main()
\ No newline at end of file
#!/usr/bin/env bash
# Download InterProScan, verify its checksum, unpack it inside the installed
# Python package directory, link interproscan.sh into the environment's bin/
# and run InterProScan's initial setup. Does nothing on non-Linux systems.
#
# Reads CONDA_PREFIX and PKG_NAME from the environment
# (NOTE(review): presumably exported by conda when running this script - confirm).
if [[ "$OSTYPE" == "linux-gnu"* ]]; then
    IPS_VERSION="5.52-86.0"
    IPS_DIR="interproscan-${IPS_VERSION}"
    IPS_TARBALL="${IPS_DIR}-64-bit.tar.gz"
    IPS_URL="ftp://ftp.ebi.ac.uk/pub/software/unix/iprscan/5/${IPS_VERSION}"

    wget "${IPS_URL}/${IPS_TARBALL}" || exit 1
    wget "${IPS_URL}/${IPS_TARBALL}.md5" || exit 1
    # Stop here on a checksum mismatch: a truncated or corrupted archive must
    # not be unpacked into the package directory.
    if ! md5sum -c "${IPS_TARBALL}.md5"; then
        echo "Checksum verification failed for ${IPS_TARBALL}" >&2
        exit 1
    fi

    PY="${CONDA_PREFIX}/bin/python"
    # Locate the installed package through its __file__ attribute.
    PKG_FILE=$("$PY" -c "import ${PKG_NAME};print(${PKG_NAME}.__file__)") || exit 1
    PKG_DIR=$(dirname "$PKG_FILE")
    echo "$PKG_DIR" >> ~/env.test.txt
    tar --exclude-from="${PKG_DIR}/exclude.txt" -pxvzf "$IPS_TARBALL" -C "$PKG_DIR" || exit 1
    rm "$IPS_TARBALL"
    ln -s "${PKG_DIR}/${IPS_DIR}/interproscan.sh" "${CONDA_PREFIX}/bin"
    # Never run the setup script from the wrong directory.
    cd "${PKG_DIR}/${IPS_DIR}/" || exit 1
    "$PY" initial_setup.py
else
    echo "Non Linux OS; not installing InterProScan"
fi
......@@ -36,9 +36,8 @@ setup(
entry_points = {
'console_scripts': [
'emeraldbgc = emeraldbgc._cli:main',
# 'emerald_download_data = emeraldbgc.download_data:main',
'emerald_build_gb = emeraldbgc.build_gb:main',
]
},
packages = find_packages(exclude=('tests', 'docs')),
# exclude_package_data={"emeraldbgc": ["interproscan"]},
)
>BGC0001472
GCCCCGGGGGCCGTCGCTCCGGGGGTCGGTCCTGCCCGGTGGCGCAGGACCACGGGGGCCGGGGCCCGGGGGTGGACGGCATTTGTTTTGACCCAGCTCCGTGAGGTAGGTACGCTCAAGCCTTGTGCCTGGGGTGTGCCTGGGCTCGGGTGCGTGTCCTCAACCGCATGGCGAGTCCGTAAGTGGCCACCGCAATCTGTGTTCCGTCTGCCTTCCAGCAGGGGCGTGCAGTATTCGACACACCCGACCGCGTGGGTCGGTGACTGTTCCAGGTTAGTTTCACCGAACGGCACACAGAAACCGGAGAAGTAGTGCCTACGATCCAGCAGCTGGTCCGGAAGGGCCGGCAGGACAAGGTCGAGAAGAACAAGACGCCCGCGCTCGAGGGTTCGCCCCAGCGTCGTGGTGTCTGCACGCGTGTGTTCACGACCACCCCGAAGAAGCCGAACTCGGCGCTCCGTAAGGTCGCGCGTGTGCGTCTGACCTCCGGTATCGAGGTCACGGCCTACATCCCGGGTGAGGGGCACAACCTGCAGGAGCACTCCATCGTGCTCGTGCGTGGTGGCCGTGTGAAGGACCTGCCGGGTGTTCGTTACAAGATCATCCGCGGTTCGCTCGACACCCAGGGTGTCAAGAACCGCAAGCAGGCCCGCAGCCGCTACGGCGCCAAGAAGGAGAAGTAAGAATGCCTCGTAAGGGCCCCGCCCCGAAGCGCCCGGTCATCATCGACCCGGTCTACAGCTCTCCTCTTGTCACCTCGCTGATCAACAAGATCCTGCTCGACGGCAAGCGTTCCACCGCCGAGCGGATCGTGTACGGCGCCATGGAAGGCCTCCGCGAGAAGACCGGCGCTGACCCGGTCATCACGCTGAAGCGCGCGCTTGAGAACGTCAAGCCCTCGCTCGAGGTCAAGTCCCGCCGTGTCGGTGGCGCCACCTACCAGGTGCCGATCGAGGTCAAGCCCGGTCGCGCCGCCACCCTCGCTCTGCGCTGGGTCGTGGGTTACTCCCGCGCCCGTCGCGAGAAGACCATGACCGAGCGCCTCATGAACGAGCTGCTCGACGCCTCCAACGGTCTTGGCGCTGCCGTCAAGAAGCGCGAGGACACCCACAAGATGGCCGAGTCGAACAAGGCCTTCGCGCACTACCGCTGGTAGTCGCTCACCCCATCGAGACCGAGAGAAGATTGAGCCTTATGGCCACCACTTCGCTTGACCTGGCCAAGGTCCGCAACATCGGGATCATGGCCCACATCGACGCGGGCAAGACGACCACCACCGAGCGGATCCTCTTCTACACCGGCGTTTCGTACAAGATCGGTGAAGTCCACGACGGCGCAGCCACGATGGACTGGATGGAGCAGGAGCAGGAGCGCGGCATCACGATCACGTCCGCCGCGACGACCTGTCACTGGCCGCTCAATGATGTTGACCACACCATCAACATCATCGACACCCCGGGTCACGTCGACTTCACCGTCGAGGTGGAGCGTTCGCTCCGCGTCCTCGACGGTGCCGTCACCGTGTTCGACGGTGTGGCCGGCGTCGAGCCCCAGTCCGAGACCGTCTGGCGTCAGGCGGACCGCTACGGCGTGCCGCGTATCTGCTTCGTCAACAAGCTCGACCGCACGGGCGCCGACTTCCTCCGTTGCGTCGACATGATCGTCCAGCGCCTCGGCGCTGTCCCGATCGTCATGCAGCTCCCCATCGGTGCGGAGGCTGACTTCCGCGGCGTCGTCGACCTCGTGTCGATGAAGGCCTTCGTTTACCCCGAAGAGGCCGTCAAGGGCGAGATGTACGACACCGTCGAGATCCCGGACAACCTCAAGGAGGCCGCCGAGGAATGGCGCGGCAAGCTCCTCGAGGCCGTCTCGGAGAACGACGACCAGATGATGGAGCTGTACCTCGAGGGCGAAGAGCCCACCGAGGAGCAGCTGCACGAGGCGATCCGTCGGATCACCCTCGCGTCGAAGGGCTCGGCCGACTCCGTCACCGTGACCCC
CGTCTTCTGTGGCACGGCGTTCAAGAACAAGGGCGTCCAGCCCCTGCTCGACGCCGTCGTCCGCTACCTGCCTTCCCCCCTGGACGTCGAGGCCATCGAGGGCCACGACGTCAAGGACCCGGAGAAGGTCGTCCAGCGGAAGCCCTCGGACGACGAGCCGTTCTCCGGCCTGGCGTTCAAGATCGCGAGCGACCCGCACCTCGGCAAGCTCACCTTCGTCCGGATCTACTCCGGTCGCCTCGAGGCCGGCACCGCGGTGCTGAACTCGGTCAAGGGCAAGAAGGAGCGCATCGGCAAGATCTACCGCATGCACGCGAACAAGCGTGAGGAGATCCCGTCGGTGGGCGCCGGTGACATCGTCGCCGTCATGGGCCTGAAGCAGACCACCACCGGTGAGACGCTGTGTGACGACAAGAACCCGGTGATCCTGGAGTCCATGGACTTCCCGGCGCCGGTCATCCAGGTCGCCATCGAGCCCAAGTCCAAGGGTGACCAGGAGAAGCTGGGTGTCGCCATCCAGCGCCTCTCGGAGGAGGACCCCTCCTTCCAGGTGCACTCCGACGAGGAGACCGGCCAGACCATCATCGGTGGTATGGGCGAGCTTCACCTCGAGGTGCTCGTCGACCGCATGAAGCGCGAGTTCCGCGTCGAGGCGAACGTCGGCAAGCCGCAGGTCGCGTACCGTGAGACGATCCGCAAGGCCGTCGAGCGTATCGACTACACGCACAAGAAGCAGACTGGTGGTACCGGCCAGTTCGCGAAGGTGCAGATCGCCATCGAGCCCATCGAGGGTGGCGACGCGTCCTACGAGTTCGTCAACAAGGTCACCGGTGGCCGCATCCCCCGTGAGTACATTCCCTCGGTGGACGCGGGTGCCCAGGAAGCCATGCAGTTCGGCATCCTGGCCGGCTACGAGATGGTGGGCGTCCGCGTCACCCTTCTCGACGGTGGTTACCACGAGGTCGACTCCTCGGAGCTCGCCTTCAAGATCGCTGGTTCGCAGGCGTTCAAGGAGGGTGCCCGCAAGGCGTCCCCCGTGCTCCTCGAGCCGATGATGGCCGTCGAGGTCACCACACCCGAGGACTACATGGGTGAAGTGGTCGGCGACATCAACTCCCGCCGTGGCCAGATCCAGGCCATGGAGGAGCGCCACGGCGCTCGCGTCGTGAAGGGCCTCGTGCCCCTCTCGGAGATGTTCGGCTACGTCGGAGACCTCCGCAGCAAGACCTCGGGTCGCGCAAGCTACTCGATGCAGTTCGACTCCTACGCCGAGGTTCCGCGGAACGTCGCCGAGGAGATCATCGCGAAGGCCAAGGGCGAGTAACTCTTCCGAGCTCACGCTTTAGGCTTGTCACCGGAGCCCGGTCGGGCATGCGTCGCAGTGCGGCGGATGCCCCCGGCACCGGCATTCCAGCAAAGATCACCTGGCGCCGATGAAGCAAGGCGTACAGAACCACTCAGGAGGACCCCAGTGGCGAAGGCAAAGTTCGAGCGGACTAAGCCGCACGTCAACATCGGCACCATCGGTCACATCGACCACGGTAAGACGACCCTCACGGCCGCCATTACCAAGGTGCTGCACGACGCGTACCCGGACCTGAACGAGGCCTCGGCCTTCGACCAGATCGACAAGGCTCCTGAGGAGCGTCAGCGCGGTATCACGATCTCGATCGCGCACGTCGAGTACCAGACGGAGTCGCGTCACTACGCGCACGTCGACTGCCCGGGTCACGCTGACTACATCAAGAACATGATCACGGGTGCGGCGCAGATGGACGGCGCCATCCTCGTGGTCGCGGCCACCGACGGCCCGATGCCGCAGACCAAGGAGCACGTGCTCCTGGCCCGCCAGGTAGGCGTGCCGTACATCGTCGTCGCGCTGAACAAGGCCGACATGGTGGACGACGAGGAGATCCTGGAGCTCGTCGAGCTCGAGGTCCGTGAGCTCCTCTCCGAGTACGAGTTCCCGGGCGACGACCTTCCGGTCGTCAAGGTCTCGG
CGCTCAAGGCCCTCGAGGGCGACGCCGAGTGGGGCCAGACCGTTCTCGACCTGATGAAGGCCGTCGACGAGTCCATCCCGCAGCCCGAGCGTGACGTCGAGAAGCCGTTCCTCATGCCCATCGAGGACGTCTTCACGATCACCGGTCGCGGTACGGTCGTCACCGGCCGCATCGAGCGTGGTGTCCTGAAGGTCAACGAGACCGTCGACATCGTCGGTATCAAGACCGAGAAGACCACCACCACGGTCACCGGCATCGAGATGTTCCGCAAGCTGCTCGACGAGGGCCAGGCCGGTGAGAACGTCGGTCTGCTGCTTCGTGGCATCAAGCGCGAGGACGTCGAGCGCGGCCAGGTCATCATCAAGCCGGGTTCGGTCACGCCGCACACCGAGTTCCAGGCCCAGGCCTACATCCTGTCGAAGGACGAGGGTGGCCGTCACACCCCCTTCTTCAACAACTACCGCCCGCAGTTCTACTTCCGTACCACGGACGTGACGGGCGTTGTGACCCTTCCCGAGGGCACCGAGATGGTCATGCCGGGTGACAACACCCTCATGGACGTCGCGCTGATCCAGCCGGTCGCCATGGAAGAGGGCCTGAAGTTCGCCATCCGTGAGGGTGGTCGTACGGTGGGCGCCGGCCAGGTCACCAAGATCACCAAGTAATTCCGATTACTTGTGGGTCGGGGTAACCCGGTTGCTCTGAACTGAGCGCACAGCACCAAGCAGGGCCCGCACGGCATCACGCCGTGCGGGCCCTGTGCTGTCTCCGCGGGGCTCGCGGCGGACAGGACGACGGGCGCGGCCCGCACACCCTGAGGGTGTGCGGGCCGTCGTGACGCGGGCGCCGTTCGCGTCGGGAGGTGCGGTCAGCTCTCCGGTGGCCGGGCGTATGTGGTGGCCAGCAGGGAATGGCGGGTCGCCCCGTCGCTGTGGACCAGATGGTGCACGGACGACGGGACGAAGCCGCCGAACAGTGTGCCGACGAGCCCCAGCGCCTGGCACCTGAGATTGAGGTCGTAGGTGGCCATCGAGGCCCGCAGCGCACTGATCCGGTAGCCGTGGGAACCCGCCCAGGTGTCCGCCCGGTCCAGACTCGCGTAGAGCGCGACGATCCCCGCTCCGGTGGAGAACTCGCGCTGGACGCCGAGGTTCTCGGCCGGTCCGATCTCGTCGAGCCCGGCCAGGTAGCACGTCTCCTCGGCGGTCACCCGGTACAGCCCTGGTTCGGCGCCCTCGCTGCGGAACGCGAAGACGAAGCCCTCCAGCGCGCCCGCCGAGGCGTCGAGCCCCCAGTCGTCACGGTCCCGGCGCAGCACGTCACGCAGCAGCGACAGGATCACGTCCGTGCGTACCGGCAGTGGCGCGTAGTGCAGCGAGGAGCGGCGGCGTTCGAGCGTGCCCCGCAGGTCGTGGACCGGCCGCAGGTCCTCGGCAGACGGTTGCGGTTCGACGGGCACGGAGTCCGCGAACGGCACCGGCCGGCGTTCGGCCGCCAGGCCGTCCGACGTGGGCGTCCGGGAGGTGAAGCCCTCCAGCACGGCCGTCATGGATGTCACGTCGTTCCTCATGCGGTACCTCCCAGGGACGCGAAGGCCGCCACCGGCTCGTCGGCAGGCGATGTTCCGAGCAGCCGGGCCAACGCGTCGTCGTCCCAGTCCGACCTGGGTGTGAAGCCCAGGCCCAGGTGCTGTGCGAGCTCCCGCAGACTGGCCAGGTTGCAGCCCGCGTCCAGAAAGACCAGCCTGAAGCCGAACGTGCCGTACTTGGTCATGACCTTCTTGAGATCGCCGGTGATGATGATGTCGCACGGCGAGTCACCCGGAGGTACCTCGCCGGAGACGGTCACGAGTGTGTGGCTGCCCTGGGCGTACGCGTAGACCCCAGGAGGCATGATCCGGTCGTCGCGCACCACCGCGTAGGCCGTCGTGCTGCCGATGTTTCCCGACGCCGCGGTCCATCGTTTGACGCGCTCCGGAGTGGTCTCGTCCTCCTTCACCC
CGAAGGCGACCTTCAGCAGCAGTCCCAGGGAGCTCAGGGTGAGCGGGGTGTCGCCGTGGGACGGGTCATGCCGTTCCGACCCGGCGAGGACGGAGATGTCCAGGGCGGGGAGCGGTGTGTGCGGGCGGCTCGGCCAGTCCTTGAACTGCGACTGCAGGCGCAGGTTGGACGCGTAGTAGTGCGCCTGGTGGTCCTTCGGAGCGAGGAACGCGCGTGGAGGCATCGCGACCGAGGCCTCGTAGACGGCACCGGCGGGTGCCTGGGGTGCGACCGGCCCCCGCGCGTAGGAGCAGCGAGGGCACCCCGGGCGGACGGCAACCGGCCGGTAGACCGTGCTCAGGGTCGCGGTGTCGATGACGGTGAAGTCGCCGGGGAGGTGCGAGATCGTGGCCCGTGCGAGCAACGCGGTGACGTGGTGGGAGGCCAGGCCGACGACCAGGTCGTGCAGGTATTCCGGCGGCTCGCCGGAGAGGTCCGCCTCACCGTGACGGCCGCAGTCCAGGCACGGTGTGATCGAGAGGTCGGCGTACGGGCCGATCGTGATCGTCCGCGCGTCGGCGCGCACCCGCAGCAGCGGACGCCCGTCCTGCCGGCACCGTTCCTCGGTGGCGGCGAGCAGGGGTGCCGACGCGGGCGTCTCGAAGAACACGGTGAGCTCGTCACCGGGGCCCGGCGGCCCGGCCGGTTCCGTGACCACCGGGCACACCTCCCGCAGCGACCGGCGGGCCCCGGCGACCAGGGCGGCGTCCCCCTCGAGGCGCACCGAGCGCGACACCAGCCGCGCCGCGGCATCCGCCCAGGACGGGTTCGATCCGGTGGAGTTGCCCAGCCGGGAGAGGAAGACGGCCCATTCGGGCGTGACGTCCGGTTCCTCGCCGGACATGGCCTCCTCGACCGCCCCGGCCGTCGACAGCAAGGCCAGGCACTTGTAGACCGTGGCCTCGTCGAATCCGGTCTTGAGCGCCAGTTCAGTGTGGTCGCGGGTTCCGTCGCACGCCTCGGTCAGGGGCACCAGCCCTTCTCGTGCGAACGCTCCGGAGAACACCTGTGCCCGGTCGGCCCCGTCGAGCACGACGGATTCCCCTGCCCTGCGCAGCCTCACCCCGCGGCGCAGGACGGGCCGGGCAGGGACGGTCAACTGCATGTCCGTACGTGCGGCCCGCCCGATCTGCTCGGCCTTCATCTCGGCAACCATGATCGATCTCCCTCTTACCGCAGCAGTTCCGGAACAGGCATGGCGCTGTCCGCGCGCGCCTGCGAGAACCGGCCCCCGCACCGGGGGCACCGGTTCACCGAGACCGTCACCGCGGACGAGACGGCTCCGGAGACGAGATTGAACGTACGTACGGTGCCCCCGATCCCCGTCACACCCGTGGCGATCTCGGTCAGGGCGAGGTCCAGGAGGCCGGAGGCGACGGACAGGTGCTGCCGGCCGAACCCCTCGGGCAGGCCGGACAGGGCGGCGTCCATGTCGTACGGGCGGGCGGTGCCCGCGTGTTGGGCGGCGCGCTTCCTGTAGCACGCGTAGCAGGCCGTCCGGCCCGGAACCACGACGGGTCCGCAGAGGACCTTGGTCGGCAGCAGCTGGAGCCCGACGGACGGTGTGCTCCGTTCCGCGCAGATCCGGTCCGTCTCGTCGCGGAGCCCGGGGTCGCCGCCGGCATGCACGGGGACGACGACATCCGCGTGCGGTGCCGTGTCGTTCAGGAAGTCCTCGAAGGACACCTGTAGCTCCGGGGGGACACCACGGTGCTCCGTGAGCCGCCGGCTGAAGGTGTCACCCACGAGGTACAGGCACTGCGGACGCGGATCGTGCGACATGCTGCTCTGCTCCTTCTTCTCGCTCATGCGAACGGCTGCTGCACGGGGTTGACCGCGTCCTCGGCATGGCTCGTGTGCCCCATCGCGCGCGGCGCGTCATAGAGGCGCGGGGTGCCCAGGTACCGCTCGCCGTGTACGAACGACACGGGCATGGCCTCGGGCACGAGCACCTTGACCGC
CCGCATGCCCACCTGGCGCGCCTCGTCCGTGGTGATGTCCGTGACGAGCACCTCGGCACCCCGTGCCGCGAGCCGGGCGACGACGGTGTCCAGCGGGTCCGCTCCCGCCGGAAGCCCCGGCATGCCTTCGAGGCCGTACGCCGGCCGTTCACCGTCCAGCAGGAAGCCGAACACGTCCCGCCGGTCGCGCGTCGCGTTGTGGACCGCGCCGCCGACGACGCTCACCTTGGCCGGATCGGGCTCGCGCCCGGCGTACGCCGAGAGGTATCCGCGCAACGCCACCCGCAGGGAGGCCAGTTCGCGGTAGATCTTGCCCAGCGCCTGTTCCGGATGGACGTCGCAGGTGGCGGCGACGATCTGGGCGAGCGCGGGGTCGGCGTCGGAGAGCTGCACGGCGTAGATCACCGGGACGCCGAAGTCCGTCGTCGCGTCGAACAGCCTCACCCGCAGGTCCGTGGAGGTGCCGACGCGGTGCAGTTCCCGGACGCCGGCGTCGAGCCGCGCCGGATCCACGACCAGCTCGGGCAGCCGCAGCTGCTGGAGCCAGACCAGCGCGATGGCGTCCCGCTCGACGACTTCCAGCAACCCGCCGAGCACGGCGCTGCGTACGTCGGAGTGGACGGCGGCACCCGTGGTGATCCCGCGGATGAACTCTTCGGACTTCGACTGGTAGGGCATGTGCAGATACACGGAGATCGCCGGCACGAGCACCGGTATCCGCCGGGTGAGCGACCATGCGCGCACCCATCGGATCGGGACCGAGGGGTCGTACGCGGACAGGCTGCAGTCGTCGCGGGCCAGCTCGGTGGGCGAGCAGCTGGGCCACCGGGACGGCGACACGAACTCCTCTGTGAGGTCGTTCTCGGCGGCGACCACCATCTCGTCGTCGTCCCAGGCACACGTGGAGTACCGTTCGAGCGCTTCCGCGATGGAGACCAGCTTGGCGCGCTCGGGCGTGAGCCCGGTGCCCGCGCCGTCGGAATTTCCGGTGTCCTCGTCGTGGGCCCACGTCCGCAGGTTGGGAAGAGCACGGGACGGGACACCGAGATAGGCCAGCTGGACGGCGAAGGGCGGTTCGCCCTCGCGCACCGGCAGCGGCGCGGTGCGTGACACCAGTCCGTAGGGCGAGACGAGCTCCTCGAGACCGCGCAGCTCGGCCGAGATGTGCTCCTGAGGCCTGTTCAGATGCATGTGGTTCTTCTTCCCGCGTCCGGATGTTCCCCGTGTCCTTGCCGCGCTACTCGGCGGTCTTCGTGGATCCCGACGCCATGCTCCTGGCCAGGCCGTCGAGCTCGTCCTGGAGCTCCTTCATGGACTTTCCGCAGACGTCTTCGTTCGCACGTGCGACGACGTAGCAGAAGACGTACCTCTCCGCCAGGCTGAAACCGAGGGTGTACAGGCAGCTGTACAGCAGGCTGGTCTGCAGCCGGAAGGCCAGGAAGTCGGCGTCGCGGTGCATCAGGCGGTCCAGCTCGGGGCTCGGTGGCGTATGGAAGCGCGTCGGCTCGACCGGGGCGCCTCGCTTGCGAACGAGGTCTTCGAGCGTGTGTCCGGCGTTGACGACGGATCCGTCCGAGAAATTTTCGGCCAGGTGGCTTGATTCAGACGTGATGGATTTCGTCCAAAGCCGAACCATCTCGTCGGCCGGATCGTCGTCCGGATCGCCGCACGCCGTGATGAATTCCCGGACGCCGGCCCCTACCTGTTCGTAGAACCGGGCGCAGGCGGCGTCGAAGGATTGCGGATCCTTGGTGCGCAGATAAATCGCCTCGAAATGCGAACGGTAACTCAGCAGGCGTAGGGAAAGCAGCTCACGGAATTCGTATCCGTCGATTTCTCTCTGCGGTGATCTCAGAAGAGTGGCTCTGGTGTGCGCGGCCATGAGGCGGATGGCACCGAGTGCCGGCGGAGGCGATTGAGGTGTCTCTGCCGCACGATTCAAGAATGAGACGAGAGTCGGTGCGGCCTCCTGGAAGAGCCGCTTGGAGGAAGCGGAGAAT
CCGGTTCCGGTAAACCCCGCGTTCCACAACGGTGAGGGAACCGAATCAAGGGGCGTGGCGTCCACCTGTGCGGCGCACCCATATCGTTCGGCCGTTCTATGCAGGTCAGCGCGGGTGGAGTCCTCGTCGGACACCCCTTCGATGCTGATCTGCAGGGTGTCCGTGCCGAGCTGCGCGTCCTGGGTGCGGACGAAGTAGTAAGGGCCGTGACCGCCTGCTCCACCTACCCGCTCCGGCGCGAACAATTCGCGAATCAGGGGCGCGAACGTGTCGTTGTATTTTGAACCAACTGGCTGCGGGATGGTGGCAAACAAGTGCACGAATGAAACCCCCGAGATTTCGGCCGGCGCCAGACGGGCCGTCACGTGATCTTGTAATCGACCGTAACTCAGAGAGATGAGAAGAAGGAACCCCGGCTCCAGTGTGATCTGCGTCACTCCGGAAACTGCTTGCGTTGTGATCGTCCGAATGCCTAGATTCGAATCACATTGACGAAAGGGGGTGTAATCAATGGAGCAGCAGATCGAACTCGATGTGCTCGAGATTTCGGACCTCATTGCAGGTGCCGGGGAGAACGATGACCTGGCGCAGGTGATGGCCGCCTCGTGCACGACCACCAGTGTTTCGACGAGTTCTTCGTCGTCCTCGTCCTGAATCTAGGGACCGGGAAACAGTTGAGCCACCGTCGGGGTGTTCCTCGGCGGTGGCCTTCTGCAGTCCTGGCGTGCCTTTTTCGATTCGAGGAACGCGCCGTCCCTGGAGGTCGACCAGAGGTGCGGGCGTCGTGCCCCGGACGACGACGGGCTCGTGCCAAGCCGGCCGGCCGAGAGCCGGAACACATGTTGTCAAGCGCGAACTGACCGGTGCGGTGGAGTGACAGGCGGGCTCCGTGCGGCGGGGCGCCTCTCCGCGGCCCGGCCCCGACGTCCGCACGCCGTGGTGAACCGGGCCGGCGTGACGAAGTTGGGGGATTCCTATGGGTGTGAACATCAGTCCGTACGTCGTCTATCGGCGCAGCAGACTCCCACTGGGCGAGCTCGGAGGGATGTCCTTCACCACCGCCTGGTCGCGCATCGATGAACTGCACGCCCTGCGGGACGAGATCGGCAAGAACGCCGTCGGCCTGGCCGACCGCCTCGGCGAGCTCGTGCCTACGCTGGGGGACGACGTCCGGGCCGACCTGATCAGGCTGCGGCGCGACGTGCACAATCTGCGGCACGACCGGGCGGTGGCGCGACTGGAGCCACTGCGTCCGCATCTCGGCCGCGAGGTGGTCGACGAGGTCGAGACCTGGTGCGCGCTCGGCGTGCGGGCCGAACAGTGCGAGCGAGCAGGGCGCGAGGAGCTCGAGAGTGAGAAGGCCCGGGCCGCCGACGGCTTCGGCGCCCTCTTCGAGCACGATGCGATGGCGCGCAGCATCCAACTCTCCGGCGACCGGCTGTACCGGGGCCTGCGCGACCTCGTCGCGGGCGACGAGGCGAGCGCCCTCAAGCCGAGCAAGGCCCGGCTGCGGGAGTCTTCCCTCGTCAACTTCGCCTACCGGGCGAGCTTGAAGCCGTCCCCCTTCGGACGGTTCACCGAGATCGGCGCGTTCCCTCCGGACGACCCGCGCCCCGCGGATCCCGGTGGCCGGCACGGCGGGACGCAGGAGTCGGTCACGACGCTGAACCGTCTCCTCGTGAACTGGGGGCCCCCCGGCCTGCCGCTCGTACCGGGCGGGATGGAGCCGGGGCACCTCGTGCTGAACTCCACGCTGCGGGCCGGCACCGAGTACGTCGAGTACGTCGGTGTCGCTCCCGGCTCCCGTGAGGACGGCCGGATGGCCACCGAGAGGGTGCTGCGCGTACGCCGGGAGGGACTCTTCGACGCACTGCTCGCGGCGATGCCCGAAGGATCGGCTCCGGCGGCCACGGTGCTGCGCGACCTCACCGCCGTCACCGGGAAGGCGGAGACGAGCCGGAAGGTCGTGCAGGGGCTGATCCGGGCCGGCATCCTCTTCT
TCCGGCCGGAGATCGACGATCACGACCCCGACTACTCCATGAAGCTCGACCGCGTACTCGCGGCCGGCGGGACGCCGGAGACGGCCGCGCTACGCGGACACTTCTCCGAACTCAGGCGGTTGGAGACGGACTTCTCCGAGGCGGCGGCCGACGAGAGGCAGAAGCTGCTCGACTCGGCGTACGCGGCGATCGGCGGCATCGCCGAGCTGTGCAAGGTGTCCCCGCCCCCCGAGGAGGTCCTGAAGTCACCGGTCTTCGAGGACACTCCGGCATCCACGGCGCCCCAGGCCTGGAACCTGCCGACGGTGGAGGGGAGCATCCCCGCCCTGACGGGCCTCTGGCGTCTGGCCTCGATGATGGACAACGGCCAGGTGAAGCGACTGGGTCTCTACTCCTTCGCCACCCGCGTGCTCGGCGACCGCAGCACGATGCCCTTCCTCGAGTTCTTCCAGGCCTTCTCGTCGCTGACGGACCAGGAACAGGTCGACGTGTTCATGGGGCGCGACGTGGAGGAGGCCGAGAGGTACACGAGGCAGCGGGCGGAGGCTCTGCGCACGATCCGGCAGCGGCTGGTGCCCGGGGACGGCACCGTGCACCTGGACCCCTCGGTCATCGAGAAGGCCTGCGAGGGCGTGGAGGACCTCCTCGACACGGAATCGGTGACGTTCCGCGCGCAGTTCGCCCAGGGAGTGCTGCCCGACCGGGACCGGACGTTGGTCGTGAACGGCCTGCTCACCGGCTACGGCGTCTACTTCTCACGGTTCGGCTCGTTCGTCGAGGGCACCGACGAATGGTCCCTGCCGGCCGCCCAGCGGGAGCACCTCGCACGCAGGTTCCCCGGCCAGGTCGACCTCAACTCCGTGCTCGGATTCAACTTCAACCTGCACCCCTCGGTGACCCGGCGGGTCGTCAACTACCCCGGCGCGGTGTCGCTCGGCGCCGAGCGGACGGTCTACGGACTGGCGCGTCTGGAGGTCCGCGCGGATCAGGCCACCAGGTCGCTGCGCCTCTGGGACCCTGAGGCGCAGGAAACCCTCGACCTCGTGCCCATGAACTTCATGACCCCGATCGGGGTCCCGCTGCTCTACCGTCTGCTCGAGGCGCTGTCCCCGTCCAACCGCTACCTGTGGAAGCCCCTGGACGACATCAGGGACGCGGGAGGGCCCACGGTGTACGGCGAGACGGCACCCCGGCTGGTCGTGGGTGACGTCGTGGCCGACCGCAGGTCCTGGAACGTGGCCGCGGCCGAGATCCCCATGCTCCAGGATCTGAGCCGGGACGTGCCCGAAGCGCTCGTGGCCTTCGACGCGTGGCGCCTGACGCGGGGCCTTCCCCGCCACGCCTTCGTGCTGTGCCAGACGCCCGAGGAGCGAGACGTCATGGCCGGGCGCAGCCGGAAGGTGACCCGCCAGTGGGCGGACTACGCGCACCTGCGGCGCGCCAGCGTGCACAAGCCGATGTACGTCGACTTCCGGAACCCCTTCCTGGTCCGGAGCTTCGCGAAGTCGGCCCTGTCACGCGGCGATGTCGTCGCGTCGATCCGCGAGTGCCTTCCTTCGGTGGACGACTACGGCCCGGACACGGGCTGGACCGCAGCAGAGGAGTTCTTCGTTGAACTGTGTACCGACAACTAGTGGGCAGACCGGAACGCGTGAGTGGAGGACGGTCCACATCCACGTTCCGCACTCGCTGCACACCCCCTTCCTGTGCGACGTGGTCGAGCCGCTGCTCCGGTCCGAGGGACTCCAGGACCACTTCTTCTTCCTCCGGTACTGGCAGGGCGGCCCCCATCTGCGGCTGCGGATGCTCTGCGGCCCCGGGGCCGGTTCGGCCGAGGCGGCCGAACGGGTCGTCGCGGGTCTGGCACGTGCGATGCCGGAGTTCGGTGCGCAGGCGCGGGAGGAATACGCGCTCGGGCTGACCTTGCAGGACGAGCTCGCCCGCCTGGAGAAGGAGACCTCGGAGGAGGGCCGGCCCATCGGGGCCCTCGACCGGGTGGC
GTACGAGCCGGAGTACCGCAAGTACGGGGGAACGGAGGGGCTGCAGATCGCCGAGACCGTATTCCGCAAGTCGTCGGTGGCGGTCCTCGGCCTGCTGGGCGGGCAACCGCGGGCGTGGGTGGACGAGCGCCGGGCACCGATCGGGGAAGCCGCGAGGATCATGGCGATGTTCCTCCACGGCGCAGGCCTCGACCCGCGGGCCGCAGGGCTGTTCCTGCGGGAGTACGAGGACTGGTGGCGTACGTACGCGCCGGATGACATGCAGCGTGCCTGGCCGAAACTGTTCGGCGGCGTCTCGGCACAGATGACGAATCTGTGCGCGGCGGTCTGGCGTGACGGCGCCACGGACGTGTTCCACGACATCAGCGCGGAGGCCGCCGCCCGCGCCCGTTCCGTGTGCGGGGCGGAGCCCGGCGGCGATGTCCGCGACCTCCGGCTCGACGGCACGCCTTACCCGGGCTGTCTCTCGAACTACGTGCACACCACCAACAACCGTCTCGGCCTGGTCCCCGCCGCCGAGGGGCTCGTCGCGTACCTCGTGCGCCGGGGCCTGGAAGCGATGGACGGGTAGGGCCTGTCGTTGTCGGGATCATGCGGGGCCCCGATGCCCCGGTGCCGGAAGCCCGGAGCCTCCGTCCCGCGCAGGCCCTTCCCTGTGGGGCCTGCGCCGGACGGAGAGCTCACCGGTGCGTCACCAGCTCACCGGAAGCTTGTGCAGGCCGTACACCCCCATGCTGTCCTTGAACGGCAGTTCCGCGACCGGGGTGGCCAGCTTCAGTCCGGGAATCCTCGCGAGGAGGGCGTTGAAGACGACCTCCAGCTCCAGTTTCGCGAGGTTCTGGCCGATGCACTGATGGATGCCGTGGCCGAACGCCAGGTGGTGCCTGCCGCCGCGCTCGATGTCCAGCCTGTCCGGGTCGGGGAACACCGCCTCGTCGTGGTTGCCGGAGGCGCTGAGCCCGATGACGCCCTCACCCGCGCGGATCAGCACCCCGCCGATCTCGAGGTCGGCCGTCGCCACCCGGGAGGTCACCTGGTCGGCGATGCTGAAGTAGCGCAGCAGCTCGTCGACGGCCTGCGGGGCCAGACCCGGGTCGGCCCTGAGCTTGGCCAGCTGGTCGGGGTTCTCCAGCAGGCCCACGACGCCGAGGGAGATCATGTTCGCGGTGGTCTCGTGACCGCCGACCAGGAGGACGTTCGCCATCCCGACCAGTTCCCCGTGGTCGAAGGTGCCGGTCTCCCGGTTCTTCACGACGAGCCTGCCGAGCAGATCGTCCCCCGGGTCCGCTTCCTGAGCGGTGACCAGCTCGGAGAAGTACGCGTGGAGCTCTTGATGCGCGCTGTTGCGCTCCTGTGGGTCGGCGTCGACCGACACCAGCTTGTTGGTGCGGTCCTGGAAGAACGCGCGGTCGCTGCGGGGCACCCCGAGCAGCTCGCAGATCACCAGGGACGGCACCGGGAGGGCGAGCGCCTCGACCAGATCGGCCGGCCCCTCGCCGGCGAGCATGGAGGAGATGCACTCGTCCACGATCTCCTCGACGCGGGGACGCAGTTGGAGCACCCGCTTGACGGTGAACTCCGGGATCACCATCTTGCGCTGCGCCGTGTGTACGGGCGGGTCCATGGCGAGCAGCACGGGCCGCATCTGCTCCATGACCTCCGGCGGCGCATCGAAGTGGAGTGGATAGCCCGGGTGGGCCAGGTTCGAGCTCACATGAGGGTCGGCGAGCAACTGTCGTATGTGCTCGTGCCGGGTGAGCAGCCACGCGGTCCGCCCGGAGGCCAGAGTGACCTTGGTGATCGGCTGCTCGGCACGCAGGGACGCGTACTCCTTCGGCGGGTGGAGCGGGCAGGTCCTGGGGTAGGGATACGCGCTGTCCTGTCGGTCCGTCACGGTCTTCTCCGCGGGTAGTCCAGGGAAATCTCCTTACGGACTCCATTCAAGCGGAAGATGATCCACATCGTGGCGTTATCGGTGAATGAGGCCGAACTCACGTGGA
CCGCAGGGAGGAAAGTCGGCCGCCTTCCCGGCATGGCCCGTCCCGTGCTTTCGGTGGGAGGGGTCGGTGCAGTGCGACATTGCAGTGATCGCGTAATCCGGAATGACCCCTTCCGTGCGGGGATGCGTCGATAGTACGTTGGATTTCATGTGCTCCACATCGTGGAGAGTTCGCTTGCGCCGTCGAAGTCACAGTGTGTGCCGAGGGGGAGTTGGGGCTATGTACCTTTCGATCGTCATGTGGGACCTGAAGAAGTCGGAAGCCACGGTGGAGAGCCTCAGGGAATACCTGCGGGACTATGCCGTGGACGCCTACTCCGCGCTGGACGGAATGCGGCTCAAGGCGTGGTTCTCCGATTCCGCACGTCAGCTGTGGGGTGCGGTCTATCTGTGGGACAGCCCCGAGCAGATGCCCGGCCTGTACAAAGTCAGCCGCGTGATCGATCTCATCGGGTATCCGCCGACTTCGGTCGGTGGTTTCACGCTCGAGGCGACCGCCGAAGGGAAGAGCGTTCACGAGACACTGGCCGGCCTGGGGATCGCCCTGGAGGGCGGAACGCAGTAAGGGCAGGCGTGCGACCGGATGGGCGGGAGGTTTGCCGGGCCGGTCGGTTCGAGGGGCGGTCGAGGGCCGGAGCCGCGCGGGTGGCTTCGCCGCTCCGGCTGGCGGCCGGGTGTGGAGTGCCGCGCGGCGCCGGTGTCGCCCGCGCCGGGCATCACCGTGCTCGCGGAGGGTTCAGGTGTGTCGGACCGGCGCTTTCGCGTGTGTTCGGCACCGTCCGGTGGGCCGGGTGCTGTGCGCGGGCCTCGAGCTCCCGGGCGGGCGCGGTGCGCAGGCCCTCCCCGCCCGGACACGCGTATTCCGCACCGCTTCACGAAGATCATTCGGTGAAGGAGGCGGGGGCGCTCGTGCTAACGTCGTGATCGTGGCCAGCCTTGACATTATTACCGAACGCTCTGATTCTGCCGTACAACGCATCATCGATGTGACAAAGCATTCGAGGTCCGTTGTCCGCACGGTGCTGATCGAGGACATCGAGCCTCTTCTGCAGAGCATCCGTGCCGGAGTGGAATTCATCGAGATCTACGGACTCGACACCGTGCCTGTTCCGGACAGTCTGCTCGCCGAATGTGAACGGCGCAGAATTCCGGTCCGGCTGCTCGCCGCTTCGGTCGCCAATCAGGTCTTCAAGACCGAGAAGAAGCCCAAGGTATTCGGTATCGCCAAGGTCCCGCGGCCTCGTCGCCTGTCGGACCTGTCCGACATGACCGGTGACCTCATCCTGCTCGACGGAGTGAAGATCGTCGGCAATATCGGAGCCATCGTGCGGACCTCGTTCGCGCTCGGGGCCTCGGGAATCGTGCTCGTGGACAGCGATCTCGGCAGTATTGCGGACCGCCGTCTGATCAGGGCGAGCCGGGGCTATGTGTTCTCCCTTCCCATCGTTCTCGCGTCCCGGGCCGAGGCGCTCCAGTACTTCCAGGACAATGCGATGCGCCCGGTGGTGTTCGAGGCCGACGGGGATCTCGGCGTCGCTGATCTCGACGGTATGGACGAGCGACTTGTGCTCATGTTCGGCAGCGAGAGGATCGGCCCGTCGGGCGAGTTCTCCGACATCGCCGCCAAGTCGGTCTCCATTCCGATGAATCCCGCGGCCGAGTCCCTCAACGTATCGGTGTCGGCCGGAATCGCGCTGCACGCGAGGGCCCGCCGTAACCTCTCCCGGTAGTCCCGGCCGCAGAGCCCCGTCAAGGGCCCCGTCCCTCCCCTCCGGGAGGGGCGGGGCCCTTGACGTGCGCCGCCGGGAGCCCTCGCTACCGCGGCCGCCACATCCAGGGCGCCGCGTCCGGACCCAGGCCGGCGACCGAAGGCCGGCCGTCCGGGCCCAGGCGGAGCGAGGCCCCGTCCAGGGCGGCCGGCCCGTCGGTCCGCACGTGGAGCCCGCCGCCCATGAGCAGCTGGACGAGGCCCTCGGCGGTGCGGCCCAGCAGCACGGGCCC
CCGCGGCGACGGGGCGGCGTCCACCGNNCCGTACCCGTCGAACCGTGCTCCGGGCACGGCCGTCCCCGCCCGAGCCGTGGTCAGCCCGCTCCCGGCCGCTGCCCGGTAGTAGAGGGACACGGACCCGTCGGGAGCCGGGAGGGCGGCCGGCGCGTGGGCCGGGACGGGCGCCGCCGTGAGCTGTGTGCGAGCCGTGAGCCCGGCGGACGGGGTGTCCTGGGTCCAGTGGTGTACGGCGTGGTGGCCGGCGCCGAAGACGTGGACGCGTCCACCCTCGTCCACGGCGGTGTGCAGGCCGTCCTGCACCTCTCCGCCGCCCATGTCCCGCCACGCGCTCCACCGCCCCGCCCCGTCCCGCACCCGGGTGCTGACGCCCTTCTCGGCGTCGCGTACGAAGAGATGGATCTGCCCGTCCGGAGCGGCGACGGCCACCGGTACGCCCGTGCGACGTACTTCGTCACGTCCGGGAGAAGGAGAGCCCAGGCCGCGCCAGGGGCGGAAGCCCCGGCCCGGGGCGCTCTGCTCCAGCACCACGATCTCGCGCTCGTTGTCGGCGCCGTGCCCGCCGAGTGCGGCGAAGCGGAGCCCGAACAGCAGCAGGCGCCCGTCCCGTGTGGTGGCCGAGCCCAGTGCGGGGGCGAGCGGGCCGCCGCCTAGGTCGTGCGGAGGCCCCCAGGCACCGCTGCCCGGCCCGGTCTCCTGCCACCGCACCACCCGCAGCCCCAGCACGGCATAGGCCGCGAGCCTGCCGTCCGGCTCGGCGGTGAGGACCGTGCGCGTGCCCGGGTAGCGGTGGTGGGTGGAGCGGACCCAGCCCTTGCGGTTGGTGAGCGGGCGGTCGCCGCCGACGTTGTAGTCACCGCAGCCGGACGGATTGCCGCAGTCCCAGTCGGGCGAGCCCCCGTAGGGGACGAGGTGGGCGGCCTTCCTCGCCAGTACCCCCTGCGGCAGGTTCTTCGGCCAGTGCCGGTTGTAGTAACCGCGGTAGGCGACCGTGACGAAGCCCGGTATCCGGCCACCGTCCGCGGTCGCCCGGGCCACCCAGCGGATCATCGCGGCCCACGCGAAGCAGGCCGCGGCCGTGTGGTCGGCGTGGTCGGAGTAGCCGGGCTGTTCGCTGTCCCTGCGGCGGACGGCCTCCGTGCTGTGCTGGATGTCCGGGTCGGGGTCCAGGGTGTGGACGACGGTGGGCCGGTAGCTCTCCATCAGGCCGGTGAGGACCCCGACCAGCCCGTCGTACGTGTACGAGCCGGCGCGCCGCAGCGGCGATCCGTCGGCCACGACCGTACGCAGCACGAGCCGGCGATCCTGCCAGAGACTGGGCAGCCCGAGCCGGTGCCGGCCGGTGTGCATGGCCGTGTTGAGGAAGATCAACTCCACTCTGCGGCCGTCGGCCGCCAGCACGTTCACCTCGGCCCTGTGGTCCGGGCCGAGCTCGGCGACGGAGACCTCCCAGGGGGTGAAGGGCCGCAGCCCGAGCAGGGCG
>BGC0001472_1 # 312 # 683 # 1 # ID=1_1;partial=00;start_type=GTG;rbs_motif=GGAG/GAGG;rbs_spacer=5-10bp;gc_cont=0.642
MPTIQQLVRKGRQDKVEKNKTPALEGSPQRRGVCTRVFTTTPKKPNSALRKVARVRLTSG
IEVTAYIPGEGHNLQEHSIVLVRGGRVKDLPGVRYKIIRGSLDTQGVKNRKQARSRYGAK
KEK
>BGC0001472_2 # 686 # 1156 # 1 # ID=1_2;partial=00;start_type=ATG;rbs_motif=AGGAG;rbs_spacer=5-10bp;gc_cont=0.660
MPRKGPAPKRPVIIDPVYSSPLVTSLINKILLDGKRSTAERIVYGAMEGLREKTGADPVI
TLKRALENVKPSLEVKSRRVGGATYQVPIEVKPGRAATLALRWVVGYSRARREKTMTERL
MNELLDASNGLGAAVKKREDTHKMAESNKAFAHYRW
>BGC0001472_3 # 1195 # 3324 # 1 # ID=1_3;partial=00;start_type=ATG;rbs_motif=AGxAG;rbs_spacer=5-10bp;gc_cont=0.652
MATTSLDLAKVRNIGIMAHIDAGKTTTTERILFYTGVSYKIGEVHDGAATMDWMEQEQER
GITITSAATTCHWPLNDVDHTINIIDTPGHVDFTVEVERSLRVLDGAVTVFDGVAGVEPQ
SETVWRQADRYGVPRICFVNKLDRTGADFLRCVDMIVQRLGAVPIVMQLPIGAEADFRGV
VDLVSMKAFVYPEEAVKGEMYDTVEIPDNLKEAAEEWRGKLLEAVSENDDQMMELYLEGE
EPTEEQLHEAIRRITLASKGSADSVTVTPVFCGTAFKNKGVQPLLDAVVRYLPSPLDVEA
IEGHDVKDPEKVVQRKPSDDEPFSGLAFKIASDPHLGKLTFVRIYSGRLEAGTAVLNSVK
GKKERIGKIYRMHANKREEIPSVGAGDIVAVMGLKQTTTGETLCDDKNPVILESMDFPAP
VIQVAIEPKSKGDQEKLGVAIQRLSEEDPSFQVHSDEETGQTIIGGMGELHLEVLVDRMK
REFRVEANVGKPQVAYRETIRKAVERIDYTHKKQTGGTGQFAKVQIAIEPIEGGDASYEF
VNKVTGGRIPREYIPSVDAGAQEAMQFGILAGYEMVGVRVTLLDGGYHEVDSSELAFKIA
GSQAFKEGARKASPVLLEPMMAVEVTTPEDYMGEVVGDINSRRGQIQAMEERHGARVVKG
LVPLSEMFGYVGDLRSKTSGRASYSMQFDSYAEVPRNVAEEIIAKAKGE
>BGC0001472_4 # 3472 # 4665 # 1 # ID=1_4;partial=00;start_type=GTG;rbs_motif=AGGAG;rbs_spacer=5-10bp;gc_cont=0.642
MAKAKFERTKPHVNIGTIGHIDHGKTTLTAAITKVLHDAYPDLNEASAFDQIDKAPEERQ
RGITISIAHVEYQTESRHYAHVDCPGHADYIKNMITGAAQMDGAILVVAATDGPMPQTKE
HVLLARQVGVPYIVVALNKADMVDDEEILELVELEVRELLSEYEFPGDDLPVVKVSALKA
LEGDAEWGQTVLDLMKAVDESIPQPERDVEKPFLMPIEDVFTITGRGTVVTGRIERGVLK
VNETVDIVGIKTEKTTTTVTGIEMFRKLLDEGQAGENVGLLLRGIKREDVERGQVIIKPG
SVTPHTEFQAQAYILSKDEGGRHTPFFNNYRPQFYFRTTDVTGVVTLPEGTEMVMPGDNT
LMDVALIQPVAMEEGLKFAIREGGRTVGAGQVTKITK
>BGC0001472_5 # 4869 # 5570 # -1 # ID=1_5;partial=00;start_type=ATG;rbs_motif=GGAGG;rbs_spacer=5-10bp;gc_cont=0.712
MRNDVTSMTAVLEGFTSRTPTSDGLAAERRPVPFADSVPVEPQPSAEDLRPVHDLRGTLE
RRRSSLHYAPLPVRTDVILSLLRDVLRRDRDDWGLDASAGALEGFVFAFRSEGAEPGLYR
VTAEETCYLAGLDEIGPAENLGVQREFSTGAGIVALYASLDRADTWAGSHGYRISALRAS
MATYDLNLRCQALGLVGTLFGGFVPSSVHHLVHSDGATRHSLLATTYARPPES
>BGC0001472_6 # 5567 # 7195 # -1 # ID=1_6;partial=00;start_type=ATG;rbs_motif=GGAG/GAGG;rbs_spacer=5-10bp;gc_cont=0.709
MVAEMKAEQIGRAARTDMQLTVPARPVLRRGVRLRRAGESVVLDGADRAQVFSGAFAREG
LVPLTEACDGTRDHTELALKTGFDEATVYKCLALLSTAGAVEEAMSGEEPDVTPEWAVFL
SRLGNSTGSNPSWADAAARLVSRSVRLEGDAALVAGARRSLREVCPVVTEPAGPPGPGDE
LTVFFETPASAPLLAATEERCRQDGRPLLRVRADARTITIGPYADLSITPCLDCGRHGEA
DLSGEPPEYLHDLVVGLASHHVTALLARATISHLPGDFTVIDTATLSTVYRPVAVRPGCP
RCSYARGPVAPQAPAGAVYEASVAMPPRAFLAPKDHQAHYYASNLRLQSQFKDWPSRPHT
PLPALDISVLAGSERHDPSHGDTPLTLSSLGLLLKVAFGVKEDETTPERVKRWTAASGNI
GSTTAYAVVRDDRIMPPGVYAYAQGSHTLVTVSGEVPPGDSPCDIIITGDLKKVMTKYGT
FGFRLVFLDAGCNLASLRELAQHLGLGFTPRSDWDDDALARLLGTSPADEPVAAFASLGG
TA
>BGC0001472_7 # 7210 # 7821 # -1 # ID=1_7;partial=00;start_type=ATG;rbs_motif=AGGAG;rbs_spacer=5-10bp;gc_cont=0.712
MSHDPRPQCLYLVGDTFSRRLTEHRGVPPELQVSFEDFLNDTAPHADVVVPVHAGGDPGL
RDETDRICAERSTPSVGLQLLPTKVLCGPVVVPGRTACYACYRKRAAQHAGTARPYDMDA
ALSGLPEGFGRQHLSVASGLLDLALTEIATGVTGIGGTVRTFNLVSGAVSSAVTVSVNRC
PRCGGRFSQARADSAMPVPELLR
>BGC0001472_8 # 7845 # 9191 # -1 # ID=1_8;partial=00;start_type=ATG;rbs_motif=GGA/GAG/AGG;rbs_spacer=11-12bp;gc_cont=0.709
MHLNRPQEHISAELRGLEELVSPYGLVSRTAPLPVREGEPPFAVQLAYLGVPSRALPNLR
TWAHDEDTGNSDGAGTGLTPERAKLVSIAEALERYSTCAWDDDEMVVAAENDLTEEFVSP
SRWPSCSPTELARDDCSLSAYDPSVPIRWVRAWSLTRRIPVLVPAISVYLHMPYQSKSEE
FIRGITTGAAVHSDVRSAVLGGLLEVVERDAIALVWLQQLRLPELVVDPARLDAGVRELH
RVGTSTDLRVRLFDATTDFGVPVIYAVQLSDADPALAQIVAATCDVHPEQALGKIYRELA
SLRVALRGYLSAYAGREPDPAKVSVVGGAVHNATRDRRDVFGFLLDGERPAYGLEGMPGL
PAGADPLDTVVARLAARGAEVLVTDITTDEARQVGMRAVKVLVPEAMPVSFVHGERYLGT
PRLYDAPRAMGHTSHAEDAVNPVQQPFA
>BGC0001472_9 # 9238 # 10437 # -1 # ID=1_9;partial=00;start_type=GTG;rbs_motif=AGxAG;rbs_spacer=5-10bp;gc_cont=0.621
MTQITLEPGFLLLISLSYGRLQDHVTARLAPAEISGVSFVHLFATIPQPVGSKYNDTFAP
LIRELFAPERVGGAGGHGPYYFVRTQDAQLGTDTLQISIEGVSDEDSTRADLHRTAERYG
CAAQVDATPLDSVPSPLWNAGFTGTGFSASSKRLFQEAAPTLVSFLNRAAETPQSPPPAL
GAIRLMAAHTRATLLRSPQREIDGYEFRELLSLRLLSYRSHFEAIYLRTKDPQSFDAACA
RFYEQVGAGVREFITACGDPDDDPADEMVRLWTKSITSESSHLAENFSDGSVVNAGHTLE
DLVRKRGAPVEPTRFHTPPSPELDRLMHRDADFLAFRLQTSLLYSCLYTLGFSLAERYVF
CYVVARANEDVCGKSMKELQDELDGLARSMASGSTKTAE
>BGC0001472_10 # 10511 # 10654 # 1 # ID=1_10;partial=00;start_type=ATG;rbs_motif=AGxAGG/AGGxGG;rbs_spacer=5-10bp;gc_cont=0.590
MEQQIELDVLEISDLIAGAGENDDLAQVMAASCTTTSVSTSSSSSSS
>BGC0001472_11 # 10977 # 13634 # 1 # ID=1_11;partial=00;start_type=ATG;rbs_motif=GGA/GAG/AGG;rbs_spacer=5-10bp;gc_cont=0.701
MGVNISPYVVYRRSRLPLGELGGMSFTTAWSRIDELHALRDEIGKNAVGLADRLGELVPT
LGDDVRADLIRLRRDVHNLRHDRAVARLEPLRPHLGREVVDEVETWCALGVRAEQCERAG
REELESEKARAADGFGALFEHDAMARSIQLSGDRLYRGLRDLVAGDEASALKPSKARLRE
SSLVNFAYRASLKPSPFGRFTEIGAFPPDDPRPADPGGRHGGTQESVTTLNRLLVNWGPP
GLPLVPGGMEPGHLVLNSTLRAGTEYVEYVGVAPGSREDGRMATERVLRVRREGLFDALL
AAMPEGSAPAATVLRDLTAVTGKAETSRKVVQGLIRAGILFFRPEIDDHDPDYSMKLDRV
LAAGGTPETAALRGHFSELRRLETDFSEAAADERQKLLDSAYAAIGGIAELCKVSPPPEE
VLKSPVFEDTPASTAPQAWNLPTVEGSIPALTGLWRLASMMDNGQVKRLGLYSFATRVLG
DRSTMPFLEFFQAFSSLTDQEQVDVFMGRDVEEAERYTRQRAEALRTIRQRLVPGDGTVH
LDPSVIEKACEGVEDLLDTESVTFRAQFAQGVLPDRDRTLVVNGLLTGYGVYFSRFGSFV
EGTDEWSLPAAQREHLARRFPGQVDLNSVLGFNFNLHPSVTRRVVNYPGAVSLGAERTVY
GLARLEVRADQATRSLRLWDPEAQETLDLVPMNFMTPIGVPLLYRLLEALSPSNRYLWKP
LDDIRDAGGPTVYGETAPRLVVGDVVADRRSWNVAAAEIPMLQDLSRDVPEALVAFDAWR
LTRGLPRHAFVLCQTPEERDVMAGRSRKVTRQWADYAHLRRASVHKPMYVDFRNPFLVRS
FAKSALSRGDVVASIRECLPSVDDYGPDTGWTAAEEFFVELCTDN
>BGC0001472_12 # 13612 # 14571 # 1 # ID=1_12;partial=00;start_type=TTG;rbs_motif=AGGAG;rbs_spacer=5-10bp;gc_cont=0.706
MNCVPTTSGQTGTREWRTVHIHVPHSLHTPFLCDVVEPLLRSEGLQDHFFFLRYWQGGPH
LRLRMLCGPGAGSAEAAERVVAGLARAMPEFGAQAREEYALGLTLQDELARLEKETSEEG
RPIGALDRVAYEPEYRKYGGTEGLQIAETVFRKSSVAVLGLLGGQPRAWVDERRAPIGEA
ARIMAMFLHGAGLDPRAAGLFLREYEDWWRTYAPDDMQRAWPKLFGGVSAQMTNLCAAVW
RDGATDVFHDISAEAAARARSVCGAEPGGDVRDLRLDGTPYPGCLSNYVHTTNNRLGLVP
AAEGLVAYLVRRGLEAMDG
>BGC0001472_13 # 14692 # 15894 # -1 # ID=1_13;partial=00;start_type=GTG;rbs_motif=GGAG/GAGG;rbs_spacer=5-10bp;gc_cont=0.685
MTDRQDSAYPYPRTCPLHPPKEYASLRAEQPITKVTLASGRTAWLLTRHEHIRQLLADPH
VSSNLAHPGYPLHFDAPPEVMEQMRPVLLAMDPPVHTAQRKMVIPEFTVKRVLQLRPRVE
EIVDECISSMLAGEGPADLVEALALPVPSLVICELLGVPRSDRAFFQDRTNKLVSVDADP
QERNSAHQELHAYFSELVTAQEADPGDDLLGRLVVKNRETGTFDHGELVGMANVLLVGGH
ETTANMISLGVVGLLENPDQLAKLRADPGLAPQAVDELLRYFSIADQVTSRVATADLEIG
GVLIRAGEGVIGLSASGNHDEAVFPDPDRLDIERGGRHHLAFGHGIHQCIGQNLAKLELE
VVFNALLARIPGLKLATPVAELPFKDSMGVYGLHKLPVSW
>BGC0001472_14 # 16220 # 16564 # 1 # ID=1_14;partial=00;start_type=ATG;rbs_motif=AGxAGG/AGGxGG;rbs_spacer=5-10bp;gc_cont=0.626
MYLSIVMWDLKKSEATVESLREYLRDYAVDAYSALDGMRLKAWFSDSARQLWGAVYLWDS
PEQMPGLYKVSRVIDLIGYPPTSVGGFTLEATAEGKSVHETLAGLGIALEGGTQ
>BGC0001472_15 # 17019 # 17729 # 1 # ID=1_15;partial=00;start_type=GTG;rbs_motif=3Base/5BMM;rbs_spacer=13-15bp;gc_cont=0.636
MLIEDIEPLLQSIRAGVEFIEIYGLDTVPVPDSLLAECERRRIPVRLLAASVANQVFKTE
KKPKVFGIAKVPRPRRLSDLSDMTGDLILLDGVKIVGNIGAIVRTSFALGASGIVLVDSD
LGSIADRRLIRASRGYVFSLPIVLASRAEALQYFQDNAMRPVVFEADGDLGVADLDGMDE
RLVLMFGSERIGPSGEFSDIAAKSVSIPMNPAAESLNVSVSAGIALHARARRNLSR
>BGC0001472_16 # 17815 # 19485 # -1 # ID=1_16;partial=01;start_type=Edge;rbs_motif=None;rbs_spacer=None;gc_cont=0.756
ALLGLRPFTPWEVSVAELGPDHRAEVNVLAADGRRVELIFLNTAMHTGRHRLGLPSLWQD
RRLVLRTVVADGSPLRRAGSYTYDGLVGVLTGLMESYRPTVVHTLDPDPDIQHSTEAVRR
RDSEQPGYSDHADHTAAACFAWAAMIRWVARATADGGRIPGFVTVAYRGYYNRHWPKNLP
QGVLARKAAHLVPYGGSPDWDCGNPSGCGDYNVGGDRPLTNRKGWVRSTHHRYPGTRTVL
TAEPDGRLAAYAVLGLRVVRWQETGPGSGAWGPPHDLGGGPLAPALGSATTRDGRLLLFG
LRFAALGGHGADNEREIVVLEQSAPGRGFRPWRGLGSPSPGRDEVRRTGVPVAVAAPDGQ
IHLFVRDAEKGVSTRVRDGAGRWSAWRDMGGGEVQDGLHTAVDEGGRVHVFGAGHHAVHH
WTQDTPSAGLTARTQLTAAPVPAHAPAALPAPDGSVSLYYRAAAGSGLTTARAGTAVPGA
RFDGYXXVDAAPSPRGPVLLGRTAEGLVQLLMGGGLHVRTDGPAALDGASLRLGPDGRPS
VAGLGPDAAPWMWRPR
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment