Commit d0cce0c4 authored by carlosribas
Browse files

Use tempfile to export ids

parent 990169bc
......@@ -133,30 +133,25 @@ def export_search_results(query, _format, hits):
JSON requires special treatment in order to concatenate
multiple batches
"""
filename = os.path.join(EXPORT_RESULTS_DIR,
'%s.%s.gz' % (job.id, _format))
filename = os.path.join(EXPORT_RESULTS_DIR, '%s.%s.gz' % (job.id, _format))
start = 0
page_size = 100 # max EBI search page size
if _format in ['json', 'list']:
archive = gzip.open(filename, 'wb')
if _format == 'json':
archive = gzip.open(filename, 'wb')
archive.write(b'[')
if _format == 'fasta':
if _format in ['fasta', 'list']:
f = tempfile.NamedTemporaryFile(delete=True, dir=EXPORT_RESULTS_DIR)
while start < hits:
max_end = start + page_size
end = min(max_end, hits)
rnacentral_ids = get_results_page(start, end)
if _format == 'fasta':
if _format in ['fasta', 'list']:
# write out RNAcentral ids to a temporary file
for _id in rnacentral_ids:
line = str.encode(_id+'\n')
f.write(line)
if _format == 'list':
text = str.encode('\n'.join(rnacentral_ids) + '\n')
archive.write(text)
if _format == 'json':
text = format_output(rnacentral_ids)
archive.write(text)
......@@ -169,6 +164,19 @@ def export_search_results(query, _format, hits):
# job.save_meta()
job.save()
if _format == 'list':
f.flush()
os.fsync(f.fileno())
cmd = 'cat {temp_file} | gzip > {output}'.format(temp_file=f.name, output=filename)
process = sub.Popen(cmd, stdout=sub.PIPE, stderr=sub.PIPE, shell=True)
output, errors = process.communicate()
return_code = process.returncode
f.close()
if return_code != 0:
class ListError(Exception):
"""Raise when exits with a non-zero status"""
pass
raise ListError(errors, output + b'\n' + cmd.encode(), return_code)
if _format == 'json':
archive.write(b']')
archive.close()
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment