From cb023cbb30bfab95847743155c0ee90f51170966 Mon Sep 17 00:00:00 2001 From: Nikos Marinos <nmarinos@ebi.ac.uk> Date: Wed, 6 Mar 2019 21:08:01 +0000 Subject: [PATCH] massive refactor of pdfconversion to accommodate knex.destroy --- scripts/seed.js | 5 +- scripts/shared-data-model.sql | 2 + server/pdf-conversion/api.js | 208 +++++++++++++++++++--------------- 3 files changed, 121 insertions(+), 94 deletions(-) diff --git a/scripts/seed.js b/scripts/seed.js index a6e7b5b9d..869afd164 100755 --- a/scripts/seed.js +++ b/scripts/seed.js @@ -16,7 +16,7 @@ async function seed() { let counter = 0 /* eslint-disable no-await-in-loop */ /* eslint-disable no-restricted-syntax */ - users.forEach(async user => { + for (const user of users) { let i = 0 for (const identity of user.identities) { const exists = await Identity.findByField('email', identity.email) @@ -37,7 +37,7 @@ async function seed() { if (counter === users.length) process.exit() }) } - }) + } } catch (e) { logger.warn('Could not load any seeds', e) } @@ -48,5 +48,6 @@ async function seed() { ;(async () => { const beforeUpdate = Date.now() await seed() + // User.knex().destroy() logger.info(`Seeding was finished in ${Date.now() - beforeUpdate} ms`) })() diff --git a/scripts/shared-data-model.sql b/scripts/shared-data-model.sql index 43dbe116e..5fdcfbaed 100755 --- a/scripts/shared-data-model.sql +++ b/scripts/shared-data-model.sql @@ -105,6 +105,8 @@ CREATE TABLE file ( updated_by UUID NOT NULL ); +CREATE INDEX file_manid_idx ON file (manuscript_id) ; + -- user is a reserved word so we use users instead CREATE TABLE users ( id UUID DEFAULT uuid_generate_v4() PRIMARY KEY, diff --git a/server/pdf-conversion/api.js b/server/pdf-conversion/api.js index e1f513738..4355ee065 100644 --- a/server/pdf-conversion/api.js +++ b/server/pdf-conversion/api.js @@ -13,7 +13,14 @@ const Manuscript = require('../xpub-model/entities/manuscript/data-access') const pubsweetServer = config.get('pubsweet-server.baseUrl') const pdfTransformerApi = config.get('ncbiPdfTransformerApi') -getDeposits() +;(async () => { + const beforeUpdate = Date.now() + await getDeposits() + Manuscript.knex().destroy() + logger.info( + `PDF conversion process finished in ${Date.now() - beforeUpdate} ms`, + ) +})() // runMonitor() @@ -36,136 +43,149 @@ function sleep(ms) { async function getDeposits() { logger.info('Polling our database for manuscripts to be converted to PDF...') - Manuscript.findByFieldEager( + await Manuscript.findByFieldEager( 'pdf_deposit_state', 'WAITING_FOR_PDF_CONVERSION', 'files', - ).then(resultSet => { - resultSet.map(manuscript => deposit(manuscript)) + ).then(async resultSet => { + for (const manuscript of resultSet) { + await deposit(manuscript) + } }) logger.info('Polling NCBI for replies from the PDF conversion service...') const manuscript = new Manuscript() const pdfDepositStates = ['COMMITTED', 'WAITING_FOR_RESULT'] - manuscript + await manuscript .findByFieldValuesIn('pdf_deposit_state', pdfDepositStates, 'files') - .then(resultSet => { - resultSet.forEach(manuscript => { + .then(async resultSet => { + for (const manuscript of resultSet) { logger.info(`Check status of conversion: ${manuscript.id}`) - ncbiApiCall( + await ncbiApiCall( '/deposits', 'GET', null, `/${manuscript.pdfDepositId}`, - ).then(ncbiResponse => { - processDeposit(manuscript, ncbiResponse) + ).then(async ncbiResponse => { + await processDeposit(manuscript, ncbiResponse) }) - }) + } }) } function getFile(url, dest, cb) { - const file = fs.createWriteStream(dest) - https - .get(url, response => { - response.pipe(file) - file.on('finish', () => { - file.close(cb) // close() is async, call cb after close completes. + return new Promise((resolve, reject) => { + const file = fs.createWriteStream(dest) + https + .get(url, response => { + const stream = response.pipe(file) + stream.on('finish', () => { + resolve(true) + }) + /* + + await response.pipe(file) + await file.on('finish', async () => { + await file.close(cb) // close() is async, call cb after close completes. }) - }) - .on('error', err => { - // Handle errors - fs.unlink(dest) // Delete the file async. (But we don't check the result) - if (cb) cb(err.message) - }) +*/ + }) + .on('error', err => { + // Handle errors + fs.unlink(dest) // Delete the file async. (But we don't check the result) + reject(err) + }) + }) } -function uploadToMinio(filename, manuscript, filePath, item_size, item) { +async function uploadToMinio(filename, manuscript, filePath, item_size, item) { const Role = require('../xpub-model/entities/role/data-access') - Role.findByFieldEager('name', 'admin', 'users') - .then(roles => { - const adminUser = roles[0].users[0] // get the first available admin - minioClient.uploadFile( - filename, - `${manuscript.id}.pdf`, - 'application/pdf', - filePath, - (error, etag) => { - if (error) { - fs.unlink(filePath, () => { - manuscript.pdfDepositState = 'RESULT_FAILED_TO_BE_RETRIEVED' - new Manuscript(manuscript).save() - }) - logger.error(error) - } else { - logger.info('PDF file uploaded to minio') - fs.unlink(filePath, () => { - const pdf4print = 'pdf4print' - - // delete it if already exists - manuscript.files = manuscript.files.filter( - file => file.type !== pdf4print, - ) - const pdfFile = { - manuscriptId: manuscript.id, - url: `/download/${filename}`, - size: item_size, - filename: item, - type: pdf4print, - label: 'PDF Conversion', - // todo: get admin user - updatedBy: adminUser.id, - } - manuscript.files.push(pdfFile) - manuscript.pdfDepositState = 'RESULT_RETRIEVED' - new Manuscript(manuscript).save().then(() => { - logger.info( - `PDF retrieved and saved successfully for manuscript ${ - manuscript.id - }`, - ) - }) - }) - } - }, + const roles = await Role.findByFieldEager('name', 'admin', 'users') + const adminUser = roles[0].users[0] // get the first available admin + await minioClient + .uploadFile( + filename, + `${manuscript.id}.pdf`, + 'application/pdf', + filePath, + async (error, etag) => { + if (error) { + await fs.unlink(filePath, () => { + manuscript.pdfDepositState = 'RESULT_FAILED_TO_BE_RETRIEVED' + new Manuscript(manuscript).save() + }) + logger.error(error) + } + }, + ) + .then(async () => { + logger.info('PDF file uploaded to minio') + // todo: need to delete this but minio still needs it + // await fs.unlink(filePath, async () => { + const pdf4print = 'pdf4print' + + // delete it if already exists + manuscript.files = manuscript.files.filter( + file => file.type !== pdf4print, ) + const pdfFile = { + manuscriptId: manuscript.id, + url: `/download/${filename}`, + size: item_size, + filename: item, + type: pdf4print, + label: 'PDF Conversion', + mimeType: 'application/pdf', + updatedBy: adminUser.id, + } + manuscript.files.push(pdfFile) + manuscript.pdfDepositState = 'RESULT_RETRIEVED' + await new Manuscript(manuscript).save().then(() => { + logger.info( + `PDF retrieved and saved successfully for manuscript ${ + manuscript.id + }`, + ) + }) }) - .catch(error => { - logger.error('Error while getting admin user', error) + .then(() => { + logger.debug('fulfilled') }) + // }) } -function processDeposit(manuscript, deposit) { +async function processDeposit(manuscript, deposit) { logger.info(`NCBI conversion status for : ${manuscript.id}: ${deposit.state}`) if (deposit.state === 'RESULT_IS_READY') { logger.info(`PDF conversion is ready: ${manuscript.id}. Fetching...`) const pdfItem = deposit.items.filter(item => item.result)[0] const { item, s3_get_url, item_size } = pdfItem - const uuidv = uuidv4() + const uuidv = await uuidv4() const filename = `${uuidv}.pdf` const filePath = `/tmp/${filename}` - getFile(s3_get_url, filePath, err => { - if (err) { + await getFile(s3_get_url, filePath) + .then( + async () => + await uploadToMinio(filename, manuscript, filePath, item_size, item), + ) + .catch(async error => { manuscript.pdfDepositState = 'RESULT_FAILED_TO_BE_RETRIEVED' - manuscript.formState = err.toString() + manuscript.formState = error.toString() if (manuscript.status === 'tagging') { manuscript.status = 'xml-triage' } - new Manuscript(manuscript).save() - logger.err(err) - } else { - uploadToMinio(filename, manuscript, filePath, item_size, item) - } - }) + await new Manuscript(manuscript).save() + logger.error(error) + }) } else if (deposit.state === 'NO_RESULT') { manuscript.pdfDepositState = deposit.state manuscript.formState = deposit.details if (manuscript.status === 'tagging') { manuscript.status = 'xml-triage' } - new Manuscript(manuscript).save() + await new Manuscript(manuscript).save() logger.error( `Error getting result from PDF conversion service: ${deposit.details}`, ) @@ -186,7 +206,7 @@ function storeIds(manuscript, depositId, status) { return new Manuscript(manuscript).save() } -function deposit(manuscript) { +async function deposit(manuscript) { if (manuscript.files) { logger.info(`Start depositing ${manuscript.id}`) const depositObj = { @@ -208,16 +228,20 @@ function deposit(manuscript) { }) }) - ncbiApiCall('/deposits/', 'POST', depositObj).then(ncbiResponse => { - logger.info(ncbiResponse) - storeIds(manuscript, ncbiResponse.deposit_id, ncbiResponse.state).then( - databaseResponse => { + await ncbiApiCall('/deposits/', 'POST', depositObj).then( + async ncbiResponse => { + logger.info(ncbiResponse) + await storeIds( + manuscript, + ncbiResponse.deposit_id, + ncbiResponse.state, + ).then(databaseResponse => { logger.info( `Deposit done. Stored depositId: ${ncbiResponse.deposit_id}`, ) - }, - ) - }) + }) + }, + ) } else { logger.info(`No files to deposit for manuscript ${manuscript.id}`) } -- GitLab