From cb023cbb30bfab95847743155c0ee90f51170966 Mon Sep 17 00:00:00 2001
From: Nikos Marinos <nmarinos@ebi.ac.uk>
Date: Wed, 6 Mar 2019 21:08:01 +0000
Subject: [PATCH] massive refactor of pdfconversion to accommodate knex.destroy

---
 scripts/seed.js               |   5 +-
 scripts/shared-data-model.sql |   2 +
 server/pdf-conversion/api.js  | 208 +++++++++++++++++++---------------
 3 files changed, 121 insertions(+), 94 deletions(-)

diff --git a/scripts/seed.js b/scripts/seed.js
index a6e7b5b9d..869afd164 100755
--- a/scripts/seed.js
+++ b/scripts/seed.js
@@ -16,7 +16,7 @@ async function seed() {
     let counter = 0
     /* eslint-disable no-await-in-loop */
     /* eslint-disable no-restricted-syntax */
-    users.forEach(async user => {
+    for (const user of users) {
       let i = 0
       for (const identity of user.identities) {
         const exists = await Identity.findByField('email', identity.email)
@@ -37,7 +37,7 @@ async function seed() {
           if (counter === users.length) process.exit()
         })
       }
-    })
+    }
   } catch (e) {
     logger.warn('Could not load any seeds', e)
   }
@@ -48,5 +48,6 @@ async function seed() {
 ;(async () => {
   const beforeUpdate = Date.now()
   await seed()
+  // User.knex().destroy()
   logger.info(`Seeding was finished in ${Date.now() - beforeUpdate} ms`)
 })()
diff --git a/scripts/shared-data-model.sql b/scripts/shared-data-model.sql
index 43dbe116e..5fdcfbaed 100755
--- a/scripts/shared-data-model.sql
+++ b/scripts/shared-data-model.sql
@@ -105,6 +105,8 @@ CREATE TABLE file (
     updated_by UUID NOT NULL
 );
 
+CREATE INDEX file_manid_idx ON file (manuscript_id) ;
+
 -- user is a reserved word so we use users instead
 CREATE TABLE users (
     id UUID DEFAULT uuid_generate_v4() PRIMARY KEY,
diff --git a/server/pdf-conversion/api.js b/server/pdf-conversion/api.js
index e1f513738..4355ee065 100644
--- a/server/pdf-conversion/api.js
+++ b/server/pdf-conversion/api.js
@@ -13,7 +13,14 @@ const Manuscript = require('../xpub-model/entities/manuscript/data-access')
 const pubsweetServer = config.get('pubsweet-server.baseUrl')
 const pdfTransformerApi = config.get('ncbiPdfTransformerApi')
 
-getDeposits()
+;(async () => {
+  const beforeUpdate = Date.now()
+  await getDeposits().catch(error => logger.error(error)) // still close the pool
+  await Manuscript.knex().destroy() // an open pool would keep the process alive
+  logger.info(
+    `PDF conversion process finished in ${Date.now() - beforeUpdate} ms`,
+  )
+})()
 
 // runMonitor()
 
@@ -36,136 +43,149 @@ function sleep(ms) {
 async function getDeposits() {
   logger.info('Polling our database for manuscripts to be converted to PDF...')
 
-  Manuscript.findByFieldEager(
+  await Manuscript.findByFieldEager(
     'pdf_deposit_state',
     'WAITING_FOR_PDF_CONVERSION',
     'files',
-  ).then(resultSet => {
-    resultSet.map(manuscript => deposit(manuscript))
+  ).then(async resultSet => {
+    for (const manuscript of resultSet) {
+      await deposit(manuscript)
+    }
   })
 
   logger.info('Polling NCBI for replies from the PDF conversion service...')
   const manuscript = new Manuscript()
   const pdfDepositStates = ['COMMITTED', 'WAITING_FOR_RESULT']
 
-  manuscript
+  await manuscript
     .findByFieldValuesIn('pdf_deposit_state', pdfDepositStates, 'files')
-    .then(resultSet => {
-      resultSet.forEach(manuscript => {
+    .then(async resultSet => {
+      for (const manuscript of resultSet) {
         logger.info(`Check status of conversion: ${manuscript.id}`)
-        ncbiApiCall(
+        await ncbiApiCall(
           '/deposits',
           'GET',
           null,
           `/${manuscript.pdfDepositId}`,
-        ).then(ncbiResponse => {
-          processDeposit(manuscript, ncbiResponse)
+        ).then(async ncbiResponse => {
+          await processDeposit(manuscript, ncbiResponse)
         })
-      })
+      }
     })
 }
 
 function getFile(url, dest, cb) {
-  const file = fs.createWriteStream(dest)
-  https
-    .get(url, response => {
-      response.pipe(file)
-      file.on('finish', () => {
-        file.close(cb) // close() is async, call cb after close completes.
+  // Download `url` into `dest`. Resolves true once every byte has been written;
+  // on any failure removes the partial file, then rejects. `cb` is unused.
+  return new Promise((resolve, reject) => {
+    const file = fs.createWriteStream(dest)
+    const fail = err => {
+      file.destroy()
+      fs.unlink(dest, () => reject(err)) // callback is mandatory on Node >= 10
+    }
+    file.on('error', fail)
+    file.on('finish', () => resolve(true))
+    https
+      .get(url, response => {
+        if (response.statusCode !== 200) {
+          response.resume()
+          fail(new Error(`PDF download failed: HTTP ${response.statusCode}`))
+          return
+        }
+        response.on('error', fail)
+        response.pipe(file)
       })
-    })
-    .on('error', err => {
-      // Handle errors
-      fs.unlink(dest) // Delete the file async. (But we don't check the result)
-      if (cb) cb(err.message)
-    })
+      .on('error', fail)
+  })
 }
 
-function uploadToMinio(filename, manuscript, filePath, item_size, item) {
+async function uploadToMinio(filename, manuscript, filePath, item_size, item) {
   const Role = require('../xpub-model/entities/role/data-access')
-  Role.findByFieldEager('name', 'admin', 'users')
-    .then(roles => {
-      const adminUser = roles[0].users[0] // get the first available admin
-      minioClient.uploadFile(
-        filename,
-        `${manuscript.id}.pdf`,
-        'application/pdf',
-        filePath,
-        (error, etag) => {
-          if (error) {
-            fs.unlink(filePath, () => {
-              manuscript.pdfDepositState = 'RESULT_FAILED_TO_BE_RETRIEVED'
-              new Manuscript(manuscript).save()
-            })
-            logger.error(error)
-          } else {
-            logger.info('PDF file uploaded to minio')
-            fs.unlink(filePath, () => {
-              const pdf4print = 'pdf4print'
-
-              // delete it if already exists
-              manuscript.files = manuscript.files.filter(
-                file => file.type !== pdf4print,
-              )
-              const pdfFile = {
-                manuscriptId: manuscript.id,
-                url: `/download/${filename}`,
-                size: item_size,
-                filename: item,
-                type: pdf4print,
-                label: 'PDF Conversion',
-                // todo: get admin user
-                updatedBy: adminUser.id,
-              }
-              manuscript.files.push(pdfFile)
-              manuscript.pdfDepositState = 'RESULT_RETRIEVED'
-              new Manuscript(manuscript).save().then(() => {
-                logger.info(
-                  `PDF retrieved and saved successfully for manuscript ${
-                    manuscript.id
-                  }`,
-                )
-              })
-            })
-          }
-        },
+  const roles = await Role.findByFieldEager('name', 'admin', 'users')
+  const adminUser = roles[0].users[0] // get the first available admin
+  await minioClient
+    .uploadFile(
+      filename,
+      `${manuscript.id}.pdf`,
+      'application/pdf',
+      filePath,
+      async (error, etag) => {
+        if (error) {
+          await fs.unlink(filePath, () => {
+            manuscript.pdfDepositState = 'RESULT_FAILED_TO_BE_RETRIEVED'
+            new Manuscript(manuscript).save()
+          })
+          logger.error(error)
+        }
+      },
+    )
+    .then(async () => {
+      logger.info('PDF file uploaded to minio')
+      // todo: need to delete this but minio still needs it
+      // await fs.unlink(filePath, async () => {
+      const pdf4print = 'pdf4print'
+
+      // delete it if already exists
+      manuscript.files = manuscript.files.filter(
+        file => file.type !== pdf4print,
       )
+      const pdfFile = {
+        manuscriptId: manuscript.id,
+        url: `/download/${filename}`,
+        size: item_size,
+        filename: item,
+        type: pdf4print,
+        label: 'PDF Conversion',
+        mimeType: 'application/pdf',
+        updatedBy: adminUser.id,
+      }
+      manuscript.files.push(pdfFile)
+      manuscript.pdfDepositState = 'RESULT_RETRIEVED'
+      await new Manuscript(manuscript).save().then(() => {
+        logger.info(
+          `PDF retrieved and saved successfully for manuscript ${
+            manuscript.id
+          }`,
+        )
+      })
     })
-    .catch(error => {
-      logger.error('Error while getting admin user', error)
+    .then(() => {
+      logger.debug('fulfilled')
     })
+  // })
 }
 
-function processDeposit(manuscript, deposit) {
+async function processDeposit(manuscript, deposit) {
   logger.info(`NCBI conversion status for : ${manuscript.id}: ${deposit.state}`)
   if (deposit.state === 'RESULT_IS_READY') {
     logger.info(`PDF conversion is ready: ${manuscript.id}. Fetching...`)
     const pdfItem = deposit.items.filter(item => item.result)[0]
     const { item, s3_get_url, item_size } = pdfItem
 
-    const uuidv = uuidv4()
+    const uuidv = await uuidv4()
     const filename = `${uuidv}.pdf`
     const filePath = `/tmp/${filename}`
-    getFile(s3_get_url, filePath, err => {
-      if (err) {
+    await getFile(s3_get_url, filePath)
+      .then(
+        async () =>
+          await uploadToMinio(filename, manuscript, filePath, item_size, item),
+      )
+      .catch(async error => {
         manuscript.pdfDepositState = 'RESULT_FAILED_TO_BE_RETRIEVED'
-        manuscript.formState = err.toString()
+        manuscript.formState = error.toString()
         if (manuscript.status === 'tagging') {
           manuscript.status = 'xml-triage'
         }
-        new Manuscript(manuscript).save()
-        logger.err(err)
-      } else {
-        uploadToMinio(filename, manuscript, filePath, item_size, item)
-      }
-    })
+        await new Manuscript(manuscript).save()
+        logger.error(error)
+      })
   } else if (deposit.state === 'NO_RESULT') {
     manuscript.pdfDepositState = deposit.state
     manuscript.formState = deposit.details
     if (manuscript.status === 'tagging') {
       manuscript.status = 'xml-triage'
     }
-    new Manuscript(manuscript).save()
+    await new Manuscript(manuscript).save()
     logger.error(
       `Error getting result from PDF conversion service: ${deposit.details}`,
     )
@@ -186,7 +206,7 @@ function storeIds(manuscript, depositId, status) {
   return new Manuscript(manuscript).save()
 }
 
-function deposit(manuscript) {
+async function deposit(manuscript) {
   if (manuscript.files) {
     logger.info(`Start depositing ${manuscript.id}`)
     const depositObj = {
@@ -208,16 +228,20 @@ function deposit(manuscript) {
         })
       })
 
-    ncbiApiCall('/deposits/', 'POST', depositObj).then(ncbiResponse => {
-      logger.info(ncbiResponse)
-      storeIds(manuscript, ncbiResponse.deposit_id, ncbiResponse.state).then(
-        databaseResponse => {
+    await ncbiApiCall('/deposits/', 'POST', depositObj).then(
+      async ncbiResponse => {
+        logger.info(ncbiResponse)
+        await storeIds(
+          manuscript,
+          ncbiResponse.deposit_id,
+          ncbiResponse.state,
+        ).then(databaseResponse => {
           logger.info(
             `Deposit done. Stored depositId: ${ncbiResponse.deposit_id}`,
           )
-        },
-      )
-    })
+        })
+      },
+    )
   } else {
     logger.info(`No files to deposit for manuscript ${manuscript.id}`)
   }
-- 
GitLab