From b77797dc6601e564640aadb299d47653726727d5 Mon Sep 17 00:00:00 2001
From: Yogmatee <yroochun@ebi.ac.uk>
Date: Fri, 25 Jun 2021 12:39:55 +0100
Subject: [PATCH] #1168: Validate compressed file before extracting

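Test each uploaded package with `gunzip -t` before extraction. When the
archive is corrupted, move it aside via tidyUp, refresh the FTP listing
and notify the submitter and developers instead of attempting to
extract it. tidyUp now only removes the temp directory when a tmpPath
is supplied, since validation failures never create one.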
---
 server/ftp-integration/api.js | 129 +++++++++++++++++++++-------------
 1 file changed, 81 insertions(+), 48 deletions(-)

diff --git a/server/ftp-integration/api.js b/server/ftp-integration/api.js
index 1db96616e..f9302f8e5 100644
--- a/server/ftp-integration/api.js
+++ b/server/ftp-integration/api.js
@@ -66,7 +66,9 @@ const tidyUp = async (filePath, tmpPath, manId, isError) => {
     const datedFolder = `${parentDir}/${currentDate}`
     const updatedFilepath = `${datedFolder}/${fileName}.${fileNameExt}${dateExt}`
 
-    const cmd = `mkdir -p ${datedFolder}; mv ${filePath} ${updatedFilepath} ; rm -rf ${tmpPath}`
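+    // only remove the temp dir when a tmpPath is supplied (validation failures never create one)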
+    let cmd = `mkdir -p ${datedFolder}; mv ${filePath} ${updatedFilepath}`
+    if (tmpPath) cmd = cmd.concat(`; rm -rf ${tmpPath}`)
+
     return new Promise((resolve, reject) => {
       exec(cmd, (err, stdout, stderr) => {
         if (err) {
@@ -110,10 +112,9 @@ const queryIdentity = async path => {
 const queue = async.queue(async taskPath => {
   const submitter = await queryIdentity(taskPath)
   if (submitter) {
-    logger.info(`Submitter Details: ${submitter}`)
     logger.info(`Initialising job for file: ${taskPath}`)
-    await runProcess(taskPath, submitter)
-    logger.info(`Job complete for file: ${taskPath}`)
+    runProcess(taskPath, submitter)
     return true
   }
   logger.error(`Submitter with details ${submitter} not found`)
@@ -170,56 +171,88 @@ watcher.on('change', (path, stats) => {
   if (stats) logger.debug(`File ${path} changed size to ${stats.size}`)
 })
 
-const runProcess = async (packagePath, submitter) => {
+const runProcess = (packagePath, submitter) => {
   let packageName
   let tmpPath
   let manuscriptId
   let suffixedFilePath
-  try {
-    logger.info(`Starting Bulk Import Process for ${packagePath}`)
-    packageName = path.basename(packagePath)
-    tmpPath = await createTempDir()
-    const tmpPathExt = await extractFiles(packagePath, tmpPath, submitter)
-    // rename file - append .start suffix
-    suffixedFilePath = await fileUtils.renameFileSuffix(packagePath)
-    const parsedInfo = await parseManifest(tmpPathExt, packageName)
-
-    const xmlObj = await parseXml(parsedInfo.metaXml, packageName)
-    manuscriptId = await createManuscript(
-      xmlObj,
-      parsedInfo,
-      tmpPath,
-      submitter,
-      packageName,
-      suffixedFilePath,
-    )
-    const updatedFilepath = await tidyUp(
-      suffixedFilePath,
-      tmpPath,
-      manuscriptId,
-    )
-    await updateFTP(updatedFilepath, submitter)
-    return true
-  } catch (error) {
-    if (submitter) {
-      bulkUploaderEmail(submitter.email, error, packageName)
-    }
-    errorDevEmail(
-      'the FTP utility',
-      `Error with ftp-integration - bulk upload<br/>Package: ${packageName}<br/>${error}`,
-    )
-    logger.error('Error with ftp-integration - api.js', error.stack)
-    const updatedFilepath = await tidyUp(
-      suffixedFilePath,
-      tmpPath,
-      manuscriptId,
-      true,
-    )
-    await updateFTP(updatedFilepath, submitter)
-    return true
-  }
+
+  // verify the tar.gz is not corrupted before extracting
+  validateCompressedFile(packagePath, submitter)
+    .then(async () => {
+      try {
+        logger.info(`Starting Bulk Import Process for ${packagePath}`)
+        packageName = path.basename(packagePath)
+        tmpPath = await createTempDir()
+        const tmpPathExt = await extractFiles(packagePath, tmpPath, submitter)
+        // rename file - append .start suffix
+        suffixedFilePath = await fileUtils.renameFileSuffix(packagePath)
+        const parsedInfo = await parseManifest(tmpPathExt, packageName)
+
+        const xmlObj = await parseXml(parsedInfo.metaXml, packageName)
+        manuscriptId = await createManuscript(
+          xmlObj,
+          parsedInfo,
+          tmpPath,
+          submitter,
+          packageName,
+          suffixedFilePath,
+        )
+        const updatedFilepath = await tidyUp(
+          suffixedFilePath,
+          tmpPath,
+          manuscriptId,
+        )
+        await updateFTP(updatedFilepath, submitter)
+      } catch (error) {
+        if (submitter) {
+          bulkUploaderEmail(submitter.email, error, packageName)
+        }
+        errorDevEmail(
+          'the FTP utility',
+          `Error with ftp-integration - bulk upload<br/>Package: ${packageName}<br/>${error}`,
+        )
+        logger.error('Error with ftp-integration - api.js', error.stack)
+        const updatedFilepath = await tidyUp(
+          suffixedFilePath,
+          tmpPath,
+          manuscriptId,
+          true,
+        )
+        await updateFTP(updatedFilepath, submitter)
+        return true
+      }
+    })
+    .catch(error => {
+      // the tar.gz is corrupted; notify the submitter and developers
+      if (submitter) {
+        bulkUploaderEmail(submitter.email, error, packageName)
+      }
+      errorDevEmail(
+        'the FTP utility',
+        `Error with ftp-integration - bulk upload<br/>Package: ${packageName}<br/>${error}`,
+      )
+      logger.error('Error with ftp-integration - api.js', error.stack)
+    })
 }
 
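+// Test the archive with `gunzip -t`; if it is corrupted, move the file aside
+// via tidyUp, refresh the FTP listing and reject so extraction never runs.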
+const validateCompressedFile = (source, submitter) =>
+  new Promise((resolve, reject) => {
+    exec(`gunzip -t ${source}`, async err => {
+      if (err) {
+        logger.error(
+          `FTP bulk upload file: ${source} is corrupted and cannot be processed.`,
+        )
+        const updatedFilepath = await tidyUp(source, '', '', true)
+        logger.info(`File ${source} has been moved to ${updatedFilepath}`)
+        await updateFTP(updatedFilepath, submitter)
+        return reject(err)
+      }
+      resolve(true)
+    })
+  })
+
 const extractFiles = (source, dest, submitter) =>
   new Promise((resolve, reject) => {
     const cmd = `tar vzxf ${source} -C ${dest} --xform='s#^.+/##x'` // the xform removes all directories
-- 
GitLab