From b77797dc6601e564640aadb299d47653726727d5 Mon Sep 17 00:00:00 2001
From: Yogmatee <yroochun@ebi.ac.uk>
Date: Fri, 25 Jun 2021 12:39:55 +0100
Subject: [PATCH] #1168: Validate compressed file before extracting

---
 server/ftp-integration/api.js | 129 +++++++++++++++++++++-------------
 1 file changed, 81 insertions(+), 48 deletions(-)

diff --git a/server/ftp-integration/api.js b/server/ftp-integration/api.js
index 1db96616e..f9302f8e5 100644
--- a/server/ftp-integration/api.js
+++ b/server/ftp-integration/api.js
@@ -66,7 +66,9 @@ const tidyUp = async (filePath, tmpPath, manId, isError) => {
 
   const datedFolder = `${parentDir}/${currentDate}`
   const updatedFilepath = `${datedFolder}/${fileName}.${fileNameExt}${dateExt}`
-  const cmd = `mkdir -p ${datedFolder}; mv ${filePath} ${updatedFilepath} ; rm -rf ${tmpPath}`
+  let cmd = `mkdir -p ${datedFolder}; mv ${filePath} ${updatedFilepath}`
+  if (tmpPath) cmd = cmd.concat(`; rm -rf ${tmpPath}`)
+
   return new Promise((resolve, reject) => {
     exec(cmd, (err, stdout, stderr) => {
       if (err) {
@@ -110,10 +112,9 @@ const queryIdentity = async path => {
 const queue = async.queue(async taskPath => {
   const submitter = await queryIdentity(taskPath)
   if (submitter) {
-    logger.info(`Submitter Details: ${submitter}`)
     logger.info(`Initialising job for file: ${taskPath}`)
-    await runProcess(taskPath, submitter)
-    logger.info(`Job complete for file: ${taskPath}`)
+    runProcess(taskPath, submitter)
+    //logger.info(`Job complete for file: ${taskPath}`)
     return true
   }
   logger.error(`Submitter with details ${submitter} not found`)
@@ -170,56 +171,88 @@ watcher.on('change', (path, stats) => {
   if (stats) logger.debug(`File ${path} changed size to ${stats.size}`)
 })
 
-const runProcess = async (packagePath, submitter) => {
+const runProcess = (packagePath, submitter) => {
   let packageName
   let tmpPath
   let manuscriptId
   let suffixedFilePath
-  try {
-    logger.info(`Starting Bulk Import Process for ${packagePath}`)
-    packageName = path.basename(packagePath)
-    tmpPath = await createTempDir()
-    const tmpPathExt = await extractFiles(packagePath, tmpPath, submitter)
-    // rename file - append .start suffix
-    suffixedFilePath = await fileUtils.renameFileSuffix(packagePath)
-    const parsedInfo = await parseManifest(tmpPathExt, packageName)
-
-    const xmlObj = await parseXml(parsedInfo.metaXml, packageName)
-    manuscriptId = await createManuscript(
-      xmlObj,
-      parsedInfo,
-      tmpPath,
-      submitter,
-      packageName,
-      suffixedFilePath,
-    )
-    const updatedFilepath = await tidyUp(
-      suffixedFilePath,
-      tmpPath,
-      manuscriptId,
-    )
-    await updateFTP(updatedFilepath, submitter)
-    return true
-  } catch (error) {
-    if (submitter) {
-      bulkUploaderEmail(submitter.email, error, packageName)
-    }
-    errorDevEmail(
-      'the FTP utility',
-      `Error with ftp-integration - bulk upload<br/>Package: ${packageName}<br/>${error}`,
-    )
-    logger.error('Error with ftp-integration - api.js', error.stack)
-    const updatedFilepath = await tidyUp(
-      suffixedFilePath,
-      tmpPath,
-      manuscriptId,
-      true,
-    )
-    await updateFTP(updatedFilepath, submitter)
-    return true
-  }
+
+  // verify if tar.gz id corrupted
+  validateCompressedFile(packagePath, submitter)
+    .then(async () => {
+      try {
+        logger.info(`Starting Bulk Import Process for ${packagePath}`)
+        packageName = path.basename(packagePath)
+        tmpPath = await createTempDir()
+        const tmpPathExt = await extractFiles(packagePath, tmpPath, submitter)
+        // rename file - append .start suffix
+        suffixedFilePath = await fileUtils.renameFileSuffix(packagePath)
+        const parsedInfo = await parseManifest(tmpPathExt, packageName)
+
+        const xmlObj = await parseXml(parsedInfo.metaXml, packageName)
+        manuscriptId = await createManuscript(
+          xmlObj,
+          parsedInfo,
+          tmpPath,
+          submitter,
+          packageName,
+          suffixedFilePath,
+        )
+        const updatedFilepath = await tidyUp(
+          suffixedFilePath,
+          tmpPath,
+          manuscriptId,
+        )
+        await updateFTP(updatedFilepath, submitter)
+      } catch (error) {
+        if (submitter) {
+          bulkUploaderEmail(submitter.email, error, packageName)
+        }
+        errorDevEmail(
+          'the FTP utility',
+          `Error with ftp-integration - bulk upload<br/>Package: ${packageName}<br/>${error}`,
+        )
+        logger.error('Error with ftp-integration - api.js', error.stack)
+        const updatedFilepath = await tidyUp(
+          suffixedFilePath,
+          tmpPath,
+          manuscriptId,
+          true,
+        )
+        await updateFTP(updatedFilepath, submitter)
+        return true
+      }
+    })
+    .catch(error => {
+      // tar.gz id corrupted
+      if (submitter) {
+        bulkUploaderEmail(submitter.email, error, packageName)
+      }
+      errorDevEmail(
+        'the FTP utility',
+        `Error with ftp-integration - bulk upload<br/>Package: ${packageName}<br/>${error}`,
+      )
+      logger.error('Error with ftp-integration - api.js', error.stack)
+    })
+  //return true
 }
 
+const validateCompressedFile = (source, submitter) =>
+  new Promise((resolve, reject) => {
+    exec(`gunzip -t ${source}`, async err => {
+      if (err) {
+        logger.error(
+          `FTP bulk upload file: ${source} is corrupted and cannot be processed.`,
+        )
+        const updatedFilepath = await tidyUp(source, '', '', true)
+        logger.info(`File ${source} has been moved to ${updatedFilepath}`)
+        await updateFTP(updatedFilepath, submitter)
+        reject(err)
+      }
+      resolve(true)
+    })
+  })
+
 const extractFiles = (source, dest, submitter) =>
   new Promise((resolve, reject) => {
     const cmd = `tar vzxf ${source} -C ${dest} --xform='s#^.+/##x'` // the xform removes all directories
-- 
GitLab
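
Note: the core idea of the patch is to test the gzip stream with `gunzip -t` before any extraction is attempted, and to move a corrupted upload aside rather than feed it to `tar`. Below is a minimal standalone sketch of that integrity check, separate from the repository code; the helper name and example path are illustrative assumptions, and it uses `execFile` instead of `exec` only to avoid interpolating the file path into a shell string.

// Illustrative sketch only, not part of the patch above.
const { execFile } = require('child_process')

// Resolves if the archive's compressed stream is readable, rejects otherwise.
// `gunzip -t` tests integrity without writing any extracted output.
const checkArchiveIntegrity = source =>
  new Promise((resolve, reject) => {
    execFile('gunzip', ['-t', source], err => {
      if (err) {
        reject(new Error(`Archive ${source} appears to be corrupted: ${err.message}`))
        return
      }
      resolve(true)
    })
  })

// Hypothetical usage:
// checkArchiveIntegrity('/ftp/incoming/package.tar.gz')
//   .then(() => console.log('archive is intact, safe to extract'))
//   .catch(err => console.error(err.message))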