From e3f08f9816cea54c382767c02c48bb8e69049dd4 Mon Sep 17 00:00:00 2001 From: Yogmatee Roochun <yroochun@ebi.ac.uk> Date: Wed, 6 Feb 2019 12:28:14 +0000 Subject: [PATCH] modularised 'taggers-ftp-upload' code --- server/ftp-integration/functions/db.js | 14 ++ server/ftp-integration/functions/files.js | 137 ++++++++++++ server/ftp-integration/functions/unTar.js | 28 +++ server/ftp-integration/functions/user.js | 34 +++ server/ftp-integration/taggedXML.js | 242 ++-------------------- server/populate-journal/package.json | 4 +- 6 files changed, 234 insertions(+), 225 deletions(-) create mode 100644 server/ftp-integration/functions/db.js create mode 100644 server/ftp-integration/functions/files.js create mode 100644 server/ftp-integration/functions/unTar.js create mode 100644 server/ftp-integration/functions/user.js diff --git a/server/ftp-integration/functions/db.js b/server/ftp-integration/functions/db.js new file mode 100644 index 000000000..5a5f5df76 --- /dev/null +++ b/server/ftp-integration/functions/db.js @@ -0,0 +1,14 @@ +const rfr = require('rfr') +const logger = require('@pubsweet/logger') + +const FileModel = rfr('server/xpub-model/entities/file/data-access') + +module.exports.createFiles = async function createFiles(filesArr) { + for (let i = 0; i < filesArr.length; i += 1) { + /* eslint-disable no-await-in-loop */ + const filedb = await new FileModel(filesArr[i]).save({ + insertMissing: true, + }) + logger.info(filedb) + } +} diff --git a/server/ftp-integration/functions/files.js b/server/ftp-integration/functions/files.js new file mode 100644 index 000000000..14fd77eb1 --- /dev/null +++ b/server/ftp-integration/functions/files.js @@ -0,0 +1,137 @@ +const fs = require('fs') +const rfr = require('rfr') +const logger = require('@pubsweet/logger') +const readline = require('readline') +const path = require('path') +const mime = require('mime-types') +const uuidv4 = require('uuid/v4') + +const minioClient = rfr('server/modules/pubsweet-component-minio/minio-client') + +module.exports.getManifestFilename = function getManifestFilename(tmpPath) { + return new Promise((resolve, reject) => { + fs.readdir(tmpPath, (err, items) => { + if (err) reject(err) + for (let i = 0; i < items.length; i += 1) { + if (items[i].indexOf('manifest') > -1) { + resolve(items[i]) + } else reject(new Error('There is no manifest file.')) + } + }) + }) +} + +module.exports.getFilesData = function getFilesData( + tempFolder, + manifestFileName, +) { + return new Promise((resolve, reject) => { + const files = [] + const manuscriptId = manifestFileName.split('manifest.txt')[0] + + const lineReader = readline.createInterface({ + input: fs.createReadStream(`${tempFolder}/${manifestFileName}`, { + encoding: 'UTF-8', + }), + }) + lineReader + .on('line', line => { + if (line) { + const row = line.split(/\t{1,}/).filter(x => x) // return only non empty tokens + const fileName = row[row.length - 1] + + if (!fileName || !fs.existsSync(`${tempFolder}/${fileName}`)) { + reject(new Error(`File ${fileName} does not exist`)) + } else { + files.push({ + fileURI: `${tempFolder}/${fileName}`, + filename: fileName, + type: row[0], + label: row[1], + manuscriptId: `EMS${manuscriptId}`, + }) + } + } + }) + .on('close', () => { + resolve(files) + }) + }) +} + +module.exports.checkFiles = function checkFiles(files, tmpPath, user) { + return new Promise((resolve, reject) => { + const filesArr = [] + const promisesArr = [] + + for (let i = 0; i < files.length; i += 1) { + fs.access(files[i].fileURI, fs.F_OK, err => { + if (err) { + reject(new Error(`${files[i]} does not exist.`)) + } + }) + const fileInfo = files[i] + const { + fileURI: file, + filename, + type: fileType, + label: fileLabel, + manuscriptId, + } = fileInfo + const stats = fs.statSync(file) + const fileSizeInBytes = stats.size + const extension = path.extname(file) + const mimeType = mime.contentType(extension) + const uuid = uuidv4() + + filesArr.push({ + manuscriptId, + filename, + mimeType, + type: fileType, + size: fileSizeInBytes, + url: file, + label: fileLabel, + updatedBy: user.userId, + }) + + promisesArr.push( + uploadFileToMinio(`${uuid}${extension}`, filename, file, mimeType), + ) + } + resolve([filesArr, promisesArr]) + }) +} + +module.exports.uploadFileToMinio = uploadFileToMinio + +module.exports.renameFile = function renameFile(path) { + return new Promise((resolve, reject) => { + fs.rename(path, `${path}_${Date.now()}`, err => { + if (err) reject(err) + }) + resolve(path) + }) +} + +function uploadFileToMinio(filename, originalFileName, filePath, mimeType) { + return new Promise((resolve, reject) => { + minioClient.uploadFile( + filename, + originalFileName, + mimeType, + filePath, + (error, etag) => { + if (error) { + fs.unlinkSync(filePath) + logger.error(error) + reject(error) + } else { + logger.debug(`${originalFileName} file uploaded to minio`) + fs.unlinkSync(filePath) + resolve(filePath) + } + }, + ) + }) +} diff --git a/server/ftp-integration/functions/unTar.js b/server/ftp-integration/functions/unTar.js new file mode 100644 index 000000000..9ddfa832f --- /dev/null +++ b/server/ftp-integration/functions/unTar.js @@ -0,0 +1,28 @@ +const tar = require('tar') +const tmp = require('tmp') + +module.exports.untar = function untar(source, dest) { + return new Promise((resolve, reject) => { + tar.x( + { + file: source, + cwd: dest, + }, + err => { + if (err) { + reject(err) + } + resolve(dest) + }, + ) + }) +} + +module.exports.createTempDir = function createTempDir() { + return new Promise((resolve, reject) => { + tmp.dir({ mode: '0750', prefix: 'xpubTmpDir_' }, (err, tmpPath) => { + if (err) reject(err) + resolve(tmpPath) + }) + }) +} diff --git a/server/ftp-integration/functions/user.js b/server/ftp-integration/functions/user.js new file mode 100644 index 000000000..e7780b8bf --- /dev/null +++ b/server/ftp-integration/functions/user.js @@ -0,0 +1,34 @@ +const rfr = require('rfr') +const config = require('config') + +const Identity = rfr('server/xpub-model/entities/identity/data-access') +const ftpUsers = config.get('users') + +module.exports = function getUser(rootPath, filename) { + return new Promise((resolve, reject) => { + // const userName = path.basename(path.dirname(filename)) + const rootParts = rootPath.split('/') + const fileNameParts = filename.split('/') + const userName = fileNameParts[rootParts.length - 1] + + Identity.findByFieldEager( + 'email', + `${userName}@internal.ebi.ac.uk`, + '[user]', + ) + .then(identityDb => { + const user = ftpUsers.find(user => + user.identities.find( + identity => identity.email === `${userName}@internal.ebi.ac.uk`, + ), + ) + const identity = user.identities.find( + identity => identity.email === `${userName}@internal.ebi.ac.uk`, + ) + /* eslint-disable dot-notation */ + identity['userId'] = identityDb[0].userId + resolve(identity) + }) + .catch(error => reject(error)) + }) +} diff --git a/server/ftp-integration/taggedXML.js b/server/ftp-integration/taggedXML.js index e2dbb1e88..ec5caad7f 100644 --- a/server/ftp-integration/taggedXML.js +++ b/server/ftp-integration/taggedXML.js @@ -1,19 +1,10 @@ const chokidar = require('chokidar') -const logger = require('@pubsweet/logger') -const tar = require('tar') -const fs = require('fs') -const tmp = require('tmp') -const readline = require('readline') const config = require('config') -const rfr = require('rfr') -const path = require('path') -const mime = require('mime-types') -const uuidv4 = require('uuid/v4') - -const minioClient = rfr('server/modules/pubsweet-component-minio/minio-client') -const FileModel = rfr('server/xpub-model/entities/file/data-access') -const Identity = rfr('server/xpub-model/entities/identity/data-access') -const ftpUsers = config.get('users') +const logger = require('@pubsweet/logger') +const getUser = require('./functions/user.js') +const tar = require('./functions/unTar.js') +const files = require('./functions/files.js') +const db = require('./functions/db.js') const rootPath = `${process.env.HOME}/${config.get('ftp_directory')}/` @@ -50,218 +41,23 @@ async function processFile(path) { logger.info(`Processing tagged XML packages: ${path}`) const user = await getUser(rootPath, path) - const tmpPath = await createTempDir() - const extractedFilePath = await extractFiles(path, tmpPath) - const manifestFilename = await getManifestFilename(extractedFilePath) - const files = await getFilesData(extractedFilePath, manifestFilename) - const [filesArr, promisesArr] = await checkFiles(files, tmpPath, user) - await createFiles(filesArr) + const tmpPath = await tar.createTempDir() + const extractedFilePath = await tar.untar(path, tmpPath) + const manifestFilename = await files.getManifestFilename(extractedFilePath) + const filesData = await files.getFilesData( + extractedFilePath, + manifestFilename, + ) + const [filesArr, promisesArr] = await files.checkFiles( + filesData, + tmpPath, + user, + ) + await db.createFiles(filesArr) await Promise.all(promisesArr) - await renameFile(path) + await files.renameFile(path) logger.info('Uploading to Minio and the database has been completed.') } catch (err) { logger.error('Error', err.message) } } - -function getUser(rootPath, filename) { - return new Promise((resolve, reject) => { - // const userName = path.basename(path.dirname(filename)) - const rootParts = rootPath.split('/') - const fileNameParts = filename.split('/') - const userName = fileNameParts[rootParts.length - 1] - - Identity.findByFieldEager( - 'email', - `${userName}@internal.ebi.ac.uk`, - '[user]', - ) - .then(identityDb => { - const user = ftpUsers.find(user => - user.identities.find( - identity => identity.email === `${userName}@internal.ebi.ac.uk`, - ), - ) - const identity = user.identities.find( - identity => identity.email === `${userName}@internal.ebi.ac.uk`, - ) - /* eslint-disable dot-notation */ - identity['userId'] = identityDb[0].userId - resolve(identity) - }) - .catch(error => reject(error)) - }) -} - -function createTempDir() { - return new Promise((resolve, reject) => { - tmp.dir({ mode: '0750', prefix: 'xpubTmpDir_' }, (err, tmpPath) => { - if (err) reject(err) - resolve(tmpPath) - }) - }) -} - -function extractFiles(source, dest) { - return new Promise((resolve, reject) => { - tar.x( - { - file: source, - cwd: dest, - }, - err => { - if (err) { - reject(err) - } - resolve(dest) - }, - ) - }) -} - -function getManifestFilename(tmpPath) { - return new Promise((resolve, reject) => { - fs.readdir(tmpPath, (err, items) => { - if (err) reject(err) - for (let i = 0; i < items.length; i += 1) { - if (items[i].indexOf('manifest') > -1) { - resolve(items[i]) - } else reject(new Error('There is no manifest file.')) - } - }) - }) -} - -function getFilesData(tempFolder, manifestFileName) { - return new Promise((resolve, reject) => { - const files = [] - const manuscriptId = manifestFileName.split('manifest.txt')[0] - - const lineReader = readline.createInterface({ - input: fs.createReadStream(`${tempFolder}/${manifestFileName}`, { - encoding: 'UTF-8', - }), - }) - lineReader - .on('line', line => { - if (line) { - const row = line.split(/\t{1,}/).filter(x => x) // return only non empty tokens - const fileName = row[row.length - 1] - - if (!fileName || !fs.existsSync(`${tempFolder}/${fileName}`)) { - reject(new Error(`File ${fileName} does not exist`)) - } else { - files.push({ - fileURI: `${tempFolder}/${fileName}`, - filename: fileName, - type: row[0], - label: row[1], - manuscriptId: `EMS${manuscriptId}`, - }) - } - } - }) - .on('close', () => { - resolve(files) - }) - }) -} - -function checkFiles(files, tmpPath, user) { - return new Promise((resolve, reject) => { - const filesArr = [] - const promisesArr = [] - - for (let i = 0; i < files.length; i += 1) { - fs.access(files[i].fileURI, fs.F_OK, err => { - if (err) { - reject(new Error(`${files[i]} does not exist.`)) - } - }) - const fileInfo = files[i] - const { - fileURI: file, - filename, - type: fileType, - label: fileLabel, - manuscriptId, - } = fileInfo - const stats = fs.statSync(file) - const fileSizeInBytes = stats.size - const extension = path.extname(file) - const mimeType = mime.contentType(extension) - const uuid = uuidv4() - - filesArr.push({ - manuscriptId, - filename, - mimeType, - type: fileType, - size: fileSizeInBytes, - url: file, - label: fileLabel, - updatedBy: user.userId, - }) - - promisesArr.push( - uploadFile( - `${uuid}${extension}`, - filename, - extension, - file, - fileSizeInBytes, - mimeType, - ), - ) - } - resolve([filesArr, promisesArr]) - }) -} - -function uploadFile( - filename, - originalFileName, - extension, - filePath, - item_size, - mimeType, -) { - return new Promise((resolve, reject) => { - minioClient.uploadFile( - filename, - originalFileName, - mimeType, - filePath, - (error, etag) => { - if (error) { - fs.unlinkSync(filePath) - logger.error(error) - reject(error) - } else { - logger.debug(`${originalFileName} file uploaded to minio`) - fs.unlinkSync(filePath) - resolve(filePath) - } - }, - ) - }) -} - -async function createFiles(filesArr) { - for (let i = 0; i < filesArr.length; i += 1) { - /* eslint-disable no-await-in-loop */ - const filedb = await new FileModel(filesArr[i]).save({ - insertMissing: true, - }) - logger.info(filedb) - } -} - -async function renameFile(path) { - return new Promise((resolve, reject) => { - fs.rename(path, `${path}_${Date.now()}`, err => { - if (err) reject(err) - }) - resolve(path) - }) -} diff --git a/server/populate-journal/package.json b/server/populate-journal/package.json index a2b8516df..6a68723ad 100755 --- a/server/populate-journal/package.json +++ b/server/populate-journal/package.json @@ -1,8 +1,8 @@ { "name": "xpub-epmc-populate-journal", "version": "1.0.0", - "description": "API for Europe PMC GRIST search with private key", - "main": "index.js", + "description": "Populate the journal table with data from NCBI eutils fetch", + "main": "api.js", "scripts": { "test": "echo \"Error: no test specified\" && exit 1" }, -- GitLab