Skip to content
Snippets Groups Projects
Commit a3efa212 authored by ahamelers's avatar ahamelers
Browse files

xmlValidation bug fix (#282)

parent 957c8287
No related branches found
No related tags found
2 merge requests: !60 Dev, !57 Shared data model
......@@ -14,7 +14,6 @@
"./server/eutils",
"./server/email",
"./server/xmlValidation",
"./server/db",
"./server/annotator",
"./server/cron",
"./server/xpub-server",
......
const path = require('path')
const mime = require('mime-types')
const libxml = require('libxmljs')
const fetch = require('node-fetch')
const xslt4node = require('xslt4node')
const fs = require('fs')
const uuidv4 = require('uuid/v4')
const tar = require('../ftp-integration/functions/unTar.js')
const download = require('download')
const getUser = require('../ftp-integration/functions/user.js')
const files = require('../ftp-integration/functions/files.js')
const db = require('../ftp-integration/functions/db.js')
const logger = require('@pubsweet/logger')
// Register the Saxon jar with xslt4node before any transform is run.
// NOTE(review): the path is relative, so it presumably resolves against the
// process working directory rather than this file's directory — confirm.
xslt4node.addLibrary('./saxon9he.jar')
// Hand '-Xmx1g' to xslt4node as an option (presumably a JVM flag capping the
// heap at 1 GB — confirm against the xslt4node documentation).
xslt4node.addOptions('-Xmx1g')
/*
pushXML(
'http://localhost:3000/download/afa255c0-2af0-11e9-b3b7-33043ebc220c.xml',
'EMS80002',
)
*/
/**
 * Validate a manuscript XML file and, if it passes every check, store it as a
 * file of the given manuscript.
 *
 * The checks run in order and the first failing one logs its problems and
 * stops the pipeline:
 *   1. the fetched XML is validated against journalpublishing3.xsd;
 *   2. the XML is transformed with pnihms2pmc3.xsl and the result is validated
 *      against archivearticle3.xsd;
 *   3. the result is run through nlm-stylechecker.xsl and searched for
 *      `error` elements.
 * When all pass, the file is downloaded to a temp directory, uploaded to
 * Minio and recorded through `db.createFiles`.
 *
 * Exceptions raised along the way are caught and logged, not propagated.
 *
 * @param {string} url - Location of the XML file to validate and store.
 * @param {string} manuscriptId - Manuscript the stored file is attached to.
 * @returns {Promise<void>}
 */
module.exports.pushXML = async function pushXML(url, manuscriptId) {
  // Render validation errors the way they are logged: one entry per error,
  // entries separated by a blank line, the optional ID appended to the line.
  const describeErrors = errs =>
    errs
      .map(err => {
        const id = err.str1 ? ` ID: ${err.str1}.` : ''
        return `${err}\nLine: ${err.line}.${id}`
      })
      .join('\n\n')

  try {
    const xml = await fetchFile(url)
    const xsdBlue = await readData(
      path.resolve(__dirname, 'xsl/xsd/publishing/journalpublishing3.xsd'),
    )
    const xsdGreen = await readData(
      path.resolve(__dirname, 'xsl/xsd/archiving/archivearticle3.xsd'),
    )
    const xmlDoc = libxml.parseXml(xml)
    // baseUrl carries a trailing slash and points at the directory that
    // holds each schema file.
    const baseUrlB = path.resolve(__dirname, 'xsl/xsd/publishing/')
    const xsdBlueDoc = libxml.parseXml(xsdBlue, {
      baseUrl: `${baseUrlB}/`,
    })
    const baseUrlG = path.resolve(__dirname, 'xsl/xsd/archiving/')
    const xsdGreenDoc = libxml.parseXml(xsdGreen, {
      baseUrl: `${baseUrlG}/`,
    })

    if (!xmlDoc.validate(xsdBlueDoc)) {
      logger.info(`Invalid XML: \n${describeErrors(xmlDoc.validationErrors)}`)
      return
    }

    const nxml = await transformXML(
      xml,
      path.resolve(__dirname, 'xsl/pnihms2pmc3.xsl'),
    )
    const nxmlDoc = libxml.parseXml(nxml)
    if (!nxmlDoc.validate(xsdGreenDoc)) {
      logger.info(
        `Invalid NXML: \n\n${describeErrors(nxmlDoc.validationErrors)}`,
      )
      return
    }

    // Check NXML against the stylechecker
    const checked = await transformXML(
      nxml,
      path.resolve(__dirname, 'xsl/stylechecker/nlm-stylechecker.xsl'),
    )
    const styleErrors = libxml.parseXml(checked).find('//error')
    if (styleErrors.length > 0) {
      const messages = styleErrors.map(err => err.text()).join('<br/><br/>')
      logger.info(`Style Errors: <br/><br/>${messages}`)
      return
    }

    const user = await getUser.getAdminUser()
    const tmpPath = await tar.createTempDir()
    const file = await downloadFile(url, tmpPath)
    // Take the name from the downloaded path itself; slicing `file` at an
    // offset computed on `url` only worked when both strings happened to
    // have their last '/' at the same position.
    const filename = path.basename(file)
    const extension = path.extname(file)
    const mimeType = mime.contentType(extension)
    const uuid = uuidv4()
    const fileInfo = {
      manuscriptId,
      filename,
      mimeType,
      type: 'xml',
      size: fs.statSync(file).size,
      // NOTE(review): this stores the local temp path, not the Minio object
      // name used below — confirm this is what consumers of `url` expect.
      url: file,
      label: '0',
      updatedBy: user.userId,
    }
    // upload to minio; awaited so a failed upload reaches the catch below
    // and the completion message is only logged once it has finished
    // (assumes uploadFileToMinio returns a promise — harmless otherwise).
    await files.uploadFileToMinio(
      `${uuid}${extension}`,
      filename,
      file,
      mimeType,
    )
    await db.createFiles([fileInfo])
    logger.info('Uploading to Minio and the database has been completed.')
  } catch (err) {
    logger.error('Error', err.message)
  }
}
/**
 * Download `url` into `tmpPath` and return the path of the first entry that
 * the directory lists afterwards.
 *
 * @param {string} url - Resource to download.
 * @param {string} tmpPath - Directory the download is written to.
 * @returns {Promise<string>} `${tmpPath}/<first directory entry>`.
 */
async function downloadFile(url, tmpPath) {
  await download(url, tmpPath)
  // Only the first directory entry is used.
  const [firstEntry] = fs.readdirSync(tmpPath)
  return `${tmpPath}/${firstEntry}`
}
/**
 * Read a file as UTF-8 text.
 *
 * @param {string} url - Path of the file to read (passed to `fs.readFile`).
 * @returns {Promise<string>} The file contents; rejects with the `fs` error
 *   when the file cannot be read.
 */
function readData(url) {
  return new Promise((resolve, reject) => {
    fs.readFile(url, 'utf8', (err, data) => {
      if (err) {
        // Stop here so the failure path never falls through to resolve().
        reject(err)
        return
      }
      resolve(data)
    })
  })
}
/**
 * Fetch `fileUrl` with an XML Accept header and return the response body as
 * text.
 *
 * @param {string} fileUrl - URL of the resource to fetch.
 * @returns {Promise<string>} The response body.
 * @throws {Error} When the request fails or the response status is not OK,
 *   so an HTTP error page is never handed on as if it were the document.
 */
async function fetchFile(fileUrl) {
  const response = await fetch(fileUrl, {
    method: 'GET',
    headers: {
      Accept: 'application/xml',
    },
  })
  if (!response.ok) {
    throw new Error(
      `Failed to fetch ${fileUrl}: ${response.status} ${response.statusText}`,
    )
  }
  return response.text()
}
/**
 * Run an XSLT stylesheet over an XML string using xslt4node.
 *
 * The first DOCTYPE declaration, including an internal subset in square
 * brackets when present, is removed from the input before transforming.
 *
 * @param {string} xmlString - XML document to transform.
 * @param {string} xslPath - Path of the stylesheet to apply.
 * @param {Object} [params] - Optional stylesheet parameters, passed through
 *   as `config.params`.
 * @returns {Promise<string>} The transformation output; rejects with the
 *   error reported by xslt4node.
 */
function transformXML(xmlString, xslPath, params) {
  // The optional group matches an internal subset: '[' ... ']'. The previous
  // pattern had doubled backslashes, so it looked for literal backslashes and
  // never matched a subset.
  const source = xmlString.replace(/<!DOCTYPE[^>[]*(\[[^\]]*\])?>/, '')
  const config = {
    xsltPath: xslPath,
    source,
    result: String,
    props: {
      indent: 'yes',
    },
  }
  if (params) {
    config.params = params
  }
  return new Promise((resolve, reject) => {
    xslt4node.transform(config, (err, result) => {
      if (err) {
        // Reject with the error itself so callers can read err.message.
        reject(err)
      } else {
        resolve(result)
      }
    })
  })
}
module.exports = {
backend: () => require('./api'),
const path = require('path')
const mime = require('mime-types')
const libxml = require('libxmljs')
const fetch = require('node-fetch')
const xslt4node = require('xslt4node')
const fs = require('fs')
const uuidv4 = require('uuid/v4')
const tar = require('../ftp-integration/functions/unTar.js')
const download = require('download')
const getUser = require('../ftp-integration/functions/user.js')
const files = require('../ftp-integration/functions/files.js')
const db = require('../ftp-integration/functions/db.js')
const logger = require('@pubsweet/logger')
// Register the Saxon jar with xslt4node before any transform is run.
// NOTE(review): the path is relative, so it presumably resolves against the
// process working directory rather than this file's directory — confirm.
xslt4node.addLibrary('./saxon9he.jar')
// Hand '-Xmx1g' to xslt4node as an option (presumably a JVM flag capping the
// heap at 1 GB — confirm against the xslt4node documentation).
xslt4node.addOptions('-Xmx1g')
/*
pushXML(
'http://localhost:3000/download/afa255c0-2af0-11e9-b3b7-33043ebc220c.xml',
'EMS80002',
)
*/
/**
 * Validate a manuscript XML file and, if it passes every check, store it as a
 * file of the given manuscript.
 *
 * The checks run in order and the first failing one logs its problems and
 * stops the pipeline:
 *   1. the fetched XML is validated against journalpublishing3.xsd;
 *   2. the XML is transformed with pnihms2pmc3.xsl and the result is validated
 *      against archivearticle3.xsd;
 *   3. the result is run through nlm-stylechecker.xsl and searched for
 *      `error` elements.
 * When all pass, the file is downloaded to a temp directory, uploaded to
 * Minio and recorded through `db.createFiles`.
 *
 * Exceptions raised along the way are caught and logged, not propagated.
 *
 * @param {string} url - Location of the XML file to validate and store.
 * @param {string} manuscriptId - Manuscript the stored file is attached to.
 * @returns {Promise<void>}
 */
module.exports.pushXML = async function pushXML(url, manuscriptId) {
  // Render validation errors the way they are logged: one entry per error,
  // entries separated by a blank line, the optional ID appended to the line.
  const describeErrors = errs =>
    errs
      .map(err => {
        const id = err.str1 ? ` ID: ${err.str1}.` : ''
        return `${err}\nLine: ${err.line}.${id}`
      })
      .join('\n\n')

  try {
    const xml = await fetchFile(url)
    const xsdBlue = await readData(
      path.resolve(__dirname, 'xsl/xsd/publishing/journalpublishing3.xsd'),
    )
    const xsdGreen = await readData(
      path.resolve(__dirname, 'xsl/xsd/archiving/archivearticle3.xsd'),
    )
    const xmlDoc = libxml.parseXml(xml)
    // baseUrl carries a trailing slash and points at the directory that
    // holds each schema file.
    const baseUrlB = path.resolve(__dirname, 'xsl/xsd/publishing/')
    const xsdBlueDoc = libxml.parseXml(xsdBlue, {
      baseUrl: `${baseUrlB}/`,
    })
    const baseUrlG = path.resolve(__dirname, 'xsl/xsd/archiving/')
    const xsdGreenDoc = libxml.parseXml(xsdGreen, {
      baseUrl: `${baseUrlG}/`,
    })

    if (!xmlDoc.validate(xsdBlueDoc)) {
      logger.info(`Invalid XML: \n${describeErrors(xmlDoc.validationErrors)}`)
      return
    }

    const nxml = await transformXML(
      xml,
      path.resolve(__dirname, 'xsl/pnihms2pmc3.xsl'),
    )
    const nxmlDoc = libxml.parseXml(nxml)
    if (!nxmlDoc.validate(xsdGreenDoc)) {
      logger.info(
        `Invalid NXML: \n\n${describeErrors(nxmlDoc.validationErrors)}`,
      )
      return
    }

    // Check NXML against the stylechecker
    const checked = await transformXML(
      nxml,
      path.resolve(__dirname, 'xsl/stylechecker/nlm-stylechecker.xsl'),
    )
    const styleErrors = libxml.parseXml(checked).find('//error')
    if (styleErrors.length > 0) {
      const messages = styleErrors.map(err => err.text()).join('<br/><br/>')
      logger.info(`Style Errors: <br/><br/>${messages}`)
      return
    }

    const user = await getUser.getAdminUser()
    const tmpPath = await tar.createTempDir()
    const file = await downloadFile(url, tmpPath)
    // Take the name from the downloaded path itself; slicing `file` at an
    // offset computed on `url` only worked when both strings happened to
    // have their last '/' at the same position.
    const filename = path.basename(file)
    const extension = path.extname(file)
    const mimeType = mime.contentType(extension)
    const uuid = uuidv4()
    const fileInfo = {
      manuscriptId,
      filename,
      mimeType,
      type: 'xml',
      size: fs.statSync(file).size,
      // NOTE(review): this stores the local temp path, not the Minio object
      // name used below — confirm this is what consumers of `url` expect.
      url: file,
      label: '0',
      updatedBy: user.userId,
    }
    // upload to minio; awaited so a failed upload reaches the catch below
    // and the completion message is only logged once it has finished
    // (assumes uploadFileToMinio returns a promise — harmless otherwise).
    await files.uploadFileToMinio(
      `${uuid}${extension}`,
      filename,
      file,
      mimeType,
    )
    await db.createFiles([fileInfo])
    logger.info('Uploading to Minio and the database has been completed.')
  } catch (err) {
    logger.error('Error', err.message)
  }
}
/**
 * Download `url` into `tmpPath` and return the path of the first entry that
 * the directory lists afterwards.
 *
 * @param {string} url - Resource to download.
 * @param {string} tmpPath - Directory the download is written to.
 * @returns {Promise<string>} `${tmpPath}/<first directory entry>`.
 */
async function downloadFile(url, tmpPath) {
  await download(url, tmpPath)
  // Only the first directory entry is used.
  const [firstEntry] = fs.readdirSync(tmpPath)
  return `${tmpPath}/${firstEntry}`
}
/**
 * Read a file as UTF-8 text.
 *
 * @param {string} url - Path of the file to read (passed to `fs.readFile`).
 * @returns {Promise<string>} The file contents; rejects with the `fs` error
 *   when the file cannot be read.
 */
function readData(url) {
  return new Promise((resolve, reject) => {
    fs.readFile(url, 'utf8', (err, data) => {
      if (err) {
        // Stop here so the failure path never falls through to resolve().
        reject(err)
        return
      }
      resolve(data)
    })
  })
}
/**
 * Fetch `fileUrl` with an XML Accept header and return the response body as
 * text.
 *
 * @param {string} fileUrl - URL of the resource to fetch.
 * @returns {Promise<string>} The response body.
 * @throws {Error} When the request fails or the response status is not OK,
 *   so an HTTP error page is never handed on as if it were the document.
 */
async function fetchFile(fileUrl) {
  const response = await fetch(fileUrl, {
    method: 'GET',
    headers: {
      Accept: 'application/xml',
    },
  })
  if (!response.ok) {
    throw new Error(
      `Failed to fetch ${fileUrl}: ${response.status} ${response.statusText}`,
    )
  }
  return response.text()
}
/**
 * Run an XSLT stylesheet over an XML string using xslt4node.
 *
 * The first DOCTYPE declaration, including an internal subset in square
 * brackets when present, is removed from the input before transforming.
 *
 * @param {string} xmlString - XML document to transform.
 * @param {string} xslPath - Path of the stylesheet to apply.
 * @param {Object} [params] - Optional stylesheet parameters, passed through
 *   as `config.params`.
 * @returns {Promise<string>} The transformation output; rejects with the
 *   error reported by xslt4node.
 */
function transformXML(xmlString, xslPath, params) {
  // The optional group matches an internal subset: '[' ... ']'. The previous
  // pattern had doubled backslashes, so it looked for literal backslashes and
  // never matched a subset.
  const source = xmlString.replace(/<!DOCTYPE[^>[]*(\[[^\]]*\])?>/, '')
  const config = {
    xsltPath: xslPath,
    source,
    result: String,
    props: {
      indent: 'yes',
    },
  }
  if (params) {
    config.params = params
  }
  return new Promise((resolve, reject) => {
    xslt4node.transform(config, (err, result) => {
      if (err) {
        // Reject with the error itself so callers can read err.message.
        reject(err)
      } else {
        resolve(result)
      }
    })
  })
}
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment