Commit 3589a60f authored by M. Haseeb

ENA-4161

- Introduced a JSON manifest file format.
- Added support for a read type attribute on the FASTQ manifest field.
parent 5c3dd054
......@@ -11,7 +11,7 @@ plugins {
apply plugin: 'io.spring.dependency-management'
group = 'uk.ac.ebi.ena.webin-cli'
version = '3.6.0'
version = '3.7.0'
sourceCompatibility = '1.8'
targetCompatibility = '1.8'
......@@ -75,7 +75,7 @@ task printWebinTestCredentials {
}
dependencies {
implementation( 'uk.ac.ebi.ena.webin-cli:webin-cli-validator:1.2.0' )
implementation( 'uk.ac.ebi.ena.webin-cli:webin-cli-validator:1.3.0' )
implementation( 'uk.ac.ebi.ena.sequence:sequencetools:2.1.3' )
implementation( 'uk.ac.ebi.ena.txmbtools:txmbtools:1.0.3' )
implementation( 'uk.ac.ebi.ena:readtools:1.11.1' )
......
......@@ -445,12 +445,30 @@ public class WebinCli {
if (field.getType() == ManifestFieldType.META) {
for (ManifestFieldProcessor processor : field.getFieldProcessors()) {
if (processor instanceof CVFieldProcessor) {
value = ": <br/>* " + ((CVFieldProcessor) processor).getValues().stream().collect(Collectors.joining("<br/>* "));
value = ": <br/>* " + ((CVFieldProcessor) processor).getValues().stream()
.collect(Collectors.joining("<br/>* "));
}
}
}
StringBuilder attHelpText = new StringBuilder();
if (!field.getFieldAttributes().isEmpty()) {
//attHelpText.append("<br/><br/>Attributes:");
field.getFieldAttributes().stream().forEach(att -> {
attHelpText.append("<br/>" + att.getName() + " attribute");
for (ManifestFieldProcessor processor : att.getFieldProcessors()) {
if (processor instanceof CVFieldProcessor) {
attHelpText.append(":<br/> * " + ((CVFieldProcessor) processor).getValues().stream()
.collect(Collectors.joining("<br/> * ")));
}
}
});
}
table.addRule();
table.addRow(name, cardinality, field.getDescription() + value);
table.addRow(name, cardinality, field.getDescription() + value + attHelpText);
}
private static void printManifestFileGroupHelp(ManifestReader<?> manifestReader, PrintStream out) {
......
......@@ -77,6 +77,8 @@ public enum WebinCliMessage implements MessageFormatSource {
MANIFEST_READER_INVALID_FILE_GROUP_ERROR("An invalid set of files has been specified{1}. Expected data files are: {0}."),
MANIFEST_READER_INVALID_FILE_COMPRESSION_ERROR("Failed to uncompress file: \"{0}\". The file must be compressed with {1}."),
MANIFEST_READER_MISSING_ADDRESS_OR_AUTHOR_ERROR("Please provide both address and author details or neither."),
MANIFEST_READER_MANIFEST_FILE_MALFORMED("Malformed manifest file content."),
MANIFEST_READER_UNKNOWN_ATTRIBUTE_FIELD_ERROR("Unknown attribute field: {0}."),
FILE_SUFFIX_PROCESSOR_ERROR("Invalid {0} file suffix: \"{1}\". Valid file suffixes are: {2}."),
CV_FIELD_PROCESSOR_ERROR("Invalid {0} field value: \"{1}\". Valid values are: {2}."),
......
......@@ -29,6 +29,8 @@ import uk.ac.ebi.ena.webin.cli.validator.manifest.ReadsManifest;
import uk.ac.ebi.ena.webin.cli.validator.manifest.ReadsManifest.QualityScore;
import java.io.File;
import java.util.List;
import java.util.Map;
import java.util.stream.Stream;
public class
......@@ -52,6 +54,7 @@ ReadsManifestReader extends ManifestReader<ReadsManifest> {
String FASTQ = "FASTQ";
String BAM = "BAM";
String CRAM = "CRAM";
String READ_TYPE = "READ_TYPE";
}
public interface Description {
......@@ -72,6 +75,7 @@ ReadsManifestReader extends ManifestReader<ReadsManifest> {
String FASTQ = "Fastq file";
String BAM = "BAM file";
String CRAM = "CRAM file";
String READ_TYPE = "10x Fastq read types";
}
private final static String INSTRUMENT_UNSPECIFIED = "unspecified";
......@@ -89,6 +93,9 @@ ReadsManifestReader extends ManifestReader<ReadsManifest> {
QUALITY_SCORE_PHRED_64,
QUALITY_SCORE_LOGODDS
);
public final static ManifestCVList CV_READ_TYPE = new ManifestCVList(
"single", "paired", "cell_barcode", "umi_barcode", "feature_barcode", "sample_barcode"
);
private final ReadsManifest manifest = new ReadsManifest();
......@@ -112,10 +119,20 @@ ReadsManifestReader extends ManifestReader<ReadsManifest> {
.meta().optional().name(Field.LIBRARY_CONSTRUCTION_PROTOCOL).desc(Description.LIBRARY_CONSTRUCTION_PROTOCOL).and()
.meta().optional().name(Field.LIBRARY_NAME).desc(Description.LIBRARY_NAME).and()
.meta().optional().name(Field.INSERT_SIZE).desc(Description.INSERT_SIZE).and()
.file().optional(2).name(Field.FASTQ).desc(Description.FASTQ).processor(getFastqProcessors()).and()
.file()
.optional(2)
.name(Field.FASTQ)
.desc(Description.FASTQ)
.processor(getFastqProcessors())
.attributes(new ManifestFieldDefinition.Builder()
.attribute().optional().name(Field.READ_TYPE).desc(Description.READ_TYPE)
.processor(new CVFieldProcessor(CV_READ_TYPE)).build())
.and()
.file().optional().name(Field.BAM).desc(Description.BAM).processor(getBamProcessors()).and()
.file().optional().name(Field.CRAM).desc(Description.CRAM).processor(getCramProcessors()).and()
.meta().optional().hidden().name(Field.QUALITY_SCORE).desc(Description.QUALITY_SCORE).processor(new CVFieldProcessor(CV_QUALITY_SCORE)).and()
.meta().optional().hidden().name(Field.QUALITY_SCORE).desc(Description.QUALITY_SCORE).processor(new CVFieldProcessor(CV_QUALITY_SCORE)).and()
.meta().optional().hidden().name(Field.__HORIZON).desc(Description.__HORIZON).and()
.meta().optional().name(Fields.SUBMISSION_TOOL).desc(Descriptions.SUBMISSION_TOOL).and()
.meta().optional().name(Fields.SUBMISSION_TOOL_VERSION).desc(Descriptions.SUBMISSION_TOOL_VERSION)
......@@ -217,7 +234,9 @@ ReadsManifestReader extends ManifestReader<ReadsManifest> {
getFiles( getInputDir(), getManifestReaderResult(), ReadsManifestReader.Field.BAM ).forEach(file -> submissionFiles.add( new SubmissionFile( ReadsManifest.FileType.BAM, file ) ) );
getFiles( getInputDir(), getManifestReaderResult(), ReadsManifestReader.Field.CRAM ).forEach(file -> submissionFiles.add( new SubmissionFile( ReadsManifest.FileType.CRAM, file ) ) );
getFiles( getInputDir(), getManifestReaderResult(), ReadsManifestReader.Field.FASTQ ).forEach(file -> submissionFiles.add( new SubmissionFile( ReadsManifest.FileType.FASTQ, file ) ) );
List<Map.Entry<String, String>> fastqAtts = getAttributes(getManifestReaderResult(), ReadsManifestReader.Field.FASTQ);
getFiles( getInputDir(), getManifestReaderResult(), ReadsManifestReader.Field.FASTQ ).forEach(file -> submissionFiles.add( new SubmissionFile( ReadsManifest.FileType.FASTQ, file, fastqAtts ) ) );
}
private void
......
......@@ -224,16 +224,16 @@ public class ReadsXmlWriter implements XmlWriter<ReadsManifest, ReadsValidationR
manifest.files(FileType.BAM).stream()
.map(file -> file.getFile().toPath())
.forEach(file -> filesE.addContent(createFileElement(inputDir, uploadDir, file, "bam")));
.forEach(file -> filesE.addContent(createFileElement(inputDir, uploadDir, file, "bam", null)));
manifest.files(FileType.CRAM).stream()
.map(file -> file.getFile().toPath())
.forEach(file -> filesE.addContent(createFileElement(inputDir, uploadDir, file, "cram")));
.forEach(file -> filesE.addContent(createFileElement(inputDir, uploadDir, file, "cram", null)));
manifest.files(FileType.FASTQ).stream()
.map(file -> file.getFile().toPath())
.forEach(
file -> filesE.addContent(createFileElement(inputDir, uploadDir, file, "fastq")));
file -> filesE.addContent(createFileElement(
inputDir, uploadDir, file.getFile().toPath(), "fastq", file.getAttributes())));
Element runAttributesE = new Element("RUN_ATTRIBUTES");
......
......@@ -29,6 +29,7 @@ public class ManifestFieldDefinition {
private final int recommendedMinCount;
private final int recommendedMaxCount;
private final List<ManifestFieldProcessor> processors;
private final List<ManifestFieldDefinition> attributes;
private ManifestFieldDefinition(
String name,
......@@ -39,7 +40,8 @@ public class ManifestFieldDefinition {
int maxCount,
int recommendedMinCount,
int recommendedMaxCount,
List<ManifestFieldProcessor> processors) {
List<ManifestFieldProcessor> processors,
List<ManifestFieldDefinition> attributes) {
Assert.notNull(name, "Field name must not be null");
Assert.notNull(description, "Field description must not be null");
Assert.notNull(type, "Field type must not be null");
......@@ -52,6 +54,7 @@ public class ManifestFieldDefinition {
this.recommendedMinCount = recommendedMinCount;
this.recommendedMaxCount = recommendedMaxCount;
this.processors = processors;
this.attributes = attributes;
}
public String getName() {
......@@ -94,6 +97,10 @@ public class ManifestFieldDefinition {
return processors;
}
/**
 * Returns the attribute definitions attached to this field definition.
 *
 * @return the attribute definition list handed to the constructor, returned as-is (not copied)
 */
public List<ManifestFieldDefinition> getFieldAttributes() {
    return this.attributes;
}
public static class Builder {
private final List<ManifestFieldDefinition> fields = new ArrayList<>();
......@@ -106,6 +113,10 @@ public class ManifestFieldDefinition {
return new Field(this, ManifestFieldType.FILE);
}
/**
 * Starts the definition of a new field of type {@link ManifestFieldType#ATTRIBUTE}.
 *
 * @return a field builder bound to this builder
 */
public Field attribute() {
    final ManifestFieldType attributeType = ManifestFieldType.ATTRIBUTE;
    return new Field(this, attributeType);
}
public Field type(ManifestFieldType type) {
return new Field(this, type);
}
......@@ -121,6 +132,7 @@ public class ManifestFieldDefinition {
private boolean hidden = false;
private boolean recommended = false;
private List<ManifestFieldProcessor> processors = new ArrayList<>();
private List<ManifestFieldDefinition> attributes = new ArrayList<>();
private Field(Builder builder, ManifestFieldType type) {
this.builder = builder;
......@@ -176,6 +188,12 @@ public class ManifestFieldDefinition {
return this;
}
/**
 * Adds attribute definitions to the field being built.
 *
 * <p>{@code null} elements are ignored, and so is a {@code null} list, so that callers may pass
 * the result of an optional lookup without guarding it first.
 *
 * @param attributes attribute definitions to append; may be {@code null}
 * @return this field builder, for chaining
 */
public Field attributes(List<ManifestFieldDefinition> attributes) {
    if (attributes != null) {
        // Append directly instead of collecting into a temporary list first.
        attributes.stream().filter(Objects::nonNull).forEach(this.attributes::add);
    }
    return this;
}
public Builder and() {
add();
return builder;
......@@ -207,7 +225,8 @@ public class ManifestFieldDefinition {
type,
minCount, maxCount,
recommendedMinCount, recommendedMaxCount,
processors));
processors,
attributes));
}
}
}
......
......@@ -12,5 +12,6 @@ package uk.ac.ebi.ena.webin.cli.manifest;
public enum ManifestFieldType {
META,
FILE
FILE,
ATTRIBUTE
}
......@@ -10,26 +10,28 @@
*/
package uk.ac.ebi.ena.webin.cli.manifest;
import uk.ac.ebi.ena.webin.cli.validator.message.ValidationOrigin;
import java.util.ArrayList;
import java.util.List;
import uk.ac.ebi.ena.webin.cli.validator.message.ValidationOrigin;
public class
ManifestFieldValue
{
private final ManifestFieldDefinition definition;
private String value;
private List<ManifestFieldValue> attributes;
private final List<ValidationOrigin> origin = new ArrayList<>();
private boolean validFieldValueOrFileSuffix = true;
public
ManifestFieldValue( ManifestFieldDefinition definition, String value, ValidationOrigin origin )
{
public ManifestFieldValue(
ManifestFieldDefinition definition, String value, List<ManifestFieldValue> attributes, ValidationOrigin origin ) {
assert( definition != null );
assert( value != null );
this.definition = definition;
this.value = value;
this.attributes = attributes;
this.origin.add(origin);
this.origin.add(new ValidationOrigin("field", definition.getName()));
this.origin.add(new ValidationOrigin("value", value));
......@@ -82,4 +84,12 @@ ManifestFieldValue
{
return origin;
}
/**
 * Returns the attribute values attached to this field value.
 *
 * @return the list currently held by this object, returned as-is (not copied)
 */
public List<ManifestFieldValue> getAttributes() {
    return this.attributes;
}
/**
 * Replaces the attribute values attached to this field value.
 *
 * <p>A {@code null} argument is stored as an empty list: code that consumes the attributes
 * iterates over them without a null check, so keeping {@code null} here would surface later as a
 * {@link NullPointerException} far away from the actual cause.
 *
 * @param attributes the new attribute values; {@code null} is treated as "no attributes"
 */
public void setAttributes(List<ManifestFieldValue> attributes) {
    this.attributes = (attributes != null) ? attributes : new ArrayList<>();
}
}
......@@ -10,6 +10,8 @@
*/
package uk.ac.ebi.ena.webin.cli.manifest;
import com.fasterxml.jackson.databind.JsonNode;
import com.fasterxml.jackson.databind.ObjectMapper;
import org.apache.commons.compress.compressors.bzip2.BZip2CompressorInputStream;
import org.apache.commons.lang.StringUtils;
import uk.ac.ebi.ena.webin.cli.WebinCliMessage;
......@@ -32,6 +34,7 @@ import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.NoSuchElementException;
......@@ -46,6 +49,8 @@ import static uk.ac.ebi.ena.webin.cli.manifest.ManifestReader.ManifestReaderStat
public abstract class
ManifestReader<M extends Manifest> {
public static final String FIELD_NAME_REGEX = "^[\\s]*(#|;|\\/\\/).*$";
public abstract M getManifest();
public interface Fields {
......@@ -227,6 +232,24 @@ ManifestReader<M extends Manifest> {
/**
 * Parses the manifest content, dispatching to the JSON parser or to the legacy parser depending
 * on what {@code isJsonBasedFormat} reports for the given lines.
 *
 * @param inputDir directory handed through to the selected parser
 * @param lines the manifest file content, one element per line
 */
private void
parseManifest( Path inputDir, List<String> lines )
{
    boolean jsonManifest = isJsonBasedFormat( inputDir, lines );
    if( !jsonManifest )
    {
        parseManifestOld( inputDir, lines );
        return;
    }

    parseManifestNew( inputDir, lines );
}
/**
 * Tells whether the manifest content looks like a JSON manifest.
 *
 * <p>The decision is taken on the first line that is not blank: the content is considered JSON
 * when that line starts with an opening curly brace. Leading blank lines are skipped because they
 * are legal in front of a JSON document and must not push a JSON manifest into the legacy parser.
 *
 * @param inputDir not used for the detection; kept so the signature matches the parse methods
 * @param lines the manifest file content, one element per line
 * @return {@code true} if the first non-blank line starts with <code>{</code>; {@code false}
 *     otherwise, including when there are no lines or only blank ones
 */
private boolean isJsonBasedFormat( Path inputDir, List<String> lines ) {
    for (String line : lines) {
        String trimmed = line.trim();
        if (!trimmed.isEmpty()) {
            return trimmed.startsWith("{");
        }
    }
    return false;
}
private void parseManifestOld( Path inputDir, List<String> lines )
{
state.state = PARSE;
......@@ -258,7 +281,7 @@ ManifestReader<M extends Manifest> {
String fieldName = StringUtils.stripEnd( tokens[ 0 ].trim().toUpperCase(), ": " );
String fieldValue = ( tokens.length == 2 ) ? tokens[ 1 ].trim() : null;
if( fieldName.matches( "^[\\s]*(#|;|\\/\\/).*$" ) )
if( fieldName.matches( FIELD_NAME_REGEX ) )
return null;
try
......@@ -276,9 +299,10 @@ ManifestReader<M extends Manifest> {
ManifestFieldValue field = new ManifestFieldValue(
fieldDefinition,
fieldValue,
new ArrayList<>(), //attributes are not supported in the old manifest format.
new ValidationOrigin("line number", state.lineNo));
ValidationResult result = getValidationResult().create(field.getOrigin());
getValidationResult().create(field.getOrigin());
if( field.getDefinition().getType() == ManifestFieldType.FILE )
{
......@@ -297,6 +321,87 @@ ManifestReader<M extends Manifest> {
return null;
}
/**
 * Parses a JSON based manifest.
 *
 * <p>The lines are joined and read as one JSON document whose root must be an object. Each entry
 * of that object is a manifest field: its JSON value is either a scalar (the field value) or an
 * object carrying the field value under {@code "value"} and, optionally, attributes under
 * {@code "attributes"}. Parsed fields are appended to the manifest reader result. Problems are
 * reported through {@code error(...)}; nothing is thrown for malformed content.
 *
 * @param inputDir directory passed to {@code validateFileExists} for fields of type FILE
 * @param lines the manifest file content, one element per line
 */
private void parseManifestNew( Path inputDir, List<String> lines ) {
    state.state = PARSE;

    JsonNode manifestNode;
    try {
        manifestNode = new ObjectMapper().readTree( String.join( "\n", lines ) );
    } catch (IOException e) {
        error( WebinCliMessage.MANIFEST_READER_MANIFEST_FILE_MALFORMED );
        return;
    }

    // Only a JSON object has named fields to read. Anything else would otherwise be accepted
    // silently as a manifest without any field.
    if (manifestNode == null || !manifestNode.isObject()) {
        error( WebinCliMessage.MANIFEST_READER_MANIFEST_FILE_MALFORMED );
        return;
    }

    manifestNode.fields().forEachRemaining(
        field -> parseJsonManifestField( inputDir, field.getKey(), field.getValue() ) );
}

/** Parses one top-level entry of a JSON manifest and, when it carries a value, records it as a manifest field. */
private void parseJsonManifestField( Path inputDir, String fieldName, JsonNode fieldData ) {
    // Keys matching the comment pattern are skipped, as in the line based format.
    if (fieldName.matches( FIELD_NAME_REGEX )) {
        return;
    }

    ManifestFieldDefinition fieldDefinition = Stream.concat( infoFields.stream(), fields.stream() )
        .filter( fieldDef -> fieldDef.getName().equalsIgnoreCase( fieldName )
            || fieldDef.matchSynonym( fieldName ) )
        .findFirst().orElse( null );
    if (fieldDefinition == null) {
        error( WebinCliMessage.MANIFEST_READER_UNKNOWN_FIELD_ERROR, fieldName );
        return;
    }

    // A scalar is the value itself; otherwise the value is expected under the "value" key.
    JsonNode valueNode = fieldData.isValueNode() ? fieldData : fieldData.get( "value" );
    if (valueNode == null) {
        // An object without "value", or an array: get("value") yields null for both.
        error( WebinCliMessage.MANIFEST_READER_MANIFEST_FILE_MALFORMED );
        return;
    }
    if (valueNode.isNull()) {
        // JSON null means "no value"; asText() would turn it into the literal string "null".
        return;
    }

    List<ManifestFieldValue> fieldAttributes = new ArrayList<>();
    // Attributes are only read when the JSON provides them AND the field definition declares
    // attribute definitions.
    if (!fieldData.isValueNode()
        && fieldData.has( "attributes" )
        && !fieldDefinition.getFieldAttributes().isEmpty()) {
        fieldAttributes = parseJsonFieldAttributes( fieldDefinition, fieldData.get( "attributes" ) );
    }

    ManifestFieldValue manifestField = new ManifestFieldValue(
        fieldDefinition,
        valueNode.asText(),
        fieldAttributes,
        new ValidationOrigin( "file name", state.fileName ) );

    getValidationResult().create( manifestField.getOrigin() );

    if (manifestField.getDefinition().getType() == ManifestFieldType.FILE) {
        validateFileExists( inputDir, manifestField );
    }

    manifestReaderResult.getFields().add( manifestField );
}

/** Converts the entries of a field's "attributes" JSON node into attribute values, one per scalar or array element. */
private List<ManifestFieldValue> parseJsonFieldAttributes(
    ManifestFieldDefinition fieldDefinition, JsonNode attributesNode ) {
    List<ManifestFieldValue> fieldAttributes = new ArrayList<>();

    attributesNode.fields().forEachRemaining( att -> {
        String attName = att.getKey();
        if (attName.matches( FIELD_NAME_REGEX )) {
            return;
        }

        // Look the attribute up among the definitions declared on the owning field.
        ManifestFieldDefinition attDef = fieldDefinition.getFieldAttributes().stream()
            .filter( attFieldDef -> attFieldDef.getName().equalsIgnoreCase( attName )
                || attFieldDef.matchSynonym( attName ) )
            .findFirst().orElse( null );
        if (attDef == null) {
            error( WebinCliMessage.MANIFEST_READER_UNKNOWN_ATTRIBUTE_FIELD_ERROR, attName );
            return;
        }

        JsonNode attValue = att.getValue();
        if (attValue.isArray()) {
            // Every array element becomes its own attribute value.
            attValue.elements().forEachRemaining( element ->
                fieldAttributes.add( new ManifestFieldValue( attDef, element.asText(), new ArrayList<>(),
                    new ValidationOrigin( "file name", state.fileName ) ) ) );
        } else {
            fieldAttributes.add( new ManifestFieldValue( attDef, attValue.asText(), new ArrayList<>(),
                new ValidationOrigin( "file name", state.fileName ) ) );
        }
    } );

    return fieldAttributes;
}
private void
validateManifest()
......@@ -348,6 +453,18 @@ ManifestReader<M extends Manifest> {
processor.process( result, fieldValue );
fieldValue.setValidFieldValueOrFileSuffix( result.isValid() );
}
//iterate over field attributes and run their processors.
for (ManifestFieldValue att : fieldValue.getAttributes()) {
ManifestFieldDefinition attDef = att.getDefinition();
for( ManifestFieldProcessor attProcessor : attDef.getFieldProcessors() )
{
ValidationResult result = getValidationResult().create(att.getOrigin());
attProcessor.process( result, att );
att.setValidFieldValueOrFileSuffix( result.isValid() );
}
}
}
// Validate file count.
......@@ -628,6 +745,20 @@ ManifestReader<M extends Manifest> {
return new File(fileName);
}
/**
 * Collects the attributes of the named manifest field as name/value entries.
 *
 * @param result the manifest reader result to look the field up in
 * @param fieldName name of the field whose attributes are wanted
 * @return one entry per attribute value held by the field, in the field's own order;
 *     {@code null} when the result has no field of that name
 */
protected static List<Map.Entry<String, String>> getAttributes(ManifestReaderResult result, String fieldName) {
    // Look the field up once and reuse it.
    ManifestFieldValue field = result.getField(fieldName);
    if (field == null) {
        return null;
    }

    // Build each entry directly rather than through a throwaway single-element map.
    return field.getAttributes().stream()
        .<Map.Entry<String, String>>map(attField ->
            new java.util.AbstractMap.SimpleEntry<>(attField.getName(), attField.getValue()))
        .collect(Collectors.toList());
}
/** Adds an error to the validation result.
*/
protected final void
......
......@@ -11,6 +11,9 @@
package uk.ac.ebi.ena.webin.cli.xml;
import java.nio.file.Path;
import java.util.List;
import java.util.Map;
import java.util.Objects;
import org.jdom2.Element;
......@@ -25,17 +28,29 @@ public class XmlWriterHelper {
}
private static Element createFileElement(
String fileName, String fileType, String digest, String checksum) {
String fileName, String fileType, String digest, String checksum, List<Map.Entry<String, String>> attributes) {
Element e = new Element("FILE");
e.setAttribute("filename", fileName);
e.setAttribute("filetype", String.valueOf(fileType));
e.setAttribute("checksum_method", digest);
e.setAttribute("checksum", checksum);
if (attributes != null && !attributes.isEmpty()) {
attributes.stream()
.map(att -> createAttributeElement(att.getKey(), att.getValue()))
.filter(Objects::nonNull)
.forEach(attElement -> e.addContent(attElement));
}
return e;
}
/**
 * Creates a FILE element for a file that has no attributes.
 *
 * <p>Delegates to the five-argument overload with a {@code null} attribute list.
 *
 * @param inputDir the input directory, forwarded unchanged
 * @param uploadDir the upload directory, forwarded unchanged
 * @param file the file to describe
 * @param fileType the file type, forwarded unchanged
 * @return the element produced by the five-argument overload
 */
public static Element createFileElement(
    Path inputDir, Path uploadDir, Path file, String fileType) {
    final List<Map.Entry<String, String>> noAttributes = null;
    return createFileElement(inputDir, uploadDir, file, fileType, noAttributes);
}
public static Element createFileElement(
Path inputDir, Path uploadDir, Path file, String fileType) {
Path inputDir, Path uploadDir, Path file, String fileType, List<Map.Entry<String, String>> attributes) {
String path =
file.startsWith(inputDir) ? inputDir.relativize(file).toString() : file.toFile().getName();
......@@ -43,6 +58,21 @@ public class XmlWriterHelper {
String.valueOf(uploadDir.resolve(path)).replaceAll("\\\\+", "/"),
String.valueOf(fileType),
"MD5",
FileUtils.calculateDigest("MD5", file.toFile()));
FileUtils.calculateDigest("MD5", file.toFile()),
attributes);
}
/**
 * Creates the XML child element for a single file attribute.
 *
 * @param attName the attribute name; only {@code READ_TYPE} is supported
 * @param attValue the attribute value, added as the element content
 * @return an element named after the attribute, or {@code null} for any other attribute name
 */
private static Element createAttributeElement(String attName, String attValue) {
    if (!attName.equals("READ_TYPE")) {
        return null;
    }

    Element attributeElement = new Element(attName);
    attributeElement.addContent(attValue);
    return attributeElement;
}
}
......@@ -10,12 +10,28 @@
*/
package uk.ac.ebi.ena.webin.cli;
import com.fasterxml.jackson.core.JsonProcessingException;
import com.fasterxml.jackson.databind.JsonNode;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.fasterxml.jackson.databind.node.ArrayNode;
import com.fasterxml.jackson.databind.node.ObjectNode;
import java.io.File;
import java.nio.file.Path;
public class ManifestBuilder {
private boolean isNew;
private String manifest = "";
private ObjectNode jsonManifest;
/**
 * Switches this builder to the new manifest format by setting the {@code isNew} flag.
 *
 * @return this builder, for chaining
 */
public ManifestBuilder newFormat() {
    this.isNew = true;
    return this;
}
public ManifestBuilder manifest(String manifest) {
if (manifest != null) {
this.manifest += manifest;
......@@ -34,11 +50,67 @@ public class ManifestBuilder {
return field("NAME", String.format("TEST %X", System.currentTimeMillis()));
}
public ManifestBuilder field(String field, String value) {
if (field != null && value != null) {
manifest += field + "\t" + value + "\n";
if (!isNew) {
if (field != null && value != null) {
manifest += field + "\t" + value + "\n";
}
} else {
if (jsonManifest == null) {
jsonManifest = new ObjectMapper().createObjectNode();
}
jsonManifest.put(field, value);
}
return this;
}
public ManifestBuilder attribute(String field, String attributeKey, String attributeValue) {
if (!isNew) {
return this;
}
JsonNode fieldNode = jsonManifest.get(field);
if (fieldNode == null) {
throw new RuntimeException("Field not found : " + field);
}