Commit 6fe144d2 authored by Hermann Zellner

Merge branch 'TRM-24976-ITTest-VJ' into 'master'

Add integration tests for Unifire and PIRSR app

Closes TRM-24976

See merge request uniprot-public/unifire!15
parents 537c2e27 d178a81e
/*
* Copyright (c) 2018 European Molecular Biology Laboratory
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
*/
package uk.ac.ebi.uniprot.urml.core.utils;
import java.util.*;
......
/*
* Copyright (c) 2018 European Molecular Biology Laboratory
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
*/
package uk.ac.ebi.uniprot.urml.core.xml.writers;
import uk.ac.ebi.uniprot.urml.core.xml.schema.JAXBContextInitializationException;
......
/*
* Copyright (c) 2018 European Molecular Biology Laboratory
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
*/
package uk.ac.ebi.uniprot.urml.core.utils;
import java.util.ArrayList;
......
/*
* Copyright (c) 2018 European Molecular Biology Laboratory
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
*/
package uk.ac.ebi.uniprot.urml.core.xml.writers;
import org.junit.jupiter.api.Test;
......@@ -22,7 +39,7 @@ class URMLFactWriterTest {
File tempDir;
@Test
public void testShouldVerifyThatFactSuperSetIsWrittenSuccessfully() throws FileNotFoundException, JAXBException, XMLStreamException {
void testShouldVerifyThatFactSuperSetIsWrittenSuccessfully() throws FileNotFoundException, JAXBException, XMLStreamException {
//given
File outputFile = new File(tempDir.getAbsolutePath()+"/test.xml");
......
......@@ -122,6 +122,19 @@
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-source-plugin</artifactId>
</plugin>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-resources-plugin</artifactId>
</plugin>
<plugin>
<groupId>org.jacoco</groupId>
<artifactId>jacoco-maven-plugin</artifactId>
<configuration>
<excludes>
<exclude>org/drools/compiler/**/*</exclude>
</excludes>
</configuration>
</plugin>
</plugins>
</build>
......
/*
* Copyright (c) 2018 European Molecular Biology Laboratory
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
*/
package org.proteininformationresource.pirsr;
/**
......
/*
* Copyright (c) 2018 European Molecular Biology Laboratory
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
*/
package org.proteininformationresource.pirsr;
import uk.ac.ebi.uniprot.urml.core.utils.SelectorEnum;
......@@ -57,7 +74,7 @@ public class PIRSRApp {
exit(0);
}
PIRSRRunner pirsrRunner = null;
PIRSRRunner pirsrRunner;
try {
CommandLineParser parser = new DefaultParser();
CommandLine cmd = parser.parse(options, args);
......@@ -68,15 +85,15 @@ public class PIRSRApp {
InputType inputType = parseOption(cmd, inputTypeOption, InputTypeChecker::check,
InputType.INTERPROSCAN_XML);
File hmmalignCommand = parseOption(cmd, hmmalignOption, FileCreatorChecker::createAndCheck, null);
pirsrRunner = new PIRSRRunner(pirsrDataDirectory, inputFactFile, inputType, outputDirectory,
hmmalignCommand);
} catch (Exception e) {
pirsrRunner = new PIRSRRunner(pirsrDataDirectory, inputFactFile, inputType, outputDirectory, hmmalignCommand);
pirsrRunner.run();
} catch (ParseException e) {
logger.error(e.getMessage());
displayUsage(options);
exit(1);
} catch (Exception e) {
logger.error(e.getMessage(), e);
}
pirsrRunner.run();
}
private static void displayUsage(Options options) {
......
/*
* Copyright (c) 2018 European Molecular Biology Laboratory
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
*/
package org.proteininformationresource.pirsr;
import uk.ac.ebi.uniprot.urml.core.utils.FactMerger;
......@@ -332,7 +349,7 @@ public class PIRSRRunner {
}
private void logArguments() {
logger.info("Launching PRISR with:");
logger.info("Launching PIRSR with:");
logger.info(" PIRSR Data Directory = {}", this.pirsrDataDirectory.getAbsolutePath());
logger.info(" Input fact file = {}", this.inputFactFile.getAbsolutePath());
logger.info(" Output Directory = {}", this.outputDirectory.getAbsolutePath());
......
/*
* Copyright (c) 2018 European Molecular Biology Laboratory
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
*/
package uk.ac.ebi.uniprot.unifire;
import org.uniprot.urml.facts.*;
......
......@@ -16,18 +16,18 @@
package uk.ac.ebi.uniprot.unifire;
import com.google.common.base.Strings;
import org.apache.commons.cli.*;
import org.drools.core.util.StringUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import uk.ac.ebi.uniprot.urml.core.utils.SelectorEnum;
import uk.ac.ebi.uniprot.urml.input.InputType;
import uk.ac.ebi.uniprot.urml.output.OutputFormat;
import com.google.common.base.Strings;
import java.io.File;
import java.util.Comparator;
import java.util.function.Function;
import org.apache.commons.cli.*;
import org.drools.core.util.StringUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import static com.google.common.primitives.Booleans.trueFirst;
import static java.lang.System.exit;
......@@ -50,30 +50,14 @@ public class UniFireApp {
public static void main(String[] args) throws Exception {
Options options = new Options();
Option ruleFileOption = Option.builder("r").longOpt("rules").hasArg().argName("RULE_URML_FILE")
.desc("Rule base file (path) provided by UniProt (e.g UniRule or ARBA) (format: URML).")
.type(File.class).required().build();
Option inputFileOption = Option.builder("i").longOpt("input").hasArg().argName("INPUT_FILE")
.desc("Input file (path) containing the proteins to annotate and required data, in the format specified by the -s option.")
.type(File.class).required().build();
Option outputFileOption = Option.builder("o").longOpt("output").hasArg().argName("OUTPUT_FILE")
.desc("Output file (path) containing predictions in the format specified in the -f option.")
.type(File.class).required().build();
Option inputSourceOption = Option.builder("s").longOpt("input-source").hasArg().argName("INPUT_SOURCE")
.desc("Input source type. Supported input sources are:\n"+ prettyPrint(InputType.values(), DEFAULT_INPUT_TYPE)+".")
.type(String.class).build();
Option outputFormatOption = Option.builder("f").longOpt("output-format").hasArg().argName("OUTPUT_FORMAT")
.desc("Output file format. Supported formats are:\n"+prettyPrint(OutputFormat.values(), DEFAULT_OUTPUT_FORMAT)+".")
.type(String.class).build();
Option templateFileOption = Option.builder("t").longOpt("templates").hasArg().argName("TEMPLATE_FACTS")
.desc("UniRule template sequence matches, provided by UniProt (format: Fact Model XML).")
.type(File.class).build();
Option inputChunkSizeOption = Option.builder("n").longOpt("chunksize").hasArg().argName("INPUT_CHUNK_SIZE")
.desc("Chunk size (number of proteins) to be batch processed simultaneously \n(default: "+DEFAULT_CHUNK_SIZE+").")
.type(Integer.class).build();
Option memoryOption = Option.builder("m").hasArg().argName("MAX_MEMORY")
.desc("Max size of the memory allocation pool in MB (JVM -Xmx) \n(default: "+DEFAULT_MAX_MEMORY+" MB).")
.type(Integer.class).build();
Option ruleFileOption = Option.builder("r").longOpt("rules").hasArg().argName("RULE_URML_FILE").desc("Rule base file (path) provided by UniProt (e.g UniRule or ARBA) (format: URML).").type(File.class).required().build();
Option inputFileOption = Option.builder("i").longOpt("input").hasArg().argName("INPUT_FILE").desc("Input file (path) containing the proteins to annotate and required data, in the format specified by the -s option.").type(File.class).required().build();
Option outputFileOption = Option.builder("o").longOpt("output").hasArg().argName("OUTPUT_FILE").desc("Output file (path) containing predictions in the format specified in the -f option.").type(File.class).required().build();
Option inputSourceOption = Option.builder("s").longOpt("input-source").hasArg().argName("INPUT_SOURCE").desc("Input source type. Supported input sources are:\n" + prettyPrint(InputType.values(), DEFAULT_INPUT_TYPE) + ".").type(String.class).build();
Option outputFormatOption = Option.builder("f").longOpt("output-format").hasArg().argName("OUTPUT_FORMAT").desc("Output file format. Supported formats are:\n" + prettyPrint(OutputFormat.values(), DEFAULT_OUTPUT_FORMAT) + ".").type(String.class).build();
Option templateFileOption = Option.builder("t").longOpt("templates").hasArg().argName("TEMPLATE_FACTS").desc("UniRule template sequence matches, provided by UniProt (format: Fact Model XML).").type(File.class).build();
Option inputChunkSizeOption = Option.builder("n").longOpt("chunksize").hasArg().argName("INPUT_CHUNK_SIZE").desc("Chunk size (number of proteins) to be batch processed simultaneously \n(default: " + DEFAULT_CHUNK_SIZE + ").").type(Integer.class).build();
Option memoryOption = Option.builder("m").hasArg().argName("MAX_MEMORY").desc("Max size of the memory allocation pool in MB (JVM -Xmx) \n(default: " + DEFAULT_MAX_MEMORY + " MB).").type(Integer.class).build();
Option helpOption = Option.builder("h").longOpt("help").desc("Print this usage.").build();
options.addOption(ruleFileOption);
......@@ -86,9 +70,9 @@ public class UniFireApp {
options.addOption(memoryOption);
options.addOption(helpOption);
UniFireRunner uniFireRunner = null;
UniFireRunner uniFireRunner;
if (hasHelp(helpOption, args)){
if (hasHelp(helpOption, args)) {
displayUsage(options);
exit(0);
}
......@@ -106,13 +90,14 @@ public class UniFireApp {
File templateFactFile = parseOption(cmd, templateFileOption, FileCreatorChecker::createAndCheck, null);
uniFireRunner = new UniFireRunner(ruleBaseFile, inputFactFile, outputFactFile, inputSource, outputFormat, inputChunkSize, templateFactFile);
} catch (Exception e){
uniFireRunner.run();
} catch (ParseException e) {
logger.error(e.getMessage());
displayUsage(options);
exit(1);
} catch (Exception e) {
logger.error(e.getMessage(), e);
}
uniFireRunner.run();
}
private static void displayUsage(Options options){
......
/*
* Copyright (c) 2018 European Molecular Biology Laboratory
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
*/
package matchers;
import org.xmlunit.diff.DefaultNodeMatcher;
import org.xmlunit.diff.ElementSelector;
import org.xmlunit.diff.ElementSelectors;
import org.xmlunit.diff.NodeMatcher;
import java.util.Collections;
/**
 * Factory methods producing the XMLUnit {@link NodeMatcher}s used to pair up elements when two
 * XML documents are compared.
 * <p>
 * Each matcher combines, with a logical AND, one selector per listed child element name. Every
 * such selector only applies to elements named {@code fact}.
 *
 * @author Vishal Joshi
 */
public class NodeMatcherBuilder {

    /** Name of the element whose children are inspected by the selectors. */
    private static final String FACT_ELEMENT = "fact";

    /** Prefix used in the generated XPath expressions. */
    private static final String NAMESPACE_PREFIX = "x";

    /** Namespace URI bound to {@link #NAMESPACE_PREFIX}. */
    private static final String FACTS_NAMESPACE_URI = "http://uniprot.org/urml/facts";

    /**
     * @return a matcher combining selectors for the protein, signature, frequency, positionStart,
     * positionEnd, alignment, sequence, organism, scientificName and lineage children of
     * {@code fact} elements
     */
    public static NodeMatcher factXMLNodeMatcher() {
        return matcherFor("protein", "signature", "frequency", "positionStart", "positionEnd",
                "alignment", "sequence", "organism", "scientificName", "lineage");
    }

    /**
     * @return a matcher combining selectors for the protein, evidence, type, value, positionStart
     * and positionEnd children of {@code fact} elements
     */
    public static NodeMatcher unifireXMLNodeMatcher() {
        return matcherFor("protein", "evidence", "type", "value", "positionStart", "positionEnd");
    }

    /**
     * Builds one selector per child element name (in the given order), ANDs them together and
     * wraps the result in a {@link DefaultNodeMatcher} that also carries
     * {@link ElementSelectors#byNameAndText} as a second selector.
     *
     * @param childElementNames local names of the child elements to select on
     * @return the assembled node matcher
     */
    private static NodeMatcher matcherFor(String... childElementNames) {
        ElementSelector[] childSelectors = new ElementSelector[childElementNames.length];
        for (int i = 0; i < childElementNames.length; i++) {
            childSelectors[i] = buildSelector(childElementNames[i]);
        }
        ElementSelector allChildrenSelector = ElementSelectors.and(childSelectors);
        return new DefaultNodeMatcher(allChildrenSelector, ElementSelectors.byNameAndText);
    }

    /**
     * Creates a conditional selector that is only used for elements named {@code fact}; for
     * those it selects by the XPath {@code ./x:<xmlElement>} and compares the selected nodes by
     * name and text.
     *
     * @param xmlElement local name of the child element referenced in the XPath
     * @return the conditional selector
     */
    private static ElementSelector buildSelector(String xmlElement) {
        ElementSelector childByXPath = ElementSelectors.byXPath(
                "./" + NAMESPACE_PREFIX + ":" + xmlElement,
                Collections.singletonMap(NAMESPACE_PREFIX, FACTS_NAMESPACE_URI),
                ElementSelectors.byNameAndText);
        return ElementSelectors.conditionalBuilder()
                .whenElementIsNamed(FACT_ELEMENT)
                .thenUse(childByXPath)
                .build();
    }
}
/*
* Copyright (c) 2018 European Molecular Biology Laboratory
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
*/
package org.proteininformationresource.pirsr;
import org.junit.jupiter.api.AfterEach;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.params.ParameterizedTest;
import org.junit.jupiter.params.provider.Arguments;
import org.junit.jupiter.params.provider.MethodSource;
import java.io.ByteArrayOutputStream;
import java.io.PrintStream;
import java.util.stream.Stream;
import static org.hamcrest.Matchers.containsString;
import static org.junit.Assert.assertThat;
/**
 * Integration tests for {@link PIRSRApp} runs in which a required command line option is left
 * out. {@code System.out} is replaced by an in-memory stream for the duration of each test so the
 * text printed by the application can be inspected, and is restored afterwards.
 *
 * @author Vishal Joshi
 */
class PIRSRAppFailureCasesIntegrationTest {

    /** The {@code System.out} that was active when this test instance was created. */
    private final PrintStream originalOut = System.out;

    /** Collects everything printed to {@code System.out} while a test is running. */
    private final ByteArrayOutputStream capturedOut = new ByteArrayOutputStream();

    @BeforeEach
    public void setUp() {
        System.setOut(new PrintStream(capturedOut));
    }

    @AfterEach
    public void tearDown() {
        System.setOut(originalOut);
    }

    /**
     * Supplies pairs of (space separated argument string, short name of the option missing from
     * that string).
     */
    private static Stream<Arguments> invalidOrIncompleteArguments() {
        return Stream.of(
                Arguments.of("-i /fasta/file/path -d /pirsr/data/dir -a /hmmalign/path", "o"),
                Arguments.of("-i /fasta/file/path -d /pirsr/data/dir -o /output/dir", "a"),
                Arguments.of("-i /fasta/file/path -a /hmm/align -o /output/dir", "d"),
                Arguments.of("-d /pirsr/data/dir -a /hmm/align -o /output/dir", "i")
        );
    }

    @ParameterizedTest
    @MethodSource("invalidOrIncompleteArguments")
    void testShouldVerifyThatIfRequiredArgumentsAreNotPassedTheAppFailsWithAnError(String inCompleteArguments,
            String missingArgument) throws Exception {
        //when
        PIRSRApp.main(inCompleteArguments.split(" "));

        //then
        String printedText = capturedOut.toString().trim();
        assertThat(printedText, containsString(expectedErrorAndUsage(missingArgument)));
    }

    /**
     * Assembles the text the test expects to find in the captured output: the "missing required
     * option" message for the given option followed by the usage listing.
     *
     * @param missingArgument short name of the option that was not supplied
     * @return the expected message and usage text
     */
    private static String expectedErrorAndUsage(String missingArgument) {
        return "Missing required option: "+missingArgument+" \n" +
                "usage: pirsr -a <HMMALIGN> -d <PIRSR_DATA_DIR> -i <INPUT_FILE> -o <OUTPUT_DIR> [-t <INPUT_TYPE>]\n" +
                " [-h]\n" +
                "----------------------------------------------------------------------------------------------------\n" +
                " -a,--hmmalign <HMMALIGN> Path to hmmalign command.\n" +
                " -d,--pirsr_data_dir <PIRSR_DATA_DIR> Directory for PIRSR data.\n" +
                " -i,--input_file <INPUT_FILE> Input file (path) containing the proteins to annotate\n" +
                " and required data in InterProScan Output XML format.\n" +
                " -o,--output_dir <OUTPUT_DIR> Directory for SRHMM hmmalign result and enhanced\n" +
                " IPRScan Facts XML file.\n" +
                " -t,--input_type <INPUT_TYPE> Type of the input file provided by -i option.\n" +
                " Supported Input types are\n" +
                " - InterProScan (InterProScan Output XML)\n" +
                " - XML (Input Fact XML)\n" +
                " (default: InterProScan)\n" +
                " -h,--help Print this usage.\n" +
                "----------------------------------------------------------------------------------------------------";
    }
}
\ No newline at end of file
/*
* Copyright (c) 2018 European Molecular Biology Laboratory
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
*/
package org.proteininformationresource.pirsr;
import matchers.NodeMatcherBuilder;
import org.junit.jupiter.api.BeforeAll;
import org.junit.jupiter.api.Test;
import org.junit.jupiter.api.io.TempDir;
import org.xmlunit.matchers.CompareMatcher;
import java.io.*;
import java.util.ArrayList;
import java.util.List;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.function.Consumer;
import static org.hamcrest.MatcherAssert.assertThat;
import static org.hamcrest.core.Is.is;
import static org.hamcrest.core.IsEqual.equalTo;
/**
* @author Vishal Joshi
*/
class PIRSRAppIntegrationTest {
@TempDir
File outputDir;
static String hmmAlignPath;
/**
 * Makes sure an {@code hmmalign} executable exists under the test resources before any test runs.
 * <p>
 * The expected location is stored in {@link #hmmAlignPath}. When no file exists there, the
 * bundled {@code hmmer.tar.gz} is unpacked and built in place by running {@code tar zxf},
 * {@code ./configure --prefix <source dir>} and {@code make install}; each step must exit with
 * status 0.
 *
 * @throws IOException if one of the external commands cannot be started
 * @throws InterruptedException if the thread is interrupted while waiting for a command
 */
@BeforeAll
public static void setUpWholeClass() throws IOException, InterruptedException {
    String inputDirPath = PIRSRAppIntegrationTest.class.getResource("/pirsrapp/input/").getPath();
    String hmmmerSourcePath = inputDirPath + "hmmer-3.3.2/";
    hmmAlignPath = hmmmerSourcePath + "/bin/hmmalign";
    if (new File(hmmAlignPath).exists()) {
        // Already built by an earlier run; nothing to do.
        return;
    }

    File hmmerSourceDir = new File(hmmmerSourcePath);
    runAndAssertSuccess(new File(inputDirPath), "hmmer tar didn't untar successfully",
            "tar", "zxf", "hmmer.tar.gz");
    runAndAssertSuccess(hmmerSourceDir, "hmmer configure didn't go successfully",
            "./configure", "--prefix", hmmmerSourcePath);
    runAndAssertSuccess(hmmerSourceDir, "hmmer make install didn't go successfully",
            "make", "install");
}

/**
 * Runs an external command in the given directory, echoes its output to {@code System.out} and
 * asserts that it exits with status 0.
 *
 * @param workingDir directory the command is started in
 * @param failureMessage reason reported when the exit status is not 0
 * @param command the program and its arguments
 * @throws IOException if the command cannot be started
 * @throws InterruptedException if the thread is interrupted while waiting for the command
 */
private static void runAndAssertSuccess(File workingDir, String failureMessage, String... command)
        throws IOException, InterruptedException {
    ProcessBuilder builder = new ProcessBuilder(command);
    builder.directory(workingDir);
    // Merge stderr into stdout so a single reader drains all output; an undrained stderr pipe
    // can fill up and block the child process indefinitely.
    builder.redirectErrorStream(true);
    Process process = builder.start();

    ExecutorService outputReader = Executors.newSingleThreadExecutor();
    try {
        outputReader.submit(new StreamGobbler(process.getInputStream(), System.out::println));
        int exitCode = process.waitFor();
        assertThat(failureMessage, exitCode, is(equalTo(0)));
    } finally {
        // Lets the already submitted reader finish, then releases the worker thread.
        outputReader.shutdown();
    }
}
@Test
void testShouldVerifyThatDefaultInterproScanOutputXmlMatchesTheExpectedFile() throws Exception {
//given
String inputIprScanFile = this.getClass().getResource("/pirsrapp/input/pirsr_data/PIRSR-input-iprscan.xml").getPath();
String pirsrDataDir = this.getClass().getResource("/pirsrapp/input/pirsr_data/").getPath();
String outputPath = outputDir.getPath();
List<String> args = new ArrayList<>();
args.add("-i");
args.add(inputIprScanFile);
args.add("-d");
args.add(pirsrDataDir);
args.add("-a");
args.add(hmmAlignPath);