Commit d63954d0 authored by Hermann Zellner

TRM-24502

- PIRSRRunner: Extracted and simplified the logic to merge facts, in this case adding PositionalProteinSignature
- Avoid duplications in the facts coming from FactMerger
parent a6bc2e1f
......@@ -153,8 +153,7 @@ $ ./distribution/bin/unifire.sh -r samples/unirule-urml-latest.xml -i samples/in
**Example with ARBA rules & Fact XML input:**
``` bash
$ ./distribution/bin/unifire.sh -r samples/arba-urml-latest.xml -i samples/input_facts.xml -s XML -o
output_arba_annotations.csv
$ ./distribution/bin/unifire.sh -r samples/arba-urml-latest.xml -i samples/input_facts.xml -s XML -o output_arba_annotations.csv
```
<br/>
......
package uk.ac.ebi.uniprot.urml.core.utils;
import java.util.*;
import java.util.stream.Collectors;
import java.util.stream.StreamSupport;
import org.uniprot.urml.facts.Fact;
import org.uniprot.urml.facts.FactSet;
import org.uniprot.urml.facts.PositionalProteinSignature;
/**
 * Combines the facts of parsed {@link FactSet}s with additional
 * {@link PositionalProteinSignature} facts into one duplicate-free list.
 *
 * Created by Hermann Zellner on 21/07/2020.
 */
public class FactMerger {

    /**
     * Flattens every {@link FactSet} delivered by the iterator, appends the extra signatures and
     * drops duplicates. Two facts count as duplicates when the set considers them equal, i.e.
     * according to the {@code equals}/{@code hashCode} of the fact classes.
     *
     * @param factSetIterator source of the fact sets; it is consumed completely by this call
     * @param factsToAdd additional facts to append after the parsed ones; {@code null} is treated
     *                   as "nothing to add"
     * @return a new mutable list holding each distinct fact once, in order of first occurrence
     *         (parsed facts first, then the added signatures)
     */
    public List<Fact> merge(Iterator<FactSet> factSetIterator, List<PositionalProteinSignature> factsToAdd) {
        Iterable<FactSet> factSetIterable = () -> factSetIterator;
        // LinkedHashSet instead of HashSet: still de-duplicates, but keeps the input order so the
        // merged output is stable and facts stay next to the facts they were parsed with.
        Set<Fact> facts = StreamSupport.stream(factSetIterable.spliterator(), false)
                .flatMap(e -> e.getFact().stream())
                .collect(Collectors.toCollection(LinkedHashSet::new));
        if (factsToAdd != null) {
            facts.addAll(factsToAdd);
        }
        return new ArrayList<>(facts);
    }
}
......@@ -3,6 +3,7 @@ package uk.ac.ebi.uniprot.urml.core.xml.writers;
import uk.ac.ebi.uniprot.urml.core.xml.schema.JAXBContextInitializationException;
import com.sun.xml.txw2.output.IndentingXMLStreamWriter;
import java.io.IOException;
import java.io.OutputStream;
import javax.xml.bind.JAXBContext;
import javax.xml.bind.JAXBException;
......@@ -44,7 +45,7 @@ public abstract class AbstractURMLWriter<S, E> implements URMLWriter<S, E> {
}
// Closes the underlying output stream and then the XML stream writer.
// NOTE(review): the two signature lines below are the removed and the added side of one diff hunk.
// NOTE(review): os is closed before xmlStreamWriter; if the stream writer still flushes buffered
// output when it is closed, that output would reach an already-closed stream - confirm whether
// the two calls should be swapped.
@Override
public void close() throws Exception {
public void close() throws IOException, XMLStreamException {
os.close();
xmlStreamWriter.close();
}
......
package uk.ac.ebi.uniprot.urml.core.utils;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Iterator;
import java.util.List;
import org.junit.jupiter.api.BeforeAll;
import org.junit.jupiter.api.Test;
import org.uniprot.urml.facts.*;
import static org.hamcrest.MatcherAssert.assertThat;
import static org.hamcrest.Matchers.containsInAnyOrder;
import static org.hamcrest.Matchers.is;
import static org.uniprot.urml.facts.SignatureType.*;
/**
 * Unit tests for {@link FactMerger#merge(Iterator, List)}: each case builds one {@link FactSet},
 * merges it with a list of {@link PositionalProteinSignature}s and checks the resulting facts.
 *
 * Created by Hermann Zellner on 21/07/2020.
 */
public class FactMergerTest {

    // Shared fixtures, populated once in setUp().
    private static Organism organism_1;
    private static Organism organism_2;

    private static final String sequence_1 =
            "MSNLLKLGRGLTVAVITTSVLSGCSYVIKSGANIALNFGENHIVPPILKMDDVDMICNSGSSLTPVVMSTKDMGADPTRMAVLLYAASGMCAENQALEQELRYLRASKAGQVSEAQDARIEQKRWAAVAAERQYSGYQLFAKRWETKYKYHLGDSCPTMRNDLDQTIYMLGLISGLQAVTNDINSGGAVNVPKDIAGIVERSMTCLDNNKYWGVPNATRAVIWTLLPGAGDGKPDPYVTLKQSVQIGEQKGVRLSHALYAVAAQASGDDSKIRDALKTYDASRADDKPVNPDFKLLDAMAGVMIQGISDRYWTEHTGIRTEDGGMSTFWDEQNKSSELEDLFNSDGGAASEPAASDKTAAK";
    private static final String sequence_2 =
            "MNKDHFLISRIIFIAWCASISACMQVFSDRFIYWREDLPQQFWRFWTAHWVHVGWMHFVLNMLAFACLPFIFPQSKNWQLLILILCISPIISLGFYWFMPYISAYAGFSGVLHGLYVAVALVSLKYKKERNFAGLVLGLVIAKIVWENTFGNTGTAQLIGSPVLIESHLLGALSGALAGSVYLCWIKLKVRVS";

    private static Protein protein_1;
    private static Protein protein_2;
    private static Protein protein_3;

    private static ProteinSignature proteinSignature_1_1;
    private static ProteinSignature proteinSignature_1_2;
    private static ProteinSignature proteinSignature_2_1;
    private static ProteinSignature proteinSignature_2_2;
    private static ProteinSignature proteinSignature_3_1;
    private static ProteinSignature proteinSignature_3_2;

    private static PositionalProteinSignature positionalProteinSignature_1;
    private static PositionalProteinSignature positionalProteinSignature_2;

    private final FactMerger factMerger = new FactMerger();

    /**
     * Builds two organisms, three proteins (protein_1 and protein_2 on organism_1, protein_3 on
     * organism_2), two signatures per protein and one SRHMM positional signature each for
     * protein_1 and protein_3.
     */
    @BeforeAll
    public static void setUp() {
        // NOTE(review): both organisms are created with id "1"; they differ only in name and lineage.
        organism_1 = createOrganism("1", "Acinetobacter baylyi ADP1",
                Arrays.asList(1, 131567, 2, 1224, 1236, 72274, 468, 469, 202950, 62977));
        organism_2 = createOrganism("1", "Pasteurella multocida (strain Pm70)",
                Arrays.asList(1,131567,2,1224,1236,135625,712,745,747,44283,272843));

        protein_1 = createProtein("Q6F7U4", organism_1, sequence_1, "ACIAD3186");
        protein_2 = createProtein("Q6F7U5", organism_1, sequence_2, "ACIAD3185");
        protein_3 = createProtein("Q6F7U5", organism_2, sequence_2, "ACIAD3185");

        proteinSignature_1_1 = createProteinSignature(protein_1, GENE_3_D, "G3DSA:1.20.1540.10", 1);
        proteinSignature_1_2 = createProteinSignature(protein_1, PFAM, "PF01694", 1);
        proteinSignature_2_1 = createProteinSignature(protein_2, TIGRFAM, "TIGR03902", 1);
        proteinSignature_2_2 = createProteinSignature(protein_2, INTER_PRO, "IPR022764", 1);
        proteinSignature_3_1 = createProteinSignature(protein_3, TIGRFAM, "TIGR03902", 1);
        proteinSignature_3_2 = createProteinSignature(protein_3, INTER_PRO, "IPR022764", 1);

        positionalProteinSignature_1 = createPositionalProteinSignature(
                protein_1, SRHMM, "SRHMM000077-4", 1, 1, 141,
                "mivvcpscgaknrvpenklaeqpqcgqchakllplap---IELNEQNFSHFITYSDLPVLIDLWAEWCGPCKMMAPHFAHVAAQ-NPQVIFAKINTETSPRLSQAFHVRSIPTLVLMNKTTEIARISGALRSTELQQWLDQQ-lhs"
        );
        positionalProteinSignature_2 = createPositionalProteinSignature(
                protein_3, SRHMM, "SRHMM006247-1", 1, 1, 487,
                "m--RILSIIRIVGILVMCFSLTMLAPAFVALLYGDGGGKAFMQTFVMSAIVGMLLWWPCHHHKEE-LRSRDGFLIVVAFWLVLGSIGAIPFMLFEKPHLSFSSAIFESFSGLTTTGATVIEGLDQLPKAILFYRQLLQWLGGMGIIVLAVAIIPLLGIGGTQLYRAESSGPlKEQKLRPRIAEVAKLLWILYFSLTVLCAIAYWFAGMNAFDAIGHSFSTVANGGFSTHDASMGYFNNATIYLITTFFMLIAGVNFNLHISALTylGKQslwKNYWKDPEFRFFVAIQVLFILLFSLSLYFYDVLTNLSDAFIQGSLQLTSMSMTAGYSIFDMNNLPAFSAMLLVIASVIGGCGGSTTGGLKTIRVLILWLQVKRELRSLVHPNLVQPIKLGQNILPIRMLESIWAFLMIFILVYWVCVFAVILCGMDVFDAMGSVFATLTNAGPGLG--VIHQDFLNVPESAKIVFAFAMICGRLEIFSLLVLFTPTFWK-e"
        );
    }

    /** An empty fact set merged with no additional facts yields no facts. */
    @Test
    public void emptyFactSetMergedWithEmptyMap() {
        FactSet factSet = new FactSet();
        Iterator<FactSet> factSetIterator = Arrays.asList(factSet).iterator();
        List<PositionalProteinSignature> factsToAdd = new ArrayList<>();

        List<Fact> mergedFacts = factMerger.merge(factSetIterator, factsToAdd);

        assertThat(mergedFacts.size(), is(0));
    }

    /** An empty fact set merged with one positional signature yields just that signature. */
    @Test
    public void emptyFactSetMergedWithOneAlignment() {
        FactSet factSet = new FactSet();
        Iterator<FactSet> factSetIterator = Arrays.asList(factSet).iterator();
        List<PositionalProteinSignature> factsToAdd = Arrays.asList(positionalProteinSignature_1);

        List<Fact> mergedFacts = factMerger.merge(factSetIterator, factsToAdd);

        assertThat(mergedFacts.size(), is(1));
        assertThat(mergedFacts, containsInAnyOrder(positionalProteinSignature_1));
    }

    /** The facts of one protein are returned unchanged when nothing is added. */
    @Test
    public void oneProteinMergedWithEmptyMap() {
        FactSet factSet = new FactSet();
        factSet.setFact(Arrays.asList(protein_1, organism_1, proteinSignature_1_1, proteinSignature_1_2));
        Iterator<FactSet> factSetIterator = Arrays.asList(factSet).iterator();
        List<PositionalProteinSignature> factsToAdd = new ArrayList<>();

        List<Fact> mergedFacts = factMerger.merge(factSetIterator, factsToAdd);

        assertThat(mergedFacts.size(), is(4));
        assertThat(mergedFacts, containsInAnyOrder(
                protein_1, organism_1, proteinSignature_1_1, proteinSignature_1_2
        ));
    }

    /** The positional signature of protein_1 is added to the four facts of protein_1. */
    @Test
    public void oneProteinMergedWithOneAlignment() {
        FactSet factSet = new FactSet();
        factSet.setFact(Arrays.asList(protein_1, organism_1, proteinSignature_1_1, proteinSignature_1_2));
        Iterator<FactSet> factSetIterator = Arrays.asList(factSet).iterator();
        List<PositionalProteinSignature> factsToAdd = Arrays.asList(positionalProteinSignature_1);

        List<Fact> mergedFacts = factMerger.merge(factSetIterator, factsToAdd);

        assertThat(mergedFacts.size(), is(5));
        assertThat(mergedFacts, containsInAnyOrder(protein_1, organism_1, proteinSignature_1_1, proteinSignature_1_2,
                positionalProteinSignature_1));
    }

    /**
     * NOTE(review): despite its name, this case uses exactly the same input and expectation as
     * {@link #oneProteinMergedWithOneAlignment()}; the added signature belongs to protein_1 and is
     * expected in the result.
     */
    @Test
    public void oneProteinNotMergedWithAlignmentForOtherProtein() {
        FactSet factSet = new FactSet();
        factSet.setFact(Arrays.asList(protein_1, organism_1, proteinSignature_1_1, proteinSignature_1_2));
        Iterator<FactSet> factSetIterator = Arrays.asList(factSet).iterator();
        List<PositionalProteinSignature> factsToAdd = Arrays.asList(positionalProteinSignature_1);

        List<Fact> mergedFacts = factMerger.merge(factSetIterator, factsToAdd);

        assertThat(mergedFacts.size(), is(5));
        assertThat(mergedFacts, containsInAnyOrder(
                protein_1, organism_1, proteinSignature_1_1, proteinSignature_1_2, positionalProteinSignature_1
        ));
    }

    /**
     * organism_1 is listed twice in the input (once per protein); the expected eight facts contain
     * it only once, so this case covers the removal of duplicates.
     */
    @Test
    public void twoProteinsWithSameOrganismWithAlignments() {
        FactSet factSet = new FactSet();
        factSet.setFact(Arrays.asList(
                protein_1, organism_1, proteinSignature_1_1, proteinSignature_1_2,
                protein_2, organism_1, proteinSignature_2_1, proteinSignature_2_2
        ));
        Iterator<FactSet> factSetIterator = Arrays.asList(factSet).iterator();
        List<PositionalProteinSignature> factsToAdd = Arrays.asList(positionalProteinSignature_1);

        List<Fact> mergedFacts = factMerger.merge(factSetIterator, factsToAdd);

        assertThat(mergedFacts.size(), is(8));
        assertThat(mergedFacts, containsInAnyOrder(
                protein_1, organism_1, proteinSignature_1_1, proteinSignature_1_2, positionalProteinSignature_1,
                protein_2, proteinSignature_2_1, proteinSignature_2_2
        ));
    }

    /** Two proteins of different organisms, each with its own positional signature: all ten facts are kept. */
    @Test
    public void twoProteinsWithDifferentOrganismWithAlignments() {
        FactSet factSet = new FactSet();
        factSet.setFact(Arrays.asList(
                protein_1, organism_1, proteinSignature_1_1, proteinSignature_1_2,
                protein_3, organism_2, proteinSignature_3_1, proteinSignature_3_2
        ));
        Iterator<FactSet> factSetIterator = Arrays.asList(factSet).iterator();
        List<PositionalProteinSignature> factsToAdd = Arrays.asList(positionalProteinSignature_1,
                positionalProteinSignature_2);

        List<Fact> mergedFacts = factMerger.merge(factSetIterator, factsToAdd);

        assertThat(mergedFacts.size(), is(10));
        assertThat(mergedFacts, containsInAnyOrder(
                protein_1, organism_1, proteinSignature_1_1, proteinSignature_1_2, positionalProteinSignature_1,
                protein_3, organism_2, proteinSignature_3_1, proteinSignature_3_2, positionalProteinSignature_2
        ));
    }

    /** Creates a positional signature with the given signature type/value, frequency, position range and alignment. */
    private static PositionalProteinSignature createPositionalProteinSignature(Protein protein, SignatureType type,
            String value,
            int frequency, int start, int end, String alignment) {
        PositionalProteinSignature positionalProteinSignature = new PositionalProteinSignature();
        positionalProteinSignature.setProtein(protein);
        Signature signature = new Signature();
        signature.setType(type);
        signature.setValue(value);
        positionalProteinSignature.setSignature(signature);
        positionalProteinSignature.setFrequency(frequency);
        positionalProteinSignature.setPositionStart(start);
        positionalProteinSignature.setPositionEnd(end);
        SequenceAlignment sequenceAlignment = new SequenceAlignment();
        sequenceAlignment.setValue(alignment);
        positionalProteinSignature.setAlignment(sequenceAlignment);
        return positionalProteinSignature;
    }

    /** Creates a protein signature with the given signature type/value and frequency. */
    private static ProteinSignature createProteinSignature(Protein protein, SignatureType type, String value, int frequency) {
        ProteinSignature proteinSignature = new ProteinSignature();
        proteinSignature.setProtein(protein);
        Signature signature = new Signature();
        signature.setType(type);
        signature.setValue(value);
        proteinSignature.setSignature(signature);
        proteinSignature.setFrequency(frequency);
        return proteinSignature;
    }

    /** Creates a non-fragment protein with the given id, organism, sequence and a single ORF/OLN name. */
    private static Protein createProtein(String id, Organism organism, String sequence, String oln) {
        Protein protein = new Protein();
        protein.setId(id);
        protein.setOrganism(organism);
        GeneInformation geneInformation = createGeneInformation(oln);
        protein.setGene(geneInformation);
        ProteinSequence proteinSequence = createProteinSequence(sequence, false);
        protein.setSequence(proteinSequence);
        return protein;
    }

    /** Creates gene information carrying exactly one ORF/OLN name. */
    private static GeneInformation createGeneInformation(String oln) {
        GeneInformation geneInformation = new GeneInformation();
        geneInformation.setOrfOrOlnNames(Arrays.asList(oln));
        return geneInformation;
    }

    /** Creates a protein sequence whose length is derived from the given string. */
    private static ProteinSequence createProteinSequence(String sequence, boolean isFragment) {
        ProteinSequence proteinSequence = new ProteinSequence();
        proteinSequence.setValue(sequence);
        proteinSequence.setLength(sequence.length());
        proteinSequence.setIsFragment(isFragment);
        return proteinSequence;
    }

    /** Creates an organism with the given id, scientific name and lineage taxonomy ids. */
    private static Organism createOrganism(String id, String scientificName, List<Integer> taxIds) {
        Organism organism = new Organism();
        organism.setId(id);
        organism.setScientificName(scientificName);
        Lineage lineage = new Lineage();
        lineage.setIds(taxIds);
        organism.setLineage(lineage);
        return organism;
    }
}
package org.proteininformationresource.pirsr;
import static com.google.common.primitives.Booleans.trueFirst;
import static java.lang.System.exit;
import uk.ac.ebi.uniprot.urml.core.utils.SelectorEnum;
import uk.ac.ebi.uniprot.urml.input.InputType;
import com.google.common.base.Strings;
import java.io.File;
import java.util.Comparator;
import java.util.function.Function;
import org.apache.commons.cli.CommandLine;
import org.apache.commons.cli.CommandLineParser;
import org.apache.commons.cli.DefaultParser;
import org.apache.commons.cli.HelpFormatter;
import org.apache.commons.cli.MissingArgumentException;
import org.apache.commons.cli.Option;
import org.apache.commons.cli.Options;
import org.apache.commons.cli.ParseException;
import org.apache.commons.cli.*;
import org.drools.core.util.StringUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.google.common.base.Strings;
import uk.ac.ebi.uniprot.urml.input.InputType;
import static com.google.common.primitives.Booleans.trueFirst;
import static java.lang.System.exit;
/**
* Entry point for the PIRSR application
......@@ -40,6 +32,11 @@ public class PIRSRApp {
Option inputFileOption = Option.builder("i").longOpt("input_file").hasArg().argName("INPUT_FILE")
.desc("Input file (path) containing the proteins to annotate and required data in " + DEFAULT_INPUT_TYPE.getDescription() + " format.")
.type(File.class).required().build();
Option inputTypeOption = Option.builder("t").longOpt("input_type").hasArg().argName("INPUT_TYPE")
.desc(String.format("Type of the input file provided by -i option. Supported Input types are " +
"\n%s\n%s %s", prettyPrint(InputType.INTERPROSCAN_XML),
prettyPrint(InputType.FACT_XML), printDefault(InputType.INTERPROSCAN_XML)))
.type(InputType.class).optionalArg(true).build();
Option pirsrDataDirOption = Option.builder("d").longOpt("pirsr_data_dir").hasArg().argName("PIRSR_DATA_DIR").desc("Directory for PIRSR data.")
.type(File.class).required().build();
Option outputDirOption = Option.builder("o").longOpt("output_dir").hasArg().argName("OUTPUT_DIR")
......@@ -50,6 +47,7 @@ public class PIRSRApp {
options.addOption(pirsrDataDirOption);
options.addOption(inputFileOption);
options.addOption(inputTypeOption);
options.addOption(hmmalignOption);
options.addOption(outputDirOption);
options.addOption(helpOption);
......@@ -67,9 +65,12 @@ public class PIRSRApp {
File outputDirectory = parseOption(cmd, outputDirOption, File::new, null);
File pirsrDataDirectory = parseOption(cmd, pirsrDataDirOption, FileCreatorChecker::createAndCheck, null);
File inputFactFile = parseOption(cmd, inputFileOption, FileCreatorChecker::createAndCheck, null);
InputType inputType = parseOption(cmd, inputTypeOption, InputTypeChecker::check,
InputType.INTERPROSCAN_XML);
File hmmalignCommand = parseOption(cmd, hmmalignOption, FileCreatorChecker::createAndCheck, null);
pirsrRunner = new PIRSRRunner(pirsrDataDirectory, inputFactFile, outputDirectory, hmmalignCommand);
pirsrRunner = new PIRSRRunner(pirsrDataDirectory, inputFactFile, inputType, outputDirectory,
hmmalignCommand);
} catch (Exception e) {
logger.error(e.getMessage());
displayUsage(options);
......@@ -126,4 +127,32 @@ public class PIRSRApp {
}
}
/** Translates the value of the input type command line option into an {@link InputType}. */
private static class InputTypeChecker {

    /**
     * @param type option value; may be {@code null} when the option was not given
     * @return {@link InputType#INTERPROSCAN_XML} for {@code null} or "InterProScan",
     *         {@link InputType#FACT_XML} for "XML"
     * @throws IllegalArgumentException for any other value
     */
    static InputType check(String type) {
        // A missing value falls back to the InterProScan input type.
        if (type == null) {
            return InputType.INTERPROSCAN_XML;
        }
        switch (type) {
            case "InterProScan":
                return InputType.INTERPROSCAN_XML;
            case "XML":
                return InputType.FACT_XML;
            default:
                throw new IllegalArgumentException(
                        String.format("Invalid input type %s. Must be InterProScan or XML", type));
        }
    }
}
/**
 * Renders one selectable value as a help-text bullet of the form
 * {@code " - <code> (<description>)"} followed by a line break.
 */
private static String prettyPrint(SelectorEnum selectorEnum){
    return " - " + selectorEnum.getCode() + " (" + selectorEnum.getDescription() + ")\n";
}
/** Renders the code of the default value as {@code "(default: <code>)"} for the help text. */
private static String printDefault(SelectorEnum defaultValue) {
    return "(default: " + defaultValue.getCode() + ")";
}
}
package org.proteininformationresource.pirsr;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.FileWriter;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.OutputStream;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Set;
import java.util.Collections;
import uk.ac.ebi.uniprot.urml.core.utils.FactMerger;
import uk.ac.ebi.uniprot.urml.core.xml.writers.URMLFactWriter;
import uk.ac.ebi.uniprot.urml.input.InputType;
import uk.ac.ebi.uniprot.urml.input.parsers.FactSetParser;
import java.io.*;
import java.util.*;
import java.util.Map.Entry;
import javax.xml.bind.JAXBException;
import javax.xml.stream.XMLStreamException;
import org.apache.commons.io.FileUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.uniprot.urml.facts.Fact;
import org.uniprot.urml.facts.FactSet;
import org.uniprot.urml.facts.PositionalProteinSignature;
import org.uniprot.urml.facts.Protein;
import org.uniprot.urml.facts.ProteinSignature;
import org.uniprot.urml.facts.SequenceAlignment;
import org.uniprot.urml.facts.Signature;
import org.uniprot.urml.facts.SignatureType;
import uk.ac.ebi.uniprot.urml.core.xml.writers.URMLFactWriter;
import uk.ac.ebi.uniprot.urml.input.InputType;
import uk.ac.ebi.uniprot.urml.input.parsers.FactSetParser;
import org.uniprot.urml.facts.*;
/**
* Runner to launch the PIRSR application
......@@ -49,18 +25,21 @@ public class PIRSRRunner {
private static final Logger logger = LoggerFactory.getLogger(PIRSRRunner.class);
private final File pirsrDataDirectory;
private final File inputFactFile;
private final InputType inputType;
private final File outputDirectory;
private final File hmmalignCommand;
// Stores the PIRSR data directory, input fact file, input type, output directory and hmmalign
// command in the corresponding fields and then calls logArguments().
// NOTE(review): the first signature line below is the removed side of the diff hunk, the
// two-line signature with the InputType parameter is the added side.
public PIRSRRunner(File pirsrDataDirectory, File inputFactFile, File outputDirectory, File hmmalignCommand) {
public PIRSRRunner(File pirsrDataDirectory, File inputFactFile, InputType inputType, File outputDirectory,
File hmmalignCommand) {
this.pirsrDataDirectory = pirsrDataDirectory;
this.inputFactFile = inputFactFile;
this.inputType = inputType;
this.outputDirectory = outputDirectory;
this.hmmalignCommand = hmmalignCommand;
logArguments();
}
public void run() throws IOException, JAXBException {
public void run() throws IOException {
Set<PIRSR> pirsrInfo = getPIRSRInfo(pirsrDataDirectory);
Map<String, Set<PIRSR>> pirsrTriggerMap = getTriggerMap(pirsrInfo);
logger.info("Collecting triggered proteins from InterProScan XML file...");
......@@ -71,29 +50,48 @@ public class PIRSRRunner {
logger.info("Running hmmalign of triggered proteins against SRHMM...");
runHMMAlign(triggeredPIRSR);
logger.info("Done running hmmalign of matched proteins against SRHMM.");
Map<Protein, Set<PositionalProteinSignature>> matchedProteinFacts = getMatchedProteinFacts(triggeredProteins);
List<PositionalProteinSignature> matchedProteinFacts = getMatchedProteinFacts(triggeredProteins);
logger.info("Adding SRHMM signatures to InterProScan XML file...");
logger.info("Adding SRHMM signatures to InterProScan input file...");
addPositionalProteinSignature(matchedProteinFacts);
logger.info("Done adding SRHMM signatures to InterProScan XML file.");
logger.info("Done adding SRHMM signatures to InterProScan input file.");
logger.info("The enhanced InterProScan XML file is at \"" + this.outputDirectory + "/"
+ this.inputFactFile.getName().replaceAll("(?i).xml$", "-urml.xml") + "\"");
logger.info(String.format("The enhanced InterProScan XML file is at \"%s/%s\"", this.outputDirectory,
this.inputFactFile.getName().replaceAll("(?i).xml$", "-urml.xml")));
}
// Parses the input fact file, merges its facts with the given positional protein signatures via
// FactMerger and writes the merged fact set to <outputDirectory>/<input file name with the
// trailing ".xml" replaced by "-urml.xml">.
// NOTE(review): this span interleaves removed and added lines of one diff hunk (old Map-based
// signature and manual stream handling next to the new List-based, try-with-resources version).
private void addPositionalProteinSignature(Map<Protein, Set<PositionalProteinSignature>> matchedProteinFacts) {
private void addPositionalProteinSignature(List<PositionalProteinSignature> matchedProteinFacts) {
// NOTE(review): the dot in "(?i).xml$" is unescaped, so it matches any character before "xml".
String outFile = this.inputFactFile.getName().replaceAll("(?i).xml$", "-urml.xml");
try {
InputStream factInputStream = new FileInputStream(this.inputFactFile);
OutputStream outputStream = new FileOutputStream(this.outputDirectory + "/" + outFile);
try (InputStream factInputStream = new FileInputStream(this.inputFactFile);
OutputStream outputStream = new FileOutputStream(this.outputDirectory + "/" + outFile);
URMLFactWriter factWriter = new URMLFactWriter(outputStream)) {
URMLFactWriter factWriter = new URMLFactWriter(outputStream);
// The parser is selected by the configured input type instead of a fixed InterProScan parser.
Iterator<FactSet> factSetIterator = FactSetParser.of(inputType).parse(factInputStream);
FactMerger factMerger = new FactMerger();
FactSet updatedFactSet = new FactSet();
List<Fact> allFacts = new ArrayList<Fact>();
Iterator<FactSet> factSetIterator = FactSetParser.of(InputType.INTERPROSCAN_XML).parse(factInputStream);
logger.info("Merging facts...");
updatedFactSet.setFact(factMerger.merge(factSetIterator, matchedProteinFacts));
logger.info("Done merging facts. In total we have {} facts.", updatedFactSet.getFact().size());
logger.info("Writing facts...");
factWriter.write(updatedFactSet);
logger.info("Done writing facts.");
}
// NOTE(review): failures are only logged with their message (no stack trace, no rethrow), so the
// method returns normally after an error - confirm this best-effort handling is intended.
catch (IOException | JAXBException | XMLStreamException e) {
logger.error(e.getMessage());
}
}
private Map<Protein, Set<PIRSR>> getTriggerProteins(Map<String, Set<PIRSR>> pirsrTriggerMap)
throws IOException {
Map<Protein, Set<PIRSR>> triggeredProteins = new HashMap<>();
try(InputStream factInputStream = new FileInputStream(this.inputFactFile)) {
Iterator<FactSet> factSetIterator = FactSetParser.of(inputType).parse(factInputStream);
while (factSetIterator.hasNext()) {
FactSet factSet = factSetIterator.next();
List<Fact> facts = new ArrayList<>(factSet.getFact());
Protein protein = null;
......@@ -101,53 +99,18 @@ public class PIRSRRunner {
if (fact instanceof Protein) {
protein = (Protein) fact;
}
}
for (PositionalProteinSignature positionalProteinSignature : matchedProteinFacts.getOrDefault(protein, Collections.emptySet())) {
facts.add(positionalProteinSignature);
}
allFacts.addAll(facts);
}
updatedFactSet.setFact(allFacts);
factWriter.write(updatedFactSet);
} catch (FileNotFoundException e) {
e.printStackTrace();
} catch (IOException e) {
e.printStackTrace();
} catch (JAXBException e) {
e.printStackTrace();
} catch (XMLStreamException e) {
e.printStackTrace();
}
}
private Map<Protein, Set<PIRSR>> getTriggerProteins(Map<String, Set<PIRSR>> pirsrTriggerMap) throws FileNotFoundException, IOException {
Map<Protein, Set<PIRSR>> triggeredProteins = new HashMap<Protein, Set<PIRSR>>();
InputStream factInputStream = new FileInputStream(this.inputFactFile);
Iterator<FactSet> factSetIterator = FactSetParser.of(InputType.INTERPROSCAN_XML).parse(factInputStream);
while (factSetIterator.hasNext()) {
FactSet factSet = factSetIterator.next();
List<Fact> facts = new ArrayList<>(factSet.getFact());
Protein protein = null;
for (Fact fact : facts) {
if (fact instanceof Protein) {
protein = (Protein) fact;
}
if (fact instanceof ProteinSignature) {
ProteinSignature proteinSignature = (ProteinSignature) fact;
String signature = proteinSignature.getSignature().getValue();
Set<PIRSR> pirsrList = pirsrTriggerMap.get(signature);
if (pirsrList != null) {
Set<PIRSR> triggeredPIRSRList = triggeredProteins.get(protein);
if (triggeredPIRSRList == null) {
triggeredPIRSRList = new HashSet<PIRSR>();