Commit be5b01d0 authored by reddyk's avatar reddyk
Browse files

ENA-2421

parent 4cd26701
// Configuration closure passed to allprojects.
allprojects
{
// NOTE(review): version_base is assigned twice in a row, so the second value
// ('1.1.225') overwrites the first. Presumably the '1.1.221' line is the stale
// pre-change value and should be removed - confirm.
ext.version_base = '1.1.221'
ext.version_base = '1.1.225'
// The project version is taken directly from version_base.
version = version_base
// Disable every task of type Javadoc.
tasks.withType(Javadoc).all { enabled = false }
}
......
......@@ -56,7 +56,7 @@ public class AGPValidationCheck extends EntryValidationCheck
result = new ValidationResult();
if (entry == null)
if (entry == null||entry.getSequence()==null)
{
return result;
}
......
......@@ -41,7 +41,7 @@ public class AnnotationOnlySequenceCheck extends EntryValidationCheck
return result;
}
if(entry.getSequence().getContigs().size()!=0||entry.getSequence().getAgpRows().size()!=0)//CO line exists
if(entry.getSequence()!=null&&(entry.getSequence().getContigs().size()!=0||entry.getSequence().getAgpRows().size()!=0))//CO line exists
{
return result;
}
......
......@@ -37,7 +37,7 @@ public class AgpComponentAccessionFix extends EntryValidationCheck
{
result = new ValidationResult();
if (entry == null||getEntryDAOUtils()==null||entry.getSequence().getAgpRows().size()==0||!FileType.AGP.equals(getEmblEntryValidationPlanProperty().fileType.get()))
if (entry == null||getEntryDAOUtils()==null||(entry.getSequence()!=null&&entry.getSequence().getAgpRows().size()==0)||!FileType.AGP.equals(getEmblEntryValidationPlanProperty().fileType.get()))
{
return result;
}
......
......@@ -63,7 +63,7 @@ public class AgptoConFix extends EntryValidationCheck
{
result = new ValidationResult();
if (entry == null||getEntryDAOUtils()==null||entry.getSequence().getAgpRows().size()==0||!FileType.AGP.equals(getEmblEntryValidationPlanProperty().fileType.get()))
if (entry == null||getEntryDAOUtils()==null||(entry.getSequence()!=null&&entry.getSequence().getAgpRows().size()==0)||!FileType.AGP.equals(getEmblEntryValidationPlanProperty().fileType.get()))
{
return result;
}
......
......@@ -56,7 +56,7 @@ public class AnnotationOnlySequenceFix extends EntryValidationCheck
return result;
}
if(entry.getSequence()!=null&&entry.getSequence().getContigs()!=null&&entry.getSequence().getContigs().size()!=0||entry.getSequence().getAgpRows().size()!=0)//CO line exists
if(entry.getSequence()!=null&&((entry.getSequence().getContigs()!=null&&entry.getSequence().getContigs().size()!=0)||entry.getSequence().getAgpRows().size()!=0))//CO line exists
{
return result;
}
......
......@@ -63,6 +63,7 @@ SequenceMinCountCheck=Invalid number of sequences : {0}, Minimum number of seque
SequenceMaxCountCheck=Invalid number of sequences : {0}, Maximum number of sequences for {1} is: {2}
FileFormatCheck=Invalid File Format
ChromosomeEntryNameMissingCheck = Entry name missing, unable to retrieve chromosome list information.
EmptyFileCheck = File must not be empty.
#AGP VALIDATION MESSAGES
......
......@@ -9,7 +9,6 @@ import java.util.HashSet;
import java.util.List;
import java.util.Set;
import java.util.regex.Pattern;
import uk.ac.ebi.embl.api.entry.genomeassembly.ChromosomeEntry;
import uk.ac.ebi.embl.api.validation.ValidationResult;
import uk.ac.ebi.embl.flatfile.validation.FlatFileOrigin;
......@@ -18,8 +17,9 @@ public class ChromosomeListFileReader extends GCSEntryReader
{
private final String MESSAGE_KEY_INVALID_NO_OF_FIELDS_ERRORS = "InvalidNoOfFields";
private final String MESSAGE_KEY_DUPLICATE_CHROMOSOME_NAME_ERROR = "ChromosomeListChromosomeNameDuplicationCheck";
private static final String INVALID_FILE_FORMAT_ERROR = "FileFormatCheck";
private static final String EMPTY_FILE_ERROR = "EmptyFileCheck";
Pattern pattern = Pattern.compile("\\s+");
private final static int MIN_NUMBER_OF_COLUMNS = 3;
......@@ -35,10 +35,20 @@ public class ChromosomeListFileReader extends GCSEntryReader
{
this.file=file;
}
@Override
public ValidationResult read() throws FileNotFoundException, IOException
{
if(file!=null&&file.length()==0)
{
error(1, EMPTY_FILE_ERROR);
return validationResult;
}
if(!validateFileFormat(file))
return validationResult;
int lineNumber = 1;
try(BufferedReader reader = getBufferedReader(file))
......@@ -47,11 +57,11 @@ public class ChromosomeListFileReader extends GCSEntryReader
while ((line = reader.readLine()) != null)
{
line = line.trim();
if (line.isEmpty()) // Skip empty lines
if (line.isEmpty()) // Skip empty lines
{
continue;
}
String[] fields = pattern.split(line);
int numberOfColumns = fields.length;
if (numberOfColumns < MIN_NUMBER_OF_COLUMNS || numberOfColumns > MAX_NUMBER_OF_COLUMNS)
......@@ -66,7 +76,7 @@ public class ChromosomeListFileReader extends GCSEntryReader
chromosomeEntry.setChromosomeType(fields[CHROMOSOME_TYPE_COLUMN]);
if (numberOfColumns == MAX_NUMBER_OF_COLUMNS)
{
chromosomeEntry.setChromosomeLocation(fields[CHROMOSOME_LOCATION_COLUMN]);
chromosomeEntry.setChromosomeLocation(fields[CHROMOSOME_LOCATION_COLUMN].toLowerCase());
}
chromosomeEntry.setOrigin(new FlatFileOrigin(lineNumber));
if(!chromosomeNames.add(chromosomeEntry.getChromosomeName()))
......@@ -103,4 +113,32 @@ public class ChromosomeListFileReader extends GCSEntryReader
return validationResult.isValid();
}
/**
 * Performs a quick format check on the first non-blank line of the file.
 *
 * <p>The first non-blank line must occur within the first 30 lines read and,
 * once trimmed and split on whitespace, must contain between
 * {@code MIN_NUMBER_OF_COLUMNS} and {@code MAX_NUMBER_OF_COLUMNS} fields.
 * Any violation is reported against line 1 with the {@code FileFormatCheck}
 * message key.
 *
 * @param file the file to inspect
 * @return {@code true} if the first non-blank line has an acceptable number
 *         of columns; {@code false} if a format error was reported
 * @throws IOException if the file cannot be opened or read
 */
public boolean validateFileFormat(File file) throws IOException
{
    // The first data line has to show up within this many lines.
    final int maxLeadingLines = 30;

    try (BufferedReader fileReader = getBufferedReader(file))
    {
        String firstDataLine = null;
        for (int linesRead = 0; linesRead < maxLeadingLines; linesRead++)
        {
            String line = fileReader.readLine();
            if (line == null)
            {
                // End of file: stop instead of polling readLine() repeatedly.
                break;
            }
            // Trim so that blank detection and column counting agree with read().
            line = line.trim();
            if (!line.isEmpty())
            {
                firstDataLine = line;
                break;
            }
        }

        if (firstDataLine == null)
        {
            error(1, INVALID_FILE_FORMAT_ERROR);
            return false;
        }

        int numberOfColumns = firstDataLine.split("\\s+").length;
        if (numberOfColumns < MIN_NUMBER_OF_COLUMNS || numberOfColumns > MAX_NUMBER_OF_COLUMNS)
        {
            error(1, INVALID_FILE_FORMAT_ERROR);
            // The check failed, so tell the caller; previously this fell through to "return true".
            return false;
        }
    }
    return true;
}
}
......@@ -29,7 +29,7 @@ public class AGPFileReaderTest extends AGPReaderTest{
"IWGSC_CSS_6DL_scaff_3330716 1 330 1 W IWGSC_CSS_6DL_contig_209591; 1 330 +\n"+
"IWGSC_CSS_6DL_scaff_3330716 331 354 2 N 24 scaffold yes paired-ends\n"+
"IWGSC_CSS_6DL_scaff_3330716 355 654 3 W IWGSC_CSS_6DL_contig_209592 1 300 +\n";
String expectedEntryString="ID XXX; SV XXX; linear; XXX; XXX; XXX; 0 BP.\n"+
String expectedEntryString="ID XXX; SV XXX; linear; XXX; XXX; XXX; 654 BP.\n"+
"XX\n"+
"AC ;\n"+
"XX\n"+
......@@ -157,7 +157,7 @@ public class AGPFileReaderTest extends AGPReaderTest{
"IWGSC_CSS_6DL_scaff_3330716 1 330 1 W IWGSC_CSS_6DL_contig_209591; 1 330 +\n"+
"IWGSC_CSS_6DL_scaff_3330716 331 354 2 N 24 scaffold no\n"+
"IWGSC_CSS_6DL_scaff_3330716 355 654 3 W IWGSC_CSS_6DL_contig_209592 1 300 +\n";
String expectedEntryString ="ID XXX; SV XXX; linear; XXX; XXX; XXX; 0 BP.\n"+
String expectedEntryString ="ID XXX; SV XXX; linear; XXX; XXX; XXX; 654 BP.\n"+
"XX\n"+
"AC ;\n"+
"XX\n"+
......
package uk.ac.ebi.embl.flatfile.reader.genomeassembly;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertTrue;
import java.io.File;
import java.io.IOException;
import java.net.URL;
import org.junit.Test;
import uk.ac.ebi.embl.api.validation.ValidationResult;
/**
 * Tests for {@link ChromosomeListFileReader#read()} using chromosome list
 * files loaded from the test classpath.
 */
public class ChromosomeListFileReaderTest
{
    /** A readable file yields a valid result and two parsed entries. */
    @Test
    public void testRead_validFile() throws IOException
    {
        ChromosomeListFileReader reader = readerFor("valid_chromosome_list.txt");
        ValidationResult parseResult = reader.read();
        assertTrue(parseResult.isValid());
        assertEquals(2, reader.getentries().size());
    }

    /** The empty-file fixture is rejected with exactly one EmptyFileCheck message. */
    @Test
    public void testRead_EmptyFile() throws IOException
    {
        ChromosomeListFileReader reader = readerFor("empty_chromosome_list.txt");
        ValidationResult parseResult = reader.read();
        assertTrue(!parseResult.isValid());
        assertEquals(1, parseResult.getMessages("EmptyFileCheck").size());
    }

    /** The invalid-format fixture is rejected with exactly one FileFormatCheck message. */
    @Test
    public void testRead_invalidFileFormat() throws IOException
    {
        ChromosomeListFileReader reader = readerFor("invalid_fileformat_chromosome_list.txt");
        ValidationResult parseResult = reader.read();
        assertTrue(!parseResult.isValid());
        assertEquals(1, parseResult.getMessages("FileFormatCheck").size());
    }

    /** The duplicate-name fixture produces exactly one duplication message. */
    @Test
    public void testRead_duplilcateChromosomeNames() throws IOException
    {
        ChromosomeListFileReader reader = readerFor("duplicate_name_chromosome_list.txt");
        ValidationResult parseResult = reader.read();
        assertTrue(!parseResult.isValid());
        assertEquals(1, parseResult.getMessages("ChromosomeListChromosomeNameDuplicationCheck").size());
    }

    /** The wrong-column-count fixture produces exactly one InvalidNoOfFields message. */
    @Test
    public void testRead_invalidnoofColumns() throws IOException
    {
        ChromosomeListFileReader reader = readerFor("invalid_no_of_columns_chromosome_list.txt");
        ValidationResult parseResult = reader.read();
        assertTrue(!parseResult.isValid());
        assertEquals(1, parseResult.getMessages("InvalidNoOfFields").size());
    }

    /**
     * Builds a reader for a fixture file located on the test classpath.
     *
     * <p>Fails the test with a descriptive message when the resource is
     * missing, rather than passing a null path on to {@code new File(...)}.
     *
     * @param resourceName classpath-relative name of the fixture file
     * @return a reader bound to that file
     */
    private static ChromosomeListFileReader readerFor(String resourceName) throws IOException
    {
        URL url = ChromosomeListFileReaderTest.class.getClassLoader().getResource(resourceName);
        assertTrue("Test resource not found on classpath: " + resourceName, url != null);
        // URL paths encode spaces as %20; turn them back into real spaces.
        String fileName = url.getPath().replaceAll("%20", " ");
        return new ChromosomeListFileReader(new File(fileName));
    }
}
IWGSC_CSS_6DL_scaff_3330716 I chromosome mitochondrion
IWGSC_CSS_6DL_scaff_3330717 II multipartite
IWGSC_CSS_6DL_scaff_3330717 II multipartite
IWGSC_CSS_6DL_scaff_3330716 mitochondrion
IWGSC_CSS_6DL_scaff_3330717 II multipartite
IWGSC_CSS_6DL_scaff_3330716 I chromosome mitochondrion
IWGSC_CSS_6DL_scaff_3330717 II multipartite
IWGSC_CSS_6DL_scaff_333071
IWGSC_CSS_6DL_scaff_3330716 I chromosome mitochondrion
IWGSC_CSS_6DL_scaff_3330717 II multipartite
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment