Commit 2b25f4b4 authored by Dmitry Smirnov's avatar Dmitry Smirnov

merged sampler, removed platform part from frankenstein

parent 639cdaa1
......@@ -12,3 +12,4 @@ gradle-app.setting
# # Work around https://youtrack.jetbrains.com/issue/IDEA-116898
# gradle/wrapper/gradle-wrapper.properties
/bin/
# GitLab CI pipeline for the Gradle build: one job assembles the project, a
# second job runs the unit tests.
# This file is a template, and might need editing before it works on your project.
# This is the Gradle build system for JVM applications
# https://gradle.org/
# https://github.com/gradle/gradle
image: gradle:5.2.1-jdk8

# Disable the Gradle daemon for Continuous Integration servers as correctness
# is usually a priority over speed in CI environments. Using a fresh
# runtime for each build is more reliable since the runtime is completely
# isolated from any previous builds.
variables:
  # -XX:MaxPermSize was dropped: the image runs JDK 8, where the permanent
  # generation no longer exists and the JVM only prints an "ignoring option"
  # warning for it.
  # NOTE(review): the value of -Dorg.gradle.jvmargs ends at the first space, so
  # only -Xmx3g is part of it; the remaining flags are separate launcher
  # options. Confirm this is the intended split.
  GRADLE_OPTS: "-Dorg.gradle.daemon=false -Dorg.gradle.jvmargs=-Xmx3g -XX:+HeapDumpOnOutOfMemoryError -Dfile.encoding=UTF-8"

# Keep Gradle's user home inside the checkout so it can be listed under
# cache:paths below.
before_script:
  - export GRADLE_USER_HOME=`pwd`/.gradle

build:
  stage: build
  script: gradle --build-cache assemble
  cache:
    key: "$CI_COMMIT_REF_NAME"
    # push: this job only uploads the cache, it never downloads one.
    policy: push
    paths:
      - ./build
      - ./.gradle

test:
  stage: test
  script:
    - gradle test --info
  cache:
    key: "$CI_COMMIT_REF_NAME"
    # pull: this job only downloads the cache written by the build job.
    policy: pull
    paths:
      - ./build
      - ./.gradle
// Build script header: plugins, wrapper settings, project coordinates and the
// layout of the main jar.

// Third-party plugin resolved through the plugins DSL.
plugins {
id "io.github.ddimtirov.codacy" version "0.1.0"
}
// Plugins applied with the legacy syntax.
// NOTE(review): 'maven' is applied next to 'maven-publish'; the pom { } call in
// the writePom task further down presumably relies on it - confirm before removing.
apply plugin: 'java'
apply plugin: 'maven-publish'
apply plugin: 'maven'
apply plugin: 'eclipse'
apply plugin: 'idea'
apply plugin: "jacoco"
// Gradle version and distribution type used when the wrapper files are regenerated.
wrapper{
gradleVersion = '5.2.1'
distributionType = Wrapper.DistributionType.BIN
}
// Project coordinates; version_base is also written to the jar manifest below.
group = 'uk.ac.ebi.ena.frankenstein'
ext.version_base = '1.2.0'
version = version_base
// Compile for Java 8.
sourceCompatibility = '1.8'
targetCompatibility = '1.8'
jar {
// Copy every *.so found under the project's native/ directory into native/ inside the jar.
into( 'native' ) {
from fileTree( dir: 'native', includes: [ '**/*.so' ] )
}
manifest {
// 'Built-By' and 'Built-Date' differ per machine and per run, so the
// resulting jar is not byte-for-byte reproducible.
attributes 'Implementation-Title': 'frankenstein',
'Implementation-Version': version_base,
'Built-By': System.getProperty( 'user.name' ),
'Built-Date': new Date(),
'Main-Class': 'uk.ac.ebi.ena.frankenstein.loader.fastq.Loader'
}
}
// Give the optional credential properties an empty-string default so that the
// build can be configured on machines that do not define them (for example in
// gradle.properties or with -P).
//
// The defaults are registered with ext.set(): the map returned by
// project.ext.properties is a detached copy, so writing into it never changes
// the project. The loop runs once at configuration time instead of once per
// configuration, which is what configurations.all { } would do.
[ 'artifactoryUsername', 'artifactoryPassword', 'gitlab_private_token' ].each { String key ->
    if( !project.hasProperty( key ) )
        project.ext.set( key, '' )
}
// Test JVM settings: force UTF-8 as default encoding and allow a 5 GB heap.
test {
jvmArgs "-Dfile.encoding=UTF-8"
maxHeapSize = '5G'
}
// Repositories are consulted in the order listed.
repositories {
mavenLocal()
maven { url "https://gitlab.ebi.ac.uk/api/v4/groups/enasequence/-/packages/maven" }
mavenCentral()
// NOTE(review): plain-http repository; artifacts fetched from it are not
// protected in transit - check whether an https endpoint is available.
maven { url "http://maven.imagej.net/content/repositories/public/" }
}
// Third-party libraries. All of them use the 'compile' / 'testCompile'
// configurations.
dependencies {
compile( group: 'net.java.dev.jna', name: 'jna', version: '5.2.0' )
compile( group:'cisd', name: 'jhdf5', version: '14.12.6' )
compile( group: 'org.apache.commons', name: 'commons-compress', version: '1.18' )
compile group: 'com.beust', name: 'jcommander', version: '1.72'
compile( group: 'uk.ac.ebi.ena', name: 'ena-cram-processing-tools', version: '1.1.0' )
compile( group: 'com.github.samtools', name: 'htsjdk', version: '2.15.0' )
compile 'log4j:log4j:1.2.17'
testCompile "junit:junit:4.11"
// implementation("org.projectlombok:lombok:1.18.4")
// testImplementation('org.assertj:assertj-core:3.11.1')
// testImplementation('org.mockito:mockito-core:2.23.4')
}
// Read every Java source file as UTF-8, independent of the platform default.
tasks.withType(JavaCompile) {
options.encoding = 'UTF-8'
}
// Extra jar with the main Java sources; attached to the publication in the publishing block.
task sourceJar(type: Jar) {
from sourceSets.main.allJava
archiveClassifier = 'sources'
}
// Publishes the java component plus the sources jar to the project's GitLab
// Maven package registry, authenticating with a "Private-Token" HTTP header.
publishing {
    publications {
        mavenJava( MavenPublication ) {
            from components.java
            artifact sourceJar
        }
    }
    repositories {
        maven {
            // Project specific maven repository in Gitlab (project id 811).
            // The id must appear without the angle brackets of the placeholder
            // notation: '<' and '>' are not legal characters in a URI.
            url "https://gitlab.ebi.ac.uk/api/v4/projects/811/packages/maven"
            // Developer token in Gitlab.
            credentials( HttpHeaderCredentials ) {
                name = "Private-Token"
                // findProperty() returns null for an undefined property, so fall
                // back to an empty token rather than handing null to the header.
                value = project.findProperty( 'gitlab_private_token' ) ?: ''
            }
            authentication {
                header( HttpHeaderAuthentication )
            }
        }
    }
}
// Writes a pom.xml containing the inception year and the Apache 2.0 license
// section. "pom.xml" is given as a relative path.
// NOTE(review): pom { } presumably comes from the legacy 'maven' plugin applied
// at the top of this script - confirm before dropping that plugin.
task writePom {
doLast {
pom {
project {
inceptionYear '2019'
licenses {
license {
name 'The Apache Software License, Version 2.0'
url 'http://www.apache.org/licenses/LICENSE-2.0.txt'
distribution 'repo'
}
}
}
}.writeTo( "pom.xml" )
}
}
// Regenerate pom.xml before every Java compilation.
compileJava.dependsOn( writePom )
# Gradle wrapper settings: which distribution to download and where to keep it.
# Both base settings point at GRADLE_USER_HOME, with wrapper/dists as the
# relative path for the downloaded zip and for the unpacked distribution.
distributionBase=GRADLE_USER_HOME
distributionPath=wrapper/dists
# Binary-only ("bin") distribution of Gradle 5.2.1.
distributionUrl=https\://services.gradle.org/distributions/gradle-5.2.1-bin.zip
zipStoreBase=GRADLE_USER_HOME
zipStorePath=wrapper/dists
#!/usr/bin/env sh
##############################################################################
##
## Gradle start up script for UN*X
##
##############################################################################
# Attempt to set APP_HOME
# Resolve links: $0 may be a link
PRG="$0"
# Need this for relative symlinks.
# Follow the chain of symlinks until PRG names a real file; the link target is
# taken from the text after "-> " in the `ls -ld` output.
while [ -h "$PRG" ] ; do
ls=`ls -ld "$PRG"`
link=`expr "$ls" : '.*-> \(.*\)$'`
if expr "$link" : '/.*' > /dev/null; then
PRG="$link"
else
PRG=`dirname "$PRG"`"/$link"
fi
done
# Enter the script's directory to obtain its physical path, then return to the
# directory the caller started from.
SAVED="`pwd`"
cd "`dirname \"$PRG\"`/" >/dev/null
APP_HOME="`pwd -P`"
cd "$SAVED" >/dev/null
APP_NAME="Gradle"
APP_BASE_NAME=`basename "$0"`
# Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script.
# The inner double quotes are kept on purpose: the value goes through `eval` at launch.
DEFAULT_JVM_OPTS='"-Xmx64m"'
# Use the maximum available, or set MAX_FD != -1 to use that value.
MAX_FD="maximum"
# Print a warning made of all arguments.
# The message is written to stderr so that it cannot be mistaken for, or mixed
# into, regular output captured from stdout.
warn () {
    echo "$*"
} >&2
# Print an error message made of all arguments, framed by blank lines, and
# terminate the script with exit status 1.
# The message is written to stderr so that it cannot be mistaken for, or mixed
# into, regular output captured from stdout.
die () {
    echo
    echo "$*"
    echo
    exit 1
} >&2
# OS specific support (must be 'true' or 'false').
# The flags are derived from the `uname` output and consulted further down.
cygwin=false
msys=false
darwin=false
nonstop=false
case "`uname`" in
CYGWIN* )
cygwin=true
;;
Darwin* )
darwin=true
;;
MINGW* )
msys=true
;;
NONSTOP* )
nonstop=true
;;
esac
# The wrapper jar is the only class path entry.
CLASSPATH=$APP_HOME/gradle/wrapper/gradle-wrapper.jar
# Determine the Java command to use to start the JVM.
# JAVA_HOME wins when it is set; otherwise `java` must be found on the PATH.
if [ -n "$JAVA_HOME" ] ; then
if [ -x "$JAVA_HOME/jre/sh/java" ] ; then
# IBM's JDK on AIX uses strange locations for the executables
JAVACMD="$JAVA_HOME/jre/sh/java"
else
JAVACMD="$JAVA_HOME/bin/java"
fi
if [ ! -x "$JAVACMD" ] ; then
die "ERROR: JAVA_HOME is set to an invalid directory: $JAVA_HOME
Please set the JAVA_HOME variable in your environment to match the
location of your Java installation."
fi
else
JAVACMD="java"
which java >/dev/null 2>&1 || die "ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH.
Please set the JAVA_HOME variable in your environment to match the
location of your Java installation."
fi
# Increase the maximum file descriptors if we can.
# Skipped on Cygwin, Darwin and NonStop.
if [ "$cygwin" = "false" -a "$darwin" = "false" -a "$nonstop" = "false" ] ; then
MAX_FD_LIMIT=`ulimit -H -n`
if [ $? -eq 0 ] ; then
# "maximum" / "max" mean: raise the soft limit up to the hard limit.
if [ "$MAX_FD" = "maximum" -o "$MAX_FD" = "max" ] ; then
MAX_FD="$MAX_FD_LIMIT"
fi
ulimit -n $MAX_FD
if [ $? -ne 0 ] ; then
warn "Could not set maximum file descriptor limit: $MAX_FD"
fi
else
warn "Could not query maximum file descriptor limit: $MAX_FD_LIMIT"
fi
fi
# For Darwin, add options to specify how the application appears in the dock
if $darwin; then
GRADLE_OPTS="$GRADLE_OPTS \"-Xdock:name=$APP_NAME\" \"-Xdock:icon=$APP_HOME/media/gradle.icns\""
fi
# For Cygwin, switch paths to Windows format before running java
if $cygwin ; then
APP_HOME=`cygpath --path --mixed "$APP_HOME"`
CLASSPATH=`cygpath --path --mixed "$CLASSPATH"`
JAVACMD=`cygpath --unix "$JAVACMD"`
# We build the pattern for arguments to be converted via cygpath
# ROOTDIRS becomes a "|"-separated alternation of the directories found directly under "/".
ROOTDIRSRAW=`find -L / -maxdepth 1 -mindepth 1 -type d 2>/dev/null`
SEP=""
for dir in $ROOTDIRSRAW ; do
ROOTDIRS="$ROOTDIRS$SEP$dir"
SEP="|"
done
OURCYGPATTERN="(^($ROOTDIRS))"
# Add a user-defined pattern to the cygpath arguments
if [ "$GRADLE_CYGPATTERN" != "" ] ; then
OURCYGPATTERN="$OURCYGPATTERN|($GRADLE_CYGPATTERN)"
fi
# Now convert the arguments - kludge to limit ourselves to /bin/sh
# An argument is converted when it matches the path pattern and does not start
# with "-"; the results are stored in the variables args0, args1, ...
i=0
for arg in "$@" ; do
CHECK=`echo "$arg"|egrep -c "$OURCYGPATTERN" -`
CHECK2=`echo "$arg"|egrep -c "^-"` ### Determine if an option
if [ $CHECK -ne 0 ] && [ $CHECK2 -eq 0 ] ; then ### Added a condition
eval `echo args$i`=`cygpath --path --ignore --mixed "$arg"`
else
eval `echo args$i`="\"$arg\""
fi
i=$((i+1))
done
# Rebuild the positional parameters from args0..args8. The case statement has
# no branch for ten or more arguments, so in that situation the original
# parameters are left as they were.
case $i in
(0) set -- ;;
(1) set -- "$args0" ;;
(2) set -- "$args0" "$args1" ;;
(3) set -- "$args0" "$args1" "$args2" ;;
(4) set -- "$args0" "$args1" "$args2" "$args3" ;;
(5) set -- "$args0" "$args1" "$args2" "$args3" "$args4" ;;
(6) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" ;;
(7) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" "$args6" ;;
(8) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" "$args6" "$args7" ;;
(9) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" "$args6" "$args7" "$args8" ;;
esac
fi
# Escape application args
# save() prints every argument wrapped in single quotes (embedded single quotes
# are rewritten as '\'') followed by " \", so that the text can be handed to
# `eval` below and turn back into the original arguments.
save () {
for i do printf %s\\n "$i" | sed "s/'/'\\\\''/g;1s/^/'/;\$s/\$/' \\\\/" ; done
echo " "
}
APP_ARGS=$(save "$@")
# Collect all arguments for the java command, following the shell quoting and substitution rules
# After this line "$@" holds the complete java command line: JVM options first,
# then the class path, the wrapper main class and the escaped application args.
eval set -- $DEFAULT_JVM_OPTS $JAVA_OPTS $GRADLE_OPTS "\"-Dorg.gradle.appname=$APP_BASE_NAME\"" -classpath "\"$CLASSPATH\"" org.gradle.wrapper.GradleWrapperMain "$APP_ARGS"
# by default we should be in the correct project dir, but when run from Finder on Mac, the cwd is wrong
if [ "$(uname)" = "Darwin" ] && [ "$HOME" = "$PWD" ]; then
cd "$(dirname "$0")"
fi
# Replace this shell process with the JVM.
exec "$JAVACMD" "$@"
@if "%DEBUG%" == "" @echo off
@rem ##########################################################################
@rem
@rem Gradle startup script for Windows
@rem
@rem ##########################################################################
@rem Set local scope for the variables with windows NT shell
if "%OS%"=="Windows_NT" setlocal
@rem DIRNAME is the drive and directory of this script, APP_BASE_NAME its file name without extension.
set DIRNAME=%~dp0
if "%DIRNAME%" == "" set DIRNAME=.
set APP_BASE_NAME=%~n0
set APP_HOME=%DIRNAME%
@rem Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script.
set DEFAULT_JVM_OPTS="-Xmx64m"
@rem Find java.exe
@rem Without JAVA_HOME, java.exe has to be runnable from the PATH.
if defined JAVA_HOME goto findJavaFromJavaHome
set JAVA_EXE=java.exe
%JAVA_EXE% -version >NUL 2>&1
if "%ERRORLEVEL%" == "0" goto init
echo.
echo ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH.
echo.
echo Please set the JAVA_HOME variable in your environment to match the
echo location of your Java installation.
goto fail
:findJavaFromJavaHome
@rem Remove any double quotes from JAVA_HOME before building the path to java.exe.
set JAVA_HOME=%JAVA_HOME:"=%
set JAVA_EXE=%JAVA_HOME%/bin/java.exe
if exist "%JAVA_EXE%" goto init
echo.
echo ERROR: JAVA_HOME is set to an invalid directory: %JAVA_HOME%
echo.
echo Please set the JAVA_HOME variable in your environment to match the
echo location of your Java installation.
goto fail
:init
@rem Get command-line arguments, handling Windows variants
@rem Both outcomes of the test below continue at win9xME_args, the label on the next line.
if not "%OS%" == "Windows_NT" goto win9xME_args
:win9xME_args
@rem Slurp the command line arguments.
set CMD_LINE_ARGS=
set _SKIP=2
:win9xME_args_slurp
if "x%~1" == "x" goto execute
set CMD_LINE_ARGS=%*
:execute
@rem Setup the command line
set CLASSPATH=%APP_HOME%\gradle\wrapper\gradle-wrapper.jar
@rem Execute Gradle
"%JAVA_EXE%" %DEFAULT_JVM_OPTS% %JAVA_OPTS% %GRADLE_OPTS% "-Dorg.gradle.appname=%APP_BASE_NAME%" -classpath "%CLASSPATH%" org.gradle.wrapper.GradleWrapperMain %CMD_LINE_ARGS%
:end
@rem End local scope for the variables with windows NT shell
@rem A non-zero ERRORLEVEL from the java process falls through to the fail label.
if "%ERRORLEVEL%"=="0" goto mainEnd
:fail
rem Set variable GRADLE_EXIT_CONSOLE if you need the _script_ return code instead of
rem the _cmd.exe /c_ return code!
if not "" == "%GRADLE_EXIT_CONSOLE%" exit 1
exit /b 1
:mainEnd
if "%OS%"=="Windows_NT" endlocal
:omega
# Root logger option
# Everything at INFO and above goes to the single appender named "stdout".
log4j.rootLogger=INFO, stdout
# Direct log messages to stdout
log4j.appender.stdout=org.apache.log4j.ConsoleAppender
log4j.appender.stdout.Target=System.out
log4j.appender.stdout.layout=org.apache.log4j.PatternLayout
# Line format: timestamp, level, last component of the logger name, caller line
# number, message.
# NOTE(review): the log4j 1.2 PatternLayout documentation warns that producing
# caller location information such as %L is extremely slow.
log4j.appender.stdout.layout.ConversionPattern=%d{yyyy-MM-dd HH:mm:ss} %-5p %c{1}:%L - %m%n
\ No newline at end of file
package uk.ac.ebi.ena.frankenstein.loader.bam;
import java.io.File;
import java.util.List;
import htsjdk.samtools.SAMRecord;
import uk.ac.ebi.ena.frankenstein.loader.common.eater.AbstractPagedDataEater;
import uk.ac.ebi.ena.frankenstein.loader.common.eater.DataEater;
import uk.ac.ebi.ena.frankenstein.loader.common.eater.DataEaterException;
import uk.ac.ebi.ena.frankenstein.loader.fastq.IlluminaSpot;
public class
BamEater extends AbstractPagedDataEater<BamSpot, IlluminaSpot>
implements DataEater<BamSpot, IlluminaSpot>
{
// Value compared against the read string in eat() to recognise a record whose content is just "*".
private static final Object BAM_STAR = "*";
// Becomes true in eat() once a record with the paired flag has been seen while allow_paired is set.
private boolean paired_fetched = false;
// When false, eat() rejects records carrying the paired flag and assemble() builds single spots.
private boolean allow_paired = false;
// When true, eat() silently drops records that have no bases and no qualities.
private boolean allow_empty = false;
// NOTE(review): errors and verbose are not used in the part of the class reviewed here.
private int errors = 0;
private boolean verbose = true;
/**
 * Creates an eater for BAM records.
 *
 * @param tmp_root        passed on unchanged to {@link AbstractPagedDataEater}
 * @param allow_paired    whether records carrying the paired flag are accepted
 * @param allow_empty     whether records without bases and qualities are dropped silently
 * @param spill_page_size passed on unchanged to {@link AbstractPagedDataEater}
 */
public
BamEater( File tmp_root, boolean allow_paired, boolean allow_empty, int spill_page_size )
{
    super( tmp_root, spill_page_size );

    this.allow_empty  = allow_empty;
    this.allow_paired = allow_paired;
}
/**
 * Filters a BAM record and, when it passes, hands it to the paged eater.
 * <p>
 * Supplementary, secondary and duplicate alignments are dropped. When
 * {@code allow_empty} is set, records without content are dropped as well: a
 * record counts as empty when bases and qualities are both zero-length, or
 * when both are the single character "*".
 *
 * @param object record to consume
 * @throws UnsupportedOperationException if the record is flagged as paired while
 *         paired input is not allowed
 * @throws DataEaterException declared by the interface; propagated from the superclass
 */
@Override
public void
eat( BamSpot object ) throws DataEaterException
{
    //do not load supplementary reads
    if( object.spot.getSupplementaryAlignmentFlag() )
        return;

    if( object.spot.getSecondaryAlignmentFlag() )
        return;

    if( object.spot.getDuplicateFlag() )
        return;

    //do not process empty reads
    if( allow_empty )
    {
        final String bases = object.spot.getReadString();
        final String quals = object.spot.getBaseQualityString();

        final boolean blank = bases.isEmpty() && quals.isEmpty();
        // Both strings have to be the "*" placeholder. The previous code tested
        // the read string twice and never looked at the quality string.
        final boolean starred = BAM_STAR.equals( bases ) && BAM_STAR.equals( quals );

        if( blank || starred )
            return;
    }

    // allows only either paired or non-paired records
    if( !allow_paired && object.spot.getReadPairedFlag() )
        throw new UnsupportedOperationException( "Paired read encountered while paired input is not allowed" ); //TODO: throw a MetaDataException instead

    if( allow_paired )
        paired_fetched |= object.spot.getReadPairedFlag();

    super.eat( object );
}
/**
 * Returns the grouping key of a record, which is its read name.
 *
 * @param object record to derive the key from
 * @return the value of {@code object.spot.getReadName()}
 */
@Override
public Object
getKey( BamSpot object )
{
    final Object read_name = object.spot.getReadName();
    return read_name;
}
/**
 * Builds one {@link IlluminaSpot} from the records collected under a key.
 * <p>
 * Without paired support only the first record of the list is used. With
 * paired support the list is padded with {@code null} to two entries when it
 * holds a single record, the entries are swapped when they arrive in
 * second/first order, and bases and qualities of all non-null entries are
 * concatenated. Records on the negative strand are reverse-complemented and
 * their qualities reversed. Spaces in the resulting name are replaced by
 * underscores.
 * <p>
 * Note that in paired mode the passed list itself is modified (padding, swap).
 *
 * @param key  grouping key of the records; not used by this implementation
 * @param list records sharing the key
 * @return the assembled spot
 */
@Override
public IlluminaSpot
assemble( final Object key, List<BamSpot> list )
{
    final IlluminaSpot result;

    if( allow_paired )
    {
        // normalize: make sure a second slot exists
        if( 1 == list.size() )
            list.add( null );

        final BamSpot head = list.get( 0 );
        final BamSpot tail = list.get( 1 );

        // the second test is evaluated only when the first one fails
        boolean out_of_order = head.spot.getReadPairedFlag() && !head.spot.getFirstOfPairFlag();
        if( !out_of_order )
            out_of_order = null != tail && tail.spot.getFirstOfPairFlag();

        if( out_of_order )
        {
            list.set( 0, tail );
            list.set( 1, head );
        }

        result = IlluminaSpot.initPaired();
        result.read_start[ IlluminaSpot.FORWARD ] = 0;
        result.read_start[ IlluminaSpot.REVERSE ] = 0;
        result.read_length[ IlluminaSpot.FORWARD ] = 0;
        result.read_length[ IlluminaSpot.REVERSE ] = 0;

        final StringBuilder all_bases = new StringBuilder();
        final StringBuilder all_quals = new StringBuilder();

        for( int slot = 0; slot < list.size(); ++slot )
        {
            final BamSpot mate = list.get( slot );
            if( null == mate )
                continue;

            final String mate_bases = mate.spot.getReadString();
            final String mate_quals = mate.spot.getBaseQualityString();

            if( mate.spot.getReadNegativeStrandFlag() )
            {
                all_bases.append( new StringBuilder( new String( complement( mate_bases.getBytes() ) ) ).reverse().toString() );
                all_quals.append( new StringBuilder( mate_quals ).reverse().toString() );
            } else
            {
                all_bases.append( mate_bases );
                all_quals.append( mate_quals );
            }

            result.name = (String)getKey( mate );
            result.read_length[ slot ] = mate_bases.length();
        }

        result.bases = all_bases.toString();
        result.quals = all_quals.toString();
        // the second read starts right after the first one
        result.read_start[ IlluminaSpot.REVERSE ] = result.read_length[ IlluminaSpot.FORWARD ];
    } else
    {
        result = IlluminaSpot.initSingle();

        final BamSpot only = list.get( 0 );
        final String raw_bases = only.spot.getReadString();
        final String raw_quals = only.spot.getBaseQualityString();

        result.name = only.spot.getReadName();
        result.read_length[ IlluminaSpot.FORWARD ] = raw_bases.length();
        result.read_start[ IlluminaSpot.FORWARD ] = 0;

        if( only.spot.getReadNegativeStrandFlag() )
        {
            result.bases = new StringBuilder( new String( complement( raw_bases.getBytes() ) ) ).reverse().toString();
            result.quals = new StringBuilder( raw_quals ).reverse().toString();
        } else
        {
            result.bases = raw_bases;
            result.quals = raw_quals;
        }
    }

    if( null != result.name )
        result.name = result.name.replaceAll( " ", "_" );

    return result;
}
private static final byte a='a', c='c', g='g', t='t', A='A', C='C', G='G', T='T';