diff --git a/data/api.json b/data/api.json
new file mode 100644
index 0000000000000000000000000000000000000000..b174dccbe7c841648827b3157064fb502e8b337b
--- /dev/null
+++ b/data/api.json
@@ -0,0 +1,55 @@
+{
+  "$schema": "https://json-schema.org/draft/2020-12/schema",
+  "title": "IGS4EU trigger pgsc_calc",
+  "type": "object",
+  "properties": {
+    "pipeline_param": {
+      "type": "object",
+      "properties": {
+        "nxf_params_file": {
+          "$ref": "nxf_params.json"
+        },
+        "target_genomes": {
+          "$ref": "samplesheet.json"
+        },
+        "nxf_work": {
+          "type": "string",
+          "description": "A path to the nextflow working directory in the ReadWriteMany Persistent Volume Claim shared by the driver and worker pods. Must be a unique path in the PVC not shared by other workflow instances, or weird things will happen."
+        },
+        "id": {
+          "type": "string",
+          "description": "An identifier assigned to the launched workflow instance. It should be universally unique to monitor the status of the launched workflow."
+        }
+      },
+      "required": [
+        "target_genomes",
+        "nxf_params_file",
+        "nxf_work",
+        "id"
+      ]
+    },
+    "globus_details": {
+      "type": "object",
+      "properties": {
+        "dir_path_on_guest_collection": {
+          "type": "string",
+          "description": "A directory path on the globus guest collection. Files are transferred from this source directory to local storage."
+        },
+        "guest_collection_id": {
+          "type": "string",
+          "description": "A globus collection ID",
+          "format": "uuid"
+        }
+      },
+      "required": [
+        "dir_path_on_guest_collection",
+        "guest_collection_id"
+      ]
+    }
+  },
+  "required": [
+    "pipeline_param",
+    "globus_details"
+  ]
+}
+
diff --git a/data/nxf_params.json b/data/nxf_params.json
new file mode 100644
index 0000000000000000000000000000000000000000..d69259ff371e2d5a0bdce9988f5a8e91929fd21d
--- /dev/null
+++ b/data/nxf_params.json
@@ -0,0 +1,78 @@
+{
+  "$schema": "http://json-schema.org/draft-07/schema",
+  "$id": "https://raw.githubusercontent.com/pgscatalog/pgsc_calc/dev/assets/schemas/nxf_params.json",
+  "title": "Nextflow JSON parameters for pgsc_calc",
+  "type": "object",
+  "properties": {
+    "scorefile": {
+      "type": "string",
+      "description": "Path to a scoring file"
+    },
+    "pgs_id": {
+      "type": "string",
+      "description": "PGS Catalog score ID(s), if a list separate with commas",
+      "pattern": "^PGS[0-9]{6}(,PGS[0-9]{6})*$"
+    },
+    "pgp_id": {
+      "type": "string",
+      "description": "PGS Catalog publication ID(s), if a list separate with commas",
+      "pattern": "^PGP[0-9]{6}(,PGP[0-9]{6})*$"
+    },
+    "trait_efo": {
+      "type": "string",
+      "description": "PGS Catalog trait ID(s), if a list separate with commas"
+    },
+    "min_overlap": {
+      "type": "number",
+      "exclusiveMinimum": 0,
+      "maximum": 1,
+      "description": "Minimum proportion of variants in target genomic data matched in the provided scorefile"
+    },
+    "max_memory": {
+      "type": "string",
+      "description": "The maximum memory used by a workflow process (worker pod limit). String uses nextflow format e.g. '4.GB'"
+    },
+    "max_cpus": {
+      "type": "integer",
+      "description": "The maximum number of CPUs used by workflow process (worker pod limit).",
+      "minimum": 1
+    },
+    "outdir": {
+      "type": "string",
+      "description": "Path to an output directory"
+    },
+    "target_build": {
+      "type": "string",
+      "enum": [
+        "GRCh37",
+        "GRCh38"
+      ],
+      "description": "Genome build of target (i.e. input) genomes"
+    }
+  },
+  "required": [
+    "target_build"
+  ],
+  "oneOf": [
+    {
+      "required": [
+        "scorefile"
+      ]
+    },
+    {
+      "required": [
+        "pgs_id"
+      ]
+    },
+    {
+      "required": [
+        "pgp_id"
+      ]
+    },
+    {
+      "required": [
+        "trait_efo"
+      ]
+    }
+  ]
+}
diff --git a/data/samplesheet.json b/data/samplesheet.json
new file mode 100644
index 0000000000000000000000000000000000000000..0ab3500890bd971733d76411de9a2d342f745f23
--- /dev/null
+++ b/data/samplesheet.json
@@ -0,0 +1,129 @@
+{
+  "$schema": "https://json-schema.org/draft/2020-12/schema",
+  "$id": "https://raw.githubusercontent.com/pgscatalog/pgsc_calc/dev/assets/schemas/samplesheet.json",
+  "title": "Target genome schema",
+  "description": "Validates the JSON representation of a samplesheet",
+  "type": "array",
+  "uniqueItems": true,
+  "minItems": 1,
+  "items": {
+    "type": "object",
+    "properties": {
+      "sampleset": {
+        "type": "string",
+        "pattern": "^\\S+$",
+        "description": "Sampleset name must be provided and cannot contain spaces"
+      },
+      "vcf_path": {
+        "type": [
+          "string",
+          "null"
+        ],
+        "pattern": "^\\S+\\.vcf\\.gz$",
+        "description": "VCF path must end with .vcf.gz, mutually exclusive with other formats",
+        "minLength": 5
+      },
+      "vcf_genotype_field": {
+        "description": "Specify whether to import genotypes (default: GT), or imputed dosages (DS) from the VCF file.",
+        "type": [
+          "string",
+          "null"
+        ],
+        "pattern": "^(GT|DS)$"
+      },
+      "bed": {
+        "description": "Plink 1 binary genotype file",
+        "type": [
+          "string",
+          "null"
+        ],
+        "pattern": "^.*bed$",
+        "minLength": 5
+      },
+      "bim": {
+        "description": "Plink 1 variant information file",
+        "type": [
+          "string",
+          "null"
+        ],
+        "pattern": "^.*bim$",
+        "minLength": 5
+      },
+      "fam": {
+        "description": "Plink 1 sample information file",
+        "type": [
+          "string",
+          "null"
+        ],
+        "pattern": "^.*fam$",
+        "minLength": 5
+      },
+      "pgen": {
+        "description": "Plink 2 binary genotype file",
+        "type": [
+          "string",
+          "null"
+        ],
+        "pattern": "^.*pgen$",
+        "minLength": 6
+      },
+      "psam": {
+        "description": "Plink 2 sample information file",
+        "type": [
+          "string",
+          "null"
+        ],
+        "pattern": "^.*psam$",
+        "minLength": 6
+      },
+      "pvar": {
+        "description": "Plink 2 variant information file",
+        "type": [
+          "string",
+          "null"
+        ],
+        "pattern": "^.*pvar$",
+        "minLength": 6
+      },
+      "chrom": {
+        "description": "Specify the chromosome of associated genotyping data (must be in {1-22, X, XY, Y}). If all chromosomes are in the associated file (e.g. your data is not split by chromosome), set to null.",
+        "type": [
+          "null",
+          "string"
+        ],
+        "minLength": 1,
+        "pattern": "^([1-9]|1[0-9]|2[0-2]|X|XY|Y)$"
+      }
+    },
+    "required": [
+      "sampleset",
+      "chrom"
+    ],
+    "dependentRequired": {
+      "pgen": [
+        "pvar",
+        "psam"
+      ],
+      "pvar": [
+        "pgen",
+        "psam"
+      ],
+      "psam": [
+        "pvar",
+        "pgen"
+      ],
+      "bed": [
+        "bim",
+        "fam"
+      ],
+      "bim": [
+        "fam",
+        "bed"
+      ],
+      "fam": [
+        "bed",
+        "bim"
+      ]
+    }
+  }
+}