Commit 623ec6af authored by Benjamin Wingfield

set up pydantic models

parent 20ba0e38
@@ -7,7 +7,3 @@ repos:
       - id: ruff
       # Run the formatter.
       - id: ruff-format
-  - repo: https://github.com/RobertCraigie/pyright-python
-    rev: v1.1.355
-    hooks:
-      - id: pyright
@@ -23,6 +23,9 @@ pyright = "^1.1.355"
 [tool.poetry.group.standard.dependencies]
 uvicorn = "^0.29.0"
 
+[tool.pyright]
+venv = ".venv"
+
 [build-system]
 requires = ["poetry-core"]
 build-backend = "poetry.core.masonry.api"
from fastapi import FastAPI

from .models import PGSJob

app = FastAPI()


@app.post("/launch")
async def launch(job: PGSJob):
    return job
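
The endpoint simply echoes the validated job back, so any request body that fails pydantic validation is rejected with a 422. A minimal sketch using FastAPI's TestClient, assuming the app above is importable as app.main (the import path is an assumption, not part of the commit):

# Hedged sketch: exercise POST /launch with FastAPI's TestClient.
from fastapi.testclient import TestClient

from app.main import app  # assumed import path

client = TestClient(app)

# A body missing required fields is rejected by pydantic before the handler runs.
bad = client.post("/launch", json={"pipeline_param": {}})
assert bad.status_code == 422

# A payload shaped like the JSON example further down validates and is echoed back with a 200.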
import enum
import pathlib
from typing import Optional, Self

from pgscatalog.core import GenomeBuild
from pydantic import BaseModel, UUID4, model_validator, field_validator
class GlobusFile(BaseModel):
    """Globus files have a name and size. Size is used to restart interrupted transfers."""

    filename: str
    size: int


class GlobusConfig(BaseModel):
    """Details required to stage files from Globus for processing"""

    guest_collection_id: UUID4
    dir_path_on_guest_collection: str
    files: list[GlobusFile]
class TargetFormat(enum.Enum):
    """Genotypes can currently be in plink1 (bfile) or plink2 (pfile) format; plink2 is preferred."""

    PFILE = "pfile"
    BFILE = "bfile"


class TargetGenome(BaseModel):
    """A target genome contains one or more genotypes and associated metadata.

    Genomes may optionally be split by chromosome to speed up calculation on larger datasets.
    """

    sampleset: str
    chrom: str | None
    vcf_import_dosage: bool = False
    geno: pathlib.Path
    pheno: pathlib.Path
    variants: pathlib.Path
    format: TargetFormat
class SamplesheetFormat(enum.Enum):
    """Nextflow samplesheet format. The API currently only accepts json.

    By default, the nextflow workflow accepts and uses csv.
    """

    JSON = "json"


class PGSParams(BaseModel):
    """Runtime parameters for the PGS calculation workflow"""

    pgs_id: Optional[str] = None
    pgp_id: Optional[str] = None
    trait_efo: Optional[str] = None
    target_build: GenomeBuild
    format: SamplesheetFormat = SamplesheetFormat.JSON

    @model_validator(mode="after")
    def check_pgs(self) -> Self:
        if all(getattr(self, x) is None for x in ("pgs_id", "pgp_id", "trait_efo")):
            raise ValueError("At least one of pgs_id, pgp_id, or trait_efo must be set")
        return self
class PGSJobParams(BaseModel):
    id: str
    target_genomes: list[TargetGenome]
    nxf_params_file: PGSParams
    nxf_work: str

    @field_validator("id")
    @classmethod
    def validate_id(cls, x: str) -> str:
        if not x.startswith("INTP"):
            raise ValueError(f"id must start with INTP, got {x}")
        return x


class PGSJob(BaseModel):
    globus_details: GlobusConfig
    pipeline_param: PGSJobParams
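
The two validators enforce lightweight invariants on incoming jobs. A minimal sketch of what they reject, assuming the classes above are importable from a module named models (the module name and example values are assumptions):

# Hedged sketch: exercise the PGSParams and PGSJobParams validators.
import pydantic

from models import PGSParams, PGSJobParams  # assumed module name

# check_pgs: at least one of pgs_id / pgp_id / trait_efo must be set.
try:
    PGSParams(target_build="GRCh38")
except pydantic.ValidationError as e:
    print(e)

# validate_id: job ids must start with "INTP".
try:
    PGSJobParams(
        id="BAD00000000322",
        target_genomes=[],
        nxf_params_file=PGSParams(pgs_id="PGS001229", target_build="GRCh38"),
        nxf_work="/workspace/work/",
    )
except pydantic.ValidationError as e:
    print(e)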
{
  "pipeline_param": {
    "id": "INTP00000000322",
    "target_genomes": [
      {
        "variants": "hapnest.pvar",
        "geno": "hapnest.pgen",
        "pheno": "hapnest.psam",
        "sampleset": "testtest",
        "format": "pfile",
        "chrom": null
      }
    ],
    "nxf_params_file": {
      "pgs_id": "PGS001229",
      "format": "json",
      "target_build": "GRCh38"
    },
    "nxf_work": "/workspace/work/"
  },
  "globus_details": {
    "guest_collection_id": "c1e6310c-11d5-4e8a-9443-211884f04c6f",
    "dir_path_on_guest_collection": "bwingfield@ebi.ac.uk/INTP00000000322",
    "files": [
      {"filename": "hapnest.pvar", "size": 7},
      {"filename": "hapnest.pgen", "size": 8},
      {"filename": "hapnest.psam", "size": 9}
    ]
  }
}
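
A payload like this should round-trip cleanly through the models. A minimal sketch, assuming the JSON above is saved as job.json (the filename is illustrative) and the models live in a module named models:

# Hedged sketch: validate the example payload with the PGSJob model.
import json

from models import PGSJob  # assumed module name

with open("job.json") as f:  # illustrative filename for the payload above
    job = PGSJob.model_validate(json.load(f))

print(job.pipeline_param.id)                      # INTP00000000322
print(job.pipeline_param.nxf_params_file.pgs_id)  # PGS001229
print(job.globus_details.files[0].filename)       # hapnest.pvar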