Commit 623ec6af authored by Benjamin Wingfield

set up pydantic models

parent 20ba0e38
@@ -7,7 +7,3 @@ repos:
       - id: ruff
       # Run the formatter.
       - id: ruff-format
-  - repo: https://github.com/RobertCraigie/pyright-python
-    rev: v1.1.355
-    hooks:
-      - id: pyright
@@ -23,6 +23,9 @@ pyright = "^1.1.355"
 [tool.poetry.group.standard.dependencies]
 uvicorn = "^0.29.0"
 
+[tool.pyright]
+venv = ".venv"
+
 [build-system]
 requires = ["poetry-core"]
 build-backend = "poetry.core.masonry.api"
from fastapi import FastAPI

from .models import PGSJob

app = FastAPI()


@app.post("/launch")
async def launch(job: PGSJob):
    return job
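
The endpoint simply echoes the validated job back, so any request body that fails pydantic validation is rejected with a 422. A minimal sketch using FastAPI's TestClient, assuming the app above is importable as app.main (the import path is an assumption, not part of the commit):

# Hedged sketch: exercise POST /launch with FastAPI's TestClient.
from fastapi.testclient import TestClient

from app.main import app  # assumed import path

client = TestClient(app)

# A body missing required fields is rejected by pydantic before the handler runs.
bad = client.post("/launch", json={"pipeline_param": {}})
assert bad.status_code == 422

# A payload shaped like the JSON example further down validates and is echoed back with a 200.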
import enum
import pathlib
from typing import Optional, Self

from pgscatalog.core import GenomeBuild
from pydantic import BaseModel, UUID4, model_validator, field_validator
class GlobusFile(BaseModel):
    """Globus files have a name and size. Size is used to restart interrupted transfers."""

    filename: str
    size: int


class GlobusConfig(BaseModel):
    """Details required to stage files from Globus for processing"""

    guest_collection_id: UUID4
    dir_path_on_guest_collection: str
    files: list[GlobusFile]
class TargetFormat(enum.Enum):
    """Genotypes can currently be in plink1 (bfile) or plink2 (pfile) format; plink2 is preferred."""

    PFILE = "pfile"
    BFILE = "bfile"


class TargetGenome(BaseModel):
    """A target genome contains one or more genotypes and associated metadata.

    Genomes may optionally be split by chromosome to speed up calculation on larger datasets.
    """

    sampleset: str
    chrom: str | None
    vcf_import_dosage: bool = False
    geno: pathlib.Path
    pheno: pathlib.Path
    variants: pathlib.Path
    format: TargetFormat
class SamplesheetFormat(enum.Enum):
    """Nextflow samplesheet format. The API currently only accepts json.

    By default, the nextflow workflow accepts and uses csv.
    """

    JSON = "json"


class PGSParams(BaseModel):
    """Runtime parameters for the PGS calculation workflow"""

    pgs_id: Optional[str] = None
    pgp_id: Optional[str] = None
    trait_efo: Optional[str] = None
    target_build: GenomeBuild
    format: SamplesheetFormat = SamplesheetFormat.JSON

    @model_validator(mode="after")
    def check_pgs(self) -> Self:
        if all(getattr(self, x) is None for x in ("pgs_id", "pgp_id", "trait_efo")):
            raise ValueError("At least one of pgs_id, pgp_id, or trait_efo must be set")
        return self
class PGSJobParams(BaseModel):
    id: str
    target_genomes: list[TargetGenome]
    nxf_params_file: PGSParams
    nxf_work: str

    @field_validator("id")
    @classmethod
    def validate_id(cls, x: str) -> str:
        if not x.startswith("INTP"):
            raise ValueError(f"id must start with INTP, got {x}")
        return x


class PGSJob(BaseModel):
    globus_details: GlobusConfig
    pipeline_param: PGSJobParams
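
The two validators enforce lightweight invariants on incoming jobs. A minimal sketch of what they reject, assuming the classes above are importable from a module named models (the module name and example values are assumptions):

# Hedged sketch: exercise the PGSParams and PGSJobParams validators.
import pydantic

from models import PGSParams, PGSJobParams  # assumed module name

# check_pgs: at least one of pgs_id / pgp_id / trait_efo must be set.
try:
    PGSParams(target_build="GRCh38")
except pydantic.ValidationError as e:
    print(e)

# validate_id: job ids must start with "INTP".
try:
    PGSJobParams(
        id="BAD00000000322",
        target_genomes=[],
        nxf_params_file=PGSParams(pgs_id="PGS001229", target_build="GRCh38"),
        nxf_work="/workspace/work/",
    )
except pydantic.ValidationError as e:
    print(e)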
{
  "pipeline_param": {
    "id": "INTP00000000322",
    "target_genomes": [
      {
        "variants": "hapnest.pvar",
        "geno": "hapnest.pgen",
        "pheno": "hapnest.psam",
        "sampleset": "testtest",
        "format": "pfile",
        "chrom": null
      }
    ],
    "nxf_params_file": {
      "pgs_id": "PGS001229",
      "format": "json",
      "target_build": "GRCh38"
    },
    "nxf_work": "/workspace/work/"
  },
  "globus_details": {
    "guest_collection_id": "c1e6310c-11d5-4e8a-9443-211884f04c6f",
    "dir_path_on_guest_collection": "bwingfield@ebi.ac.uk/INTP00000000322",
    "files": [
      {"filename": "hapnest.pvar", "size": 7},
      {"filename": "hapnest.pgen", "size": 8},
      {"filename": "hapnest.psam", "size": 9}
    ]
  }
}
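
A payload like this should round-trip cleanly through the models. A minimal sketch, assuming the JSON above is saved as job.json (the filename is illustrative) and the models live in a module named models:

# Hedged sketch: validate the example payload with the PGSJob model.
import json

from models import PGSJob  # assumed module name

with open("job.json") as f:  # illustrative filename for the payload above
    job = PGSJob.model_validate(json.load(f))

print(job.pipeline_param.id)                      # INTP00000000322
print(job.pipeline_param.nxf_params_file.pgs_id)  # PGS001229
print(job.globus_details.files[0].filename)       # hapnest.pvar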