...
 
Commits (4)
......@@ -14,7 +14,7 @@
"available_chromosomes": 24,
"ols_terms_location":"https://www.ebi.ac.uk/ols/api/terms?id=",
"gwas_study_location":"http://www.ebi.ac.uk/gwas/rest/api/studies/",
"logging_path":"/logs",
"logging_path":"logs",
"LOG_LEVEL": "INFO",
"APPLICATION_ROOT": "/eqtl/api",
"sumstats_meta": "/files/output/SumStatsMeta.db",
......
......@@ -8,7 +8,7 @@ chr_dir="bychr"
trait_dir="bytrait"
ols_terms_location= "https://www.ebi.ac.uk/ols/api/terms?id="
gwas_study_location="http://wwwdev.ebi.ac.uk/gwas/beta/rest/api/studies/"
logging_path="./logs"
logging_path="logs"
APPLICATION_ROOT="/gwas/summary-statistics/api"
sqlite_path="./files/output/SumStatsMeta.db"
LOG_LEVEL="INFO"
......
......@@ -10,7 +10,7 @@ image:
tag: latest
pullPolicy: Always
containerPort: 8000
logMountPath: "/logs"
logMountPath: "logs"
dataMountPath: "/files/output"
uid: 1000
gid: 1000
......@@ -23,7 +23,7 @@ volume:
ClaimName: eqtl-data
nfsServer: "<host name or IP>"
path: "/path/to/files/"
# VolumeName: eqtl-data
# type: persistentVolumeClaim
# storageClassName: standard
......
......@@ -10,3 +10,4 @@ spec:
resources:
requests:
storage: 2Gi
storageClassName: standard-nfs-production
......@@ -38,7 +38,7 @@ TISSUE_LABEL_DSET = 'tissue_label'
DSET_TYPES = {SNP_DSET: str, RSID_DSET: str, MUTATION_DSET: str, AC_DSET: float, AN_DSET: float, PVAL_DSET: float, MANTISSA_DSET: float, EXP_DSET: "int64", STUDY_DSET: str,
CHR_DSET: str, BP_DSET: "int64", R2_DSET: float, BETA_DSET: float, SE_DSET: float,
CHR_DSET: str, BP_DSET: "int64", R2_DSET: float, BETA_DSET: float, SE_DSET: float, GENE_DSET: str, PHEN_DSET: str,
EFFECT_DSET: str, OTHER_DSET: str, FREQ_DSET: float, EXPR_DSET: float, TISSUE_DSET: str,
QTL_GROUP_DSET: str, CONDITION_DSET: str, CONDITION_LABEL_DSET: str, TISSUE_LABEL_DSET: str}
......@@ -47,7 +47,7 @@ REFERENCE_DSET = SNP_DSET
HARMONISATION_PREFIX = 'hm_'
GWAS_CATALOG_STUDY_PREFIX = 'GCST'
TO_DISPLAY_DEFAULT = {SNP_DSET, PVAL_DSET, STUDY_DSET, CHR_DSET, BP_DSET, EFFECT_DSET, OTHER_DSET, BETA_DSET, RSID_DSET, MUTATION_DSET, AC_DSET, AN_DSET, FREQ_DSET, R2_DSET, EXPR_DSET, QTL_GROUP_DSET, CONDITION_DSET, CONDITION_LABEL_DSET, TISSUE_LABEL_DSET}
TO_DISPLAY_DEFAULT = {SNP_DSET, PVAL_DSET, STUDY_DSET, CHR_DSET, BP_DSET, EFFECT_DSET, OTHER_DSET, BETA_DSET, RSID_DSET, MUTATION_DSET, AC_DSET, AN_DSET, FREQ_DSET, R2_DSET, EXPR_DSET, QTL_GROUP_DSET, CONDITION_DSET, CONDITION_LABEL_DSET, TISSUE_LABEL_DSET, SE_DSET}
TO_DISPLAY_RAW = {SNP_DSET, PVAL_DSET, STUDY_DSET, CHR_DSET, BP_DSET, BETA_DSET,
EFFECT_DSET, OTHER_DSET}
......@@ -61,6 +61,6 @@ TO_LOAD_DSET_HEADERS_DEFAULT = {PHEN_DSET, SNP_DSET, PVAL_DSET, CHR_DSET, BP_DSE
TO_STORE_DSETS_DEFAULT = {SNP_DSET, PVAL_DSET, STUDY_DSET, CHR_DSET, BP_DSET, EFFECT_DSET, OTHER_DSET, BETA_DSET, RSID_DSET, MUTATION_DSET, AC_DSET, AN_DSET, FREQ_DSET, SE_DSET, R2_DSET, EXPR_DSET}
TO_QUERY_DSETS_DEFAULT = {SNP_DSET, PVAL_DSET, STUDY_DSET, CHR_DSET, BP_DSET, BETA_DSET, RSID_DSET, MUTATION_DSET, AC_DSET, AN_DSET, FREQ_DSET, R2_DSET, EXPR_DSET,
EFFECT_DSET, OTHER_DSET, TISSUE_DSET}
TO_INDEX = [PHEN_DSET, BP_DSET, PVAL_DSET, SNP_DSET, GENE_DSET]
TO_INDEX = [PHEN_DSET, BP_DSET, PVAL_DSET, SNP_DSET, GENE_DSET, STUDY_DSET, QTL_GROUP_DSET, TISSUE_DSET]
TRAIT_FILE_INDEX = ['phenotype_id', 'gene_id']
CHROMOSOMES = ['1', '2', '3', '4', '5', '6', '7', '8', '9', '10', '11', '12', '13', '14', '15', '16', '17', '18', '19', '20', '21', '22', 'X', 'Y', 'MT']
import pandas as pd
from sumstats.common_constants import *
import os
import argparse
def consolidate(in_file, out_file, study, key, qtl_group, tissue_ont):
    """Append the contents of one HDF5 file to a consolidated HDF5 store.

    The input file is read in chunks of 1,000,000 rows. Each chunk is tagged
    with the study, tissue ontology term and QTL group, cast to the dtypes
    declared in ``DSET_TYPES`` and appended to ``out_file`` under ``key`` as
    a blosc-compressed table.

    Args:
        in_file: Path to the HDF5 file to read.
        out_file: Path to the HDF5 store to append to.
        study: Value written to the ``STUDY_DSET`` column of every row.
        key: HDF key/group in ``out_file`` that the rows are appended to.
        qtl_group: Value written to the ``QTL_GROUP_DSET`` column.
        tissue_ont: Value written to the ``TISSUE_DSET`` column.
    """
    max_string = 255
    # Fixed string widths: appended chunks must fit the column sizes that
    # were set when the table was first created.
    min_itemsize = {
        OTHER_DSET: max_string,
        EFFECT_DSET: max_string,
        PHEN_DSET: max_string,
        GENE_DSET: max_string,
        MTO_DSET: max_string,
        STUDY_DSET: max_string,
        QTL_GROUP_DSET: max_string,
        TISSUE_DSET: max_string,
        RSID_DSET: 24,
        CHR_DSET: 2,
        SNP_DSET: max_string,
    }
    chunks = pd.read_hdf(in_file, chunksize=1000000)
    with pd.HDFStore(out_file) as store:
        for count, df in enumerate(chunks, start=1):
            # Progress indicator: 1-based index of the chunk being written.
            print(count)
            df[STUDY_DSET] = study
            df[TISSUE_DSET] = tissue_ont
            df[QTL_GROUP_DSET] = qtl_group
            # astype() returns a new frame rather than casting in place, so
            # the result must be kept for the cast to reach the output.
            df = df.astype(DSET_TYPES)
            df.to_hdf(
                store,
                key,
                complib='blosc',
                complevel=9,
                format='table',
                mode='a',
                append=True,
                data_columns=list(TO_INDEX),
                min_itemsize=min_itemsize,
                # index=False: no PyTables index is built during the append.
                index=False,
            )
def main():
    """Parse the command-line arguments and run ``consolidate`` with them."""
    argparser = argparse.ArgumentParser()
    # Explicit ``dest`` names are used because ``in`` is a reserved word:
    # ``args.in`` is a SyntaxError. The command-line flags are unchanged.
    # The input, output and key are required; without them the
    # consolidation cannot proceed.
    argparser.add_argument('-in', dest='in_file', help='The path to the hdf in file', required=True)
    argparser.add_argument('-out', dest='out_file', help='The path to the hdf out file', required=True)
    argparser.add_argument('-key', dest='key', help='The hdf key/group', required=True)
    argparser.add_argument('-study', dest='study', help='The study identifier', required=False)
    argparser.add_argument('-qtl_group', dest='qtl_group', help='The qtl group e.g. "LCL"', required=False)
    argparser.add_argument('-tissue_ont', dest='tissue_ont', help='The tissue ontology term', required=False)
    args = argparser.parse_args()

    consolidate(
        args.in_file,
        args.out_file,
        args.study,
        args.key,
        args.qtl_group,
        args.tissue_ont,
    )


if __name__ == "__main__":
    main()