...
 
Commits (10)
......@@ -46,7 +46,7 @@ class AssociationSearch:
self.trait_dir = self.properties.trait_dir
self.database = self.properties.sqlite_path
self.snpdb = self.properties.snpdb
self.trait_file = "phen_meta"
self.trait_file = os.path.join(self.search_path, self.trait_dir, "file_phen_meta.sqlite")
self.hdfs = []
......@@ -78,8 +78,8 @@ class AssociationSearch:
def chrom_for_trait(self):
h5file = fsutils.create_h5file_path(self.search_path, self.trait_dir, self.trait_file)
trait_service = ts.TraitService(h5file)
#h5file = fsutils.create_h5file_path(self.search_path, self.trait_dir, self.trait_file)
trait_service = ts.TraitService(self.trait_file)
chroms = trait_service.chrom_from_trait(self.trait)
if len(chroms) == 1:
self.chromosome = chroms[0]
......@@ -89,8 +89,8 @@ class AssociationSearch:
logger.debug("No chrom for this trait?") # need to handle this error
def chrom_for_gene(self):
h5file = fsutils.create_h5file_path(self.search_path, self.trait_dir, self.trait_file)
trait_service = ts.TraitService(h5file)
trait_service = ts.TraitService(self.trait_file)
#h5file = fsutils.create_h5file_path(self.search_path, self.trait_dir, self.trait_file)
chroms = trait_service.chrom_from_gene(self.gene)
if len(chroms) == 1:
self.chromosome = chroms[0]
......
......@@ -42,7 +42,7 @@ def main(): # pragma: no cover
search = Search(properties) # pragma: no cover
if find_all: # pragma: no cover
result, index_marker = search.search(start=start, size=size, pval_interval=pval_interval)
result, index_marker, paginate = search.search(start=start, size=size, pval_interval=pval_interval)
elif any([trait, gene, study, chromosome, bp_interval, snp, pval_interval, tissue, quant_method, qtl_group]):
result, index_marker, paginate = search.search(start=start, size=size, pval_interval=pval_interval,
......
import argparse
import sys
from os.path import isfile
import os
import sumstats.utils.filesystem_utils as fsutils
import sumstats.trait.search.access.trait_service as trait_service
import sumstats.study.search.access.study_service as study_service
......@@ -21,7 +22,7 @@ class Explorer:
self.study_dir = self.properties.study_dir
self.trait_dir = self.properties.trait_dir
self.sqlite_db = self.properties.sqlite_path
self.trait_file = "phen_meta"
self.trait_file = os.path.join(self.search_path, self.trait_dir, "file_phen_meta.sqlite")
def get_list_of_studies(self):
sq = sql_client.sqlClient(self.sqlite_db)
......@@ -29,14 +30,12 @@ class Explorer:
return sorted(list(set(studies)))
def get_list_of_traits(self):
    """Return the traits reported by the trait service.

    Builds a ``TraitService`` on ``self.trait_file`` and returns whatever
    its ``list_traits()`` yields.
    """
    # The service is constructed from the metadata file path directly; the
    # earlier create_h5file_path()/``h5file=`` construction was left behind
    # by the migration and no longer matches the constructor.
    service = trait_service.TraitService(self.trait_file)
    return service.list_traits()
def get_list_of_genes(self):
    """Return the genes reported by the trait service.

    Builds a ``TraitService`` on ``self.trait_file`` and returns whatever
    its ``list_genes()`` yields.
    """
    # Same construction as get_list_of_traits: the service takes the
    # metadata file path, not an ``h5file=`` keyword.
    service = trait_service.TraitService(self.trait_file)
    return service.list_genes()
......@@ -72,15 +71,22 @@ class Explorer:
def has_trait(self, trait):
    """Check that *trait* is known, raising when it is not.

    :param trait: trait identifier to look up
    :return: True when the trait service reports the trait as present
    :raises NotFoundError: when the trait service does not know the trait
    """
    # Existence is answered by the trait service alone; the previous
    # association search (cr.search_all_assocs) is no longer consulted.
    service = trait_service.TraitService(self.trait_file)
    if service.has_trait(trait):
        return True
    raise NotFoundError("Trait " + trait)
def has_gene(self, gene):
    """Check that *gene* is known, raising when it is not.

    :param gene: gene identifier to look up
    :return: True when the trait service reports the gene as present
    :raises NotFoundError: when the trait service does not know the gene
    """
    # Existence is answered by the trait service alone; the previous
    # association search (cr.search_all_assocs) is no longer consulted.
    service = trait_service.TraitService(self.trait_file)
    if service.has_gene(gene):
        return True
    raise NotFoundError("Gene " + gene)
......
......@@ -24,56 +24,67 @@ import sumstats.utils.restrictions as rst
from sumstats.common_constants import *
import logging
from sumstats.utils import register_logger
import sumstats.utils.sqlite_client as sql_client
logger = logging.getLogger(__name__)
register_logger.register(__name__)
class TraitService:
def __init__(self, file):
    """Store the metadata file that the query methods will read.

    Nothing is opened here; the value is only remembered so that each
    query method can pass it to ``sql_client.sqlClient``.

    :param file: metadata file reference, kept as ``self.file``
    """
    self.datasets = {}
    self.file = file
def list_traits(self):
    """Return the distinct traits held in the metadata file.

    :return: list of unique traits (order is not guaranteed); an empty
             list when none are stored
    """
    sq = sql_client.sqlClient(self.file)
    # get_traits() answers False, not [], when the table has no rows;
    # feeding that to set() would raise TypeError, so normalise it first.
    traits = sq.get_traits() or []
    return list(set(traits))
def list_genes(self):
    """Return the distinct genes held in the metadata file.

    :return: list of unique genes (order is not guaranteed); an empty
             list when none are stored
    """
    sq = sql_client.sqlClient(self.file)
    # get_genes() answers False, not [], when the table has no rows;
    # feeding that to set() would raise TypeError, so normalise it first.
    genes = sq.get_genes() or []
    return list(set(genes))
def has_trait(self, trait):
    """Report whether *trait* has at least one row in the metadata file.

    :param trait: trait identifier to look up
    :return: True when the lookup finds something, False otherwise
    """
    # A targeted lookup replaces the old "list everything, then test
    # membership" approach.
    sq = sql_client.sqlClient(self.file)
    search = sq.get_trait(trait)
    if search:
        return True
    return False
def has_gene(self, gene):
    """Report whether *gene* has at least one row in the metadata file.

    :param gene: gene identifier to look up
    :return: True when the lookup finds something, False otherwise
    """
    client = sql_client.sqlClient(self.file)
    # get_gene() yields a non-empty list on a hit and False on a miss,
    # so its truthiness is the answer.
    return bool(client.get_gene(gene))
def chrom_from_trait(self, trait):
    """Return the distinct chromosomes recorded for *trait*.

    :param trait: trait identifier to look up
    :return: list of unique chromosome values (order is not guaranteed);
             empty when the trait has no rows
    """
    sq = sql_client.sqlClient(self.file)
    # get_chrom_from_trait() already returns a flat list of values, so
    # only de-duplication is needed here.
    chroms_found = sq.get_chrom_from_trait(trait)
    return list(set(chroms_found))
def chrom_from_gene(self, gene):
    """Return the distinct chromosomes recorded for *gene*.

    :param gene: gene identifier to look up
    :return: list of unique chromosome values (order is not guaranteed);
             empty when the gene has no rows
    """
    sq = sql_client.sqlClient(self.file)
    # get_chrom_from_gene() already returns a flat list of values. The
    # former "flatten" step must not run on it: it would split a string
    # such as "12" into "1" and "2", or raise TypeError on integers.
    chroms_found = sq.get_chrom_from_gene(gene)
    return list(set(chroms_found))
......
......@@ -250,6 +250,54 @@ class sqlClient():
else:
return False
def get_traits(self):
    """Fetch the ``phenotype_id`` value of every row in ``trait_meta``.

    :return: list of values (duplicates kept), or False when the table
             has no rows
    """
    cursor = self.cur.execute("SELECT phenotype_id FROM trait_meta")
    found = [record[0] for record in cursor]
    return found if found else False
def get_genes(self):
    """Fetch the ``gene_id`` value of every row in ``trait_meta``.

    :return: list of values (duplicates kept), or False when the table
             has no rows
    """
    cursor = self.cur.execute("SELECT gene_id FROM trait_meta")
    found = [record[0] for record in cursor]
    return found if found else False
def get_trait(self, trait):
    """Look up the ``trait_meta`` rows whose ``phenotype_id`` equals *trait*.

    :param trait: value bound to the query's placeholder
    :return: list holding the first column of each matching row, or
             False when nothing matches
    """
    cursor = self.cur.execute("SELECT * FROM trait_meta where phenotype_id =?", (trait,))
    matches = [record[0] for record in cursor]
    return matches if matches else False
def get_gene(self, gene):
    """Look up the ``trait_meta`` rows whose ``gene_id`` equals *gene*.

    :param gene: value bound to the query's placeholder
    :return: list holding the first column of each matching row, or
             False when nothing matches
    """
    cursor = self.cur.execute("SELECT * FROM trait_meta where gene_id =?", (gene,))
    matches = [record[0] for record in cursor]
    return matches if matches else False
def get_chrom_from_trait(self, trait):
    """Fetch the ``chromosome`` of every ``trait_meta`` row matching *trait*.

    :param trait: value compared against ``phenotype_id``
    :return: list of chromosome values (duplicates kept); empty when
             nothing matches
    """
    cursor = self.cur.execute("SELECT chromosome FROM trait_meta where phenotype_id =?", (trait,))
    return [record[0] for record in cursor]
def get_chrom_from_gene(self, gene):
    """Fetch the ``chromosome`` of every ``trait_meta`` row matching *gene*.

    :param gene: value compared against ``gene_id``
    :return: list of chromosome values (duplicates kept); empty when
             nothing matches
    """
    cursor = self.cur.execute("SELECT chromosome FROM trait_meta where gene_id =?", (gene,))
    return [record[0] for record in cursor]
""" OTHER STATEMENTS """
......