VEP API query returns errors
Created by: sayonidas
I have tried submitting queries to the VEP API to fetch variant consequences for multiple ids (POST vep/:species/id).
I have a dataset of about 4000 variants for which I want to get VEP annotations. The maximum POST size is documented as 200; however, I get 504 Gateway Timeout errors if I query more than 50 variants at a time. Then, after two or three successful queries of 50 variants each, I get a 503 Service Unavailable error.
Could you please advise how I can use the Ensembl REST API to fetch this data from VEP?
class EnsemblRestClient(object):
    """
    Rest client for Ensembl API.

    Sends JSON requests to ``server``, throttles itself to roughly
    ``reqs_per_sec`` requests per second, and retries a bounded number of
    times when the server answers with a status listed in
    ``RETRY_STATUSES``.

    Class attributes (override on a subclass or instance to tune):
        RETRY_STATUSES: HTTP status codes treated as transient.
        MAX_RETRIES: how many times a transient failure is retried.
        BACKOFF_SECS: base delay used when the server sends no usable
            ``Retry-After`` header; doubled on every further attempt.
    """

    RETRY_STATUSES = (429, 503, 504)
    MAX_RETRIES = 3
    BACKOFF_SECS = 1.0

    def __init__(self, server='http://rest.ensembl.org', reqs_per_sec=15):
        """Store the server base URL and the client-side request budget."""
        self.server = server
        self.reqs_per_sec = reqs_per_sec
        self.req_count = 0
        self.last_req = 0

    def _throttle(self):
        """Sleep if ``reqs_per_sec`` requests were already sent this second."""
        if self.req_count >= self.reqs_per_sec:
            delta = time.time() - self.last_req
            if delta < 1:
                time.sleep(1 - delta)
            self.last_req = time.time()
            self.req_count = 0

    def _retry_delay(self, error, attempt):
        """Return the seconds to wait before retrying after ``error``."""
        retry_after = error.headers.get('Retry-After') if error.headers else None
        if retry_after is not None:
            try:
                return max(0.0, float(retry_after))
            except ValueError:
                # Header was not a plain number of seconds; fall back below.
                pass
        return self.BACKOFF_SECS * (2 ** attempt)

    def perform_rest_action(self, endpoint, hdrs=None, params=None, data=None):
        """Send one request and return the decoded JSON body.

        Args:
            endpoint: path appended to ``self.server``.
            hdrs: optional dict of extra headers; these override the JSON
                defaults.
            params: optional dict encoded into the query string.
            data: optional request body as bytes; when given, urllib sends
                the request as a POST.

        Returns:
            The parsed JSON payload, or ``None`` when the body is empty or
            the request ultimately failed (the failure is written to
            stderr).
        """
        headers = {'Content-Type': 'application/json', 'Accept': 'application/json'}
        if hdrs:
            headers.update(hdrs)

        # Build the query string exactly once, outside the retry loop, so a
        # retry can never append it a second time.
        if params:
            endpoint += '?' + urlencode(params)
        url = self.server + endpoint

        for attempt in range(self.MAX_RETRIES + 1):
            self._throttle()
            # A fresh Request per attempt re-sends the same body; the retry
            # must not silently turn a POST into a GET.
            request = Request(url, headers=headers, data=data)
            # Count every attempt that is sent, including failed ones, so
            # the throttle reflects the real load put on the server.
            self.req_count += 1
            try:
                response = urlopen(request)
                try:
                    content = response.read()
                finally:
                    response.close()
            except HTTPError as e:
                if e.code in self.RETRY_STATUSES and attempt < self.MAX_RETRIES:
                    time.sleep(self._retry_delay(e, attempt))
                    continue
                sys.stderr.write('Request failed for {0}: Status code: {1.code} Reason: {1.reason}\n'.format(endpoint, e))
                return None
            # Never hand the request body back as if it were a response.
            return json.loads(content) if content else None
        return None

    def get_vep(self, snp_ids):
        """Fetch VEP consequences for a batch of variant ids.

        Args:
            snp_ids: sequence of variant identifiers posted as the ``ids``
                field to ``/vep/human/id``.

        Returns:
            The decoded JSON response, or ``None`` if ``snp_ids`` is empty,
            the request failed, or the response was empty.
        """
        if not snp_ids:
            # Nothing to annotate; skip the round trip entirely.
            return None
        payload = json.dumps({'ids': list(snp_ids)})
        params = {'Blosum62': 1, 'Conservation': 1, 'protein': 1, 'domains': 1}
        response = self.perform_rest_action(
            endpoint='/vep/human/id',
            data=payload.encode(),
            params=params,
        )
        if response:
            return response
        return None
# Query VEP in fixed-size batches through ONE shared client. Re-creating the
# client per batch would reset its request counter, so its client-side rate
# limiting would never engage.
client = EnsemblRestClient()
n = 50  # variant ids per POST request
snp_chunks = [input_snp_list[i:i + n] for i in range(0, len(input_snp_list), n)]
# Accumulates the annotations from every batch; previously each batch's
# response overwrote the one before it.
vep_results = []
for snp_list in snp_chunks:
    snp_response_list = client.get_vep(snp_list)
    # Only list responses are merged; get_vep returns None for a failed batch.
    if isinstance(snp_response_list, list):
        vep_results.extend(snp_response_list)