PDBE-3714: Changes for depth, return raw scores, return only for best structure

parent 34e18621
......@@ -2322,14 +2322,23 @@ def get_uniprot_generic_annotations_api(uniprot_accession):
bottle.response.status = 404
return {}
# get the top structure for the uniprot accession
best_structure = get_uniprot_best_entity(uniprot_accession)
if not best_structure:
bottle.response.status = 404
return {}
[best_entry, best_entity] = best_structure.split("_")
query = """
MATCH (unp:UniProt {ACCESSION:$uniprot_accession})-[:HAS_UNP_RESIDUE]->(unp_res:UNPResidue)<-[:MAP_TO_UNIPROT_RESIDUE]-(pdb_res:PDBResidue)
<-[res_rel:FUNPDBE_ANNOTATION_FOR]-(fun_group:FunPDBeResidueGroup)-[:FUNPDBE_RESIDUE_GROUP_OF]->(fun_entry:FunPDBeEntry)
WHERE (NOT fun_entry.DATA_RESOURCE IN ["dynamine", "FoldX", "3Dcomplex","MetalPDB","ChannelsDB","POPScomp_PDBML","Missense3D"]) AND (NOT (fun_entry.DATA_RESOURCE="p2rank" AND res_rel.CONFIDENCE_CLASSIFICATION <> "high" ))
<-[res_rel:FUNPDBE_ANNOTATION_FOR]-(fun_group:FunPDBeResidueGroup)-[:FUNPDBE_RESIDUE_GROUP_OF]->(fun_entry:FunPDBeEntry), (pdb_res)<-[:HAS_PDB_RESIDUE]-(entity:Entity)
WHERE (NOT fun_entry.DATA_RESOURCE IN ["dynamine", "FoldX", "3Dcomplex","MetalPDB","ChannelsDB","POPScomp_PDBML","Missense3D"]) AND (NOT (fun_entry.DATA_RESOURCE="p2rank" AND res_rel.CONFIDENCE_CLASSIFICATION <> "high" )) AND entity.BEST_CHAIN_ID=res_rel.CHAIN_LABEL
WITH SPLIT(pdb_res.UNIQID, '_')[0] AS entry_id, SPLIT(pdb_res.UNIQID, '_')[1] AS entity_id, res_rel.CHAIN_LABEL AS auth_asym_id, fun_entry.DATA_RESOURCE AS data_resource,
fun_entry.RESOURCE_ENTRY_URL AS resource_url, unp_res.ID AS unp_res_id, unp_res.ONE_LETTER_CODE AS amino_acid_code, pdb_res.ID AS pdb_res_id, fun_group.LABEL AS group_label
WITH DISTINCT data_resource, group_label, toInteger(unp_res_id) AS unp_res_id, entry_id, toInteger(entity_id) AS entity_id, auth_asym_id, amino_acid_code, resource_url ORDER BY unp_res_id
RETURN data_resource, group_label, resource_url, unp_res_id, amino_acid_code, entry_id, entity_id, COLLECT(auth_asym_id)
fun_entry.RESOURCE_ENTRY_URL AS resource_url, unp_res.ID AS unp_res_id, unp_res.ONE_LETTER_CODE AS amino_acid_code, pdb_res.ID AS pdb_res_id, fun_group.LABEL AS group_label, res_rel.RAW_SCORE AS raw_score
WITH DISTINCT data_resource, group_label, raw_score, toInteger(unp_res_id) AS unp_res_id, entry_id, toInteger(entity_id) AS entity_id, auth_asym_id, amino_acid_code, resource_url ORDER BY unp_res_id
RETURN data_resource, group_label, raw_score, resource_url, unp_res_id, amino_acid_code, entry_id, entity_id, COLLECT(auth_asym_id)
"""
annotations_mappings = list(graph.run(query, parameters={
......@@ -2344,10 +2353,15 @@ def get_uniprot_generic_annotations_api(uniprot_accession):
annotation_residue_dict = dict()
for annotations_mapping in annotations_mappings:
(data_resource, group_label, resource_url, unp_res_id, amino_acid_code, entry_id, entity_id, auth_asym_ids) = annotations_mapping
(data_resource, group_label, raw_score, resource_url, unp_res_id, amino_acid_code, entry_id, entity_id, auth_asym_ids) = annotations_mapping
# PDBE-3714: Set data resource to specific group label in case of depth
if data_resource == "depth":
# skip structures which are not best
if not (best_entry == entry_id and int(best_entity) == entity_id):
continue
data_resource = group_label
if is_processed_protein:
......@@ -2365,7 +2379,7 @@ def get_uniprot_generic_annotations_api(uniprot_accession):
}
annotation_residue_dict[data_resource].append(unp_res_id)
annotation_dict[dict_key]["entries"].append((entry_id, entity_id, auth_asym_ids, resource_url))
annotation_dict[dict_key]["entries"].append((entry_id, entity_id, auth_asym_ids, resource_url, raw_score))
resource_master_dict = dict()
......@@ -2373,8 +2387,8 @@ def get_uniprot_generic_annotations_api(uniprot_accession):
for residue in sorted(list(set(residues))):
entries = []
for entry_id, entity_id, auth_asym_ids, resource_url in annotation_dict[(data_resource, residue)]["entries"]:
entries.append((entry_id, entity_id, tuple(auth_asym_ids), resource_url))
for entry_id, entity_id, auth_asym_ids, resource_url, raw_score in annotation_dict[(data_resource, residue)]["entries"]:
entries.append((entry_id, entity_id, tuple(auth_asym_ids), resource_url, raw_score))
entries = tuple(entries)
......@@ -2424,7 +2438,8 @@ def get_uniprot_generic_annotations_api(uniprot_accession):
"entityId": entry[1],
"chainIds": entry[2],
"additionalData": {
"resourceUrl": entry[3]
"resourceUrl": entry[3],
"rawScore": float(entry[4]) if entry[4] else None
}
})
......@@ -3679,3 +3694,21 @@ def get_uniprot_annotation_partners_api(uniprot_accession):
bottle.response.status = 200
return partner_results
def get_uniprot_best_entity(uniprot_accession):
    """Return the UNIQID of the top-ranked entity for a UniProt accession.

    Entities linked to the accession through a HAS_UNIPROT_OBS_SEGMENT
    relationship are ordered by the integer value of element 7 of that
    relationship's RANKING_SCORES, highest first, and only the first row
    is requested.

    :param uniprot_accession: accession bound to the ``$uniprot_accession``
        query parameter.
    :return: the ``entity.UNIQID`` value of the first row, or ``None`` when
        the query produces no rows.
    """
    # The query text is kept verbatim (including its line layout) so the
    # string sent to the database is unchanged.
    best_entity_query = """
MATCH (unp:UniProt {ACCESSION:$uniprot_accession})<-[rel:HAS_UNIPROT_OBS_SEGMENT]-(entity:Entity)
WITH toInteger(rel.RANKING_SCORES[7]) AS rankingScore, entity.UNIQID AS entity ORDER BY rankingScore DESC
RETURN entity LIMIT 1
"""
    query_parameters = {"uniprot_accession": uniprot_accession}
    rows = list(graph.run(best_entity_query, parameters=query_parameters))

    # LIMIT 1 in the query means at most one row comes back; its single
    # column is the entity UNIQID.
    return rows[0][0] if rows else None
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment