Commit a9500e72 authored by David Hoksza's avatar David Hoksza
Browse files

querying opentargets for variants; fix issue with possibly missing gene name for a variant in dbsnp

parent 65c91a03
......@@ -31,6 +31,12 @@ def get_dbsnp(ids: List[str]) -> List[Dict]:
if "dbsnp" not in res:
continue
if "gene" not in res["dbsnp"]:
# This can happen for variants which are not mapped onto a gene in dbSNP, although OpenTargets, for example, can
# obtain this information from a different source. An example is rs9388451, which is not associated
# with a gene in dbSNP but is in the GWAS Catalog (or at least this was the situation as of 2019-12-02)
continue
genes = res["dbsnp"]["gene"]
if not isinstance(genes, list):
genes = [genes]
......
......@@ -29,14 +29,14 @@ def get_genes_and_evidence(orphanet_ids:List[str], association_score: float) ->
return genes
def get_variants(genes_evidence: Dict):
for ge in genes_evidence:
s_ge = ge['evidence_id'].split('-')
def get_variants(genes: Dict[str, Dict]):
for gene_name in genes:
s_ge = genes[gene_name]['evidence_id'].split('-')
target = s_ge[0]
disease = s_ge[1]
res = requests.os(
"https://platform-api.opentargets.io/v3/platform/public/association/filter",
res = requests.get(
"https://platform-api-qc.opentargets.io/v3/platform/public/evidence/filter",
params={'target': target,
'disease': disease,
'data_type': 'genetic_association',
......@@ -45,6 +45,15 @@ def get_variants(genes_evidence: Dict):
)
if res.status_code == requests.codes.ok:
content = json.loads(res.text)
if 'data' in content:
for content_item in content['data']:
if 'variant' in content_item:
if 'dbsnp' in content_item['variant']['id']:
rsId = content_item['variant']['id'].split("/")[-1]
genes[gene_name]['variants'].append(rsId)
# Not sure whether the API can return duplicates, so we deduplicate to be safe
genes[gene_name]['variants'] = list(set(genes[gene_name]['variants']))
if __name__ == '__main__':
......@@ -71,7 +80,6 @@ if __name__ == '__main__':
args = parser.parse_args()
genes_evidence = get_genes_and_evidence(args.orphanet_ids.split(","), args.threshold_score)
# get_variants(genes_evidence)
genes_sorted = {}
cnt = args.top_n
i = 0
......@@ -81,6 +89,8 @@ if __name__ == '__main__':
i += 1
genes_sorted[k] = v
get_variants(genes_sorted)
print(json.dumps({"name": "opentargets", "genes": genes_sorted}, indent=2))
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment