Commit 580ad3e7 authored by David Hoksza's avatar David Hoksza
Browse files

myvariants multiple genes fix; opentargets association score in file name fix

parent 9c0850dc
#!/usr/bin/env bash
# ------------------------- PARAMETERS TO SET -------------------------
ORPHANET_IDS="791"
#ORPHANET_IDS="130"
ORPHANET_IDS="33,67046,79327,79321,86309"
#ORPHANET_IDS="33,67046,79327,79321,86309"
DISGENET_CNT_THRESHOLD=50
DISGENET_ASSOCIATION_SCORE_THRESHOLD=0
DISGENET_ASSOCIATION_SCORE_THRESHOLD="0"
OPENTARGETS_CNT_THRESHOLD=50
OPENTARGETS_ASSOCIATION_SCORE_THRESHOLD=0.2
OPENTARGETS_ASSOCIATION_SCORE_THRESHOLD="0.5"
BUILD_MAP_GENERATOR=0 #0 for building it
STOP_AFTER_STAGE=-1
# ------------------------- PARAMETERS TO SET -------------------------
......@@ -21,17 +22,20 @@ MAP_GENERATOR_DIR=map_generator/
PYTHON_BIN=python3
PIP_BIN=pip3
DISGENET_ASSOCIATION_SCORE_THRESHOLD_STR=${DISGENET_ASSOCIATION_SCORE_THRESHOLD//./_}
OPENTARGETS_ASSOCIATION_SCORE_THRESHOLD_STR=${OPENTARGETS_ASSOCIATION_SCORE_THRESHOLD//./_}
ORPHANET_IDS_UNDERSCORE=${ORPHANET_IDS//,/_}
mkdir $RES_DIR
# Get associations from DisGeNET
disgenet_out_path=${RES_DIR}/01-disgenet-id_${ORPHANET_IDS_UNDERSCORE}-n_${DISGENET_CNT_THRESHOLD}-s_${DISGENET_ASSOCIATION_SCORE_THRESHOLD}.json
disgenet_out_path=${RES_DIR}/01-disgenet-id_${ORPHANET_IDS_UNDERSCORE}-n_${DISGENET_CNT_THRESHOLD}-s_${DISGENET_ASSOCIATION_SCORE_THRESHOLD_STR}.json
$PYTHON_BIN $ASSOCIATIONS_DIR/disgenet.py -o ${ORPHANET_IDS} -n ${DISGENET_CNT_THRESHOLD} -s ${DISGENET_ASSOCIATION_SCORE_THRESHOLD} > ${disgenet_out_path}
echo "Disgenet gene and variant associations stored in ${disgenet_out_path}"
# Get associations from OpenTargets
opentargets_out_path=${RES_DIR}/01-opentargets-id_${ORPHANET_IDS_UNDERSCORE}-n_${DISGENET_CNT_THRESHOLD}-s_${DISGENET_ASSOCIATION_SCORE_THRESHOLD}.json
opentargets_out_path=${RES_DIR}/01-opentargets-id_${ORPHANET_IDS_UNDERSCORE}-n_${DISGENET_CNT_THRESHOLD}-s_${OPENTARGETS_ASSOCIATION_SCORE_THRESHOLD_STR}.json
$PYTHON_BIN $ASSOCIATIONS_DIR/opentargets.py -o ${ORPHANET_IDS} -n ${OPENTARGETS_CNT_THRESHOLD} -s ${OPENTARGETS_ASSOCIATION_SCORE_THRESHOLD} > ${opentargets_out_path}
echo "Opentargets gene and variant associations stored in ${opentargets_out_path}"
......
......@@ -18,7 +18,7 @@ def get_dbsnp(ids: List[str]) -> List[Dict]:
"ids": ",".join(ids)
},
params={
"fields": "hg19.start, vcf.ref, dbnsfp.aa.ref, vcf.alt, dbnsfp.aa.alt, dbnsfp.genename, "
"fields": "hg19.start, vcf.ref, dbnsfp.aa.ref, vcf.alt, dbnsfp.aa.alt, dbsnp.gene.symbol, "
"dbsnp.vartype, chrom, dbsnp.rsid, dbnsfp.aa.pos, dbnsfp.uniprot.acc"
}
)
......@@ -57,59 +57,66 @@ def get_dbsnp(ids: List[str]) -> List[Dict]:
if "ref" not in aa:
continue
position = str(res["hg19"]["start"])
original_dna = res["vcf"]["ref"]
original_aa = aa["ref"]
alternative_dna = res["vcf"]["alt"]
alternative_aa = aa["alt"]
name = res["dbnsfp"]["genename"]
description = res["dbsnp"]["vartype"]
color = "#ff0000"
contig = res["chrom"]
allele_frequency = "0.8" #there is probably a bug in myvariant since it shows wrong allele in dbsnp and then the frequency might be wrong as well
variant_identifier = res["dbsnp"]["rsid"]
# Sometimes, you get something like this:
# "uniprot": [
# {
# "acc": "O14746",
# "entry": "TERT_HUMAN"
# },
# {}
# ],
pos = aa["pos"]
if isinstance(pos, list):
#TODO - taking the first one is just temporary solution because this can be off from the position of Uniprot record which is in MINERVA
aa_pos = pos[0]
elif isinstance(pos, int):
aa_pos = pos
else:
assert False
amino_acid_change = "{}:XXX:g.{}{}>{}:p.{}{}{}".format(name,
position,
original_dna,
alternative_dna,
original_aa,
aa_pos,
alternative_aa)
snps.append({
"position": position,
"original_dna": original_dna,
"alternative_dna": alternative_dna,
"name": name,
"description": description,
"color": color,
"contig": contig,
"allele_frequency": allele_frequency,
"variant_identifier": variant_identifier,
"amino_acid_change": amino_acid_change,
"uniprot_acc": uniprot_acc
})
genes = res["dbsnp"]["gene"]
if not isinstance(genes, list):
genes = [genes]
for gene in genes:
position = str(res["hg19"]["start"])
original_dna = res["vcf"]["ref"]
original_aa = aa["ref"]
alternative_dna = res["vcf"]["alt"]
alternative_aa = aa["alt"]
name = gene["symbol"]
description = res["dbsnp"]["vartype"]
color = "#ff0000"
contig = res["chrom"]
allele_frequency = "0.8" #there is probably a bug in myvariant since it shows wrong allele in dbsnp and then the frequency might be wrong as well
variant_identifier = res["dbsnp"]["rsid"]
# Sometimes, you get something like this:
# "uniprot": [
# {
# "acc": "O14746",
# "entry": "TERT_HUMAN"
# },
# {}
# ],
pos = aa["pos"]
if isinstance(pos, list):
#TODO - taking the first one is just temporary solution because this can be off from the position of Uniprot record which is in MINERVA
aa_pos = pos[0]
elif isinstance(pos, int):
aa_pos = pos
else:
assert False
amino_acid_change = "{}:XXX:g.{}{}>{}:p.{}{}{}".format(name,
position,
original_dna,
alternative_dna,
original_aa,
aa_pos,
alternative_aa)
snps.append({
"position": position,
"original_dna": original_dna,
"alternative_dna": alternative_dna,
"name": name,
"description": description,
"color": color,
"contig": contig,
"allele_frequency": allele_frequency,
"variant_identifier": variant_identifier,
"amino_acid_change": amino_acid_change,
"uniprot_acc": uniprot_acc
})
logging.info("Skipped {} variants out of {}".format(len(content) - len(snps), len(content)))
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment