Commit 932bc045 authored by David Hoksza's avatar David Hoksza
Browse files

Handle multiple genes per SNP in ClinVar

parent 5ad7f86c
......@@ -55,18 +55,19 @@ def get_clinvar_variants(clinvar_fname:str, orpha_ids:List[str]):
df_relevant = df_contains_list(df, "OrphanetID", orpha_full_ids)
gene_variants = {}
for index, row in df_relevant.iterrows():
gene_symbol = row['GeneSymbol']
dbsnp = row['RS...dbSNP.']
significance = row['ClinicalSignificance']
if dbsnp == -1:
continue
dbsnp = 'rs{}'.format(dbsnp)
if gene_symbol not in gene_variants:
gene_variants[gene_symbol] = {'variants': {}}
if dbsnp not in gene_variants[gene_symbol]['variants']:
gene_variants[gene_symbol]['variants'][dbsnp] = []
gene_variants[gene_symbol]['variants'][dbsnp].append(significance)
gene_symbols = row['GeneSymbol'] #we can have multiple gene symbols per snp (e.g DPAGT1;HMBS)
for gene_symbol in gene_symbols.split(";"):
dbsnp = row['RS...dbSNP.']
significance = row['ClinicalSignificance']
if dbsnp == -1:
continue
dbsnp = 'rs{}'.format(dbsnp)
if gene_symbol not in gene_variants:
gene_variants[gene_symbol] = {'variants': {}}
if dbsnp not in gene_variants[gene_symbol]['variants']:
gene_variants[gene_symbol]['variants'][dbsnp] = []
gene_variants[gene_symbol]['variants'][dbsnp].append(significance)
return filter_out_conflicting(gene_variants)
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment