Commit e8dc2ac4 authored by David Hoksza
Browse files

assembling into directory with disease id

parent 8b895c49
......@@ -29,20 +29,22 @@ OPENTARGETS_ASSOCIATION_SCORE_THRESHOLD_STR=${OPENTARGETS_ASSOCIATION_SCORE_THRE
ORPHANET_IDS_UNDERSCORE=${ORPHANET_IDS//,/_}
mkdir $RES_DIR
RES_DIR=$RES_DIR/${ORPHANET_IDS_UNDERSCORE}
mkdir $RES_DIR
# Get associations from DisGeNET
disgenet_out_path=${RES_DIR}/01-disgenet-id_${ORPHANET_IDS_UNDERSCORE}-n_${DISGENET_CNT_THRESHOLD}-s_${DISGENET_ASSOCIATION_SCORE_THRESHOLD_STR}.json
disgenet_out_path=${RES_DIR}/01-disgenet-n_${DISGENET_CNT_THRESHOLD}-s_${DISGENET_ASSOCIATION_SCORE_THRESHOLD_STR}.json
$PYTHON_BIN $ASSOCIATIONS_DIR/disgenet.py -o ${ORPHANET_IDS} -n ${DISGENET_CNT_THRESHOLD} -s ${DISGENET_ASSOCIATION_SCORE_THRESHOLD} > ${disgenet_out_path}
echo "Disgenet gene and variant associations stored in ${disgenet_out_path}"
# Get associations from OpenTargets
opentargets_out_path=${RES_DIR}/01-opentargets-id_${ORPHANET_IDS_UNDERSCORE}-n_${DISGENET_CNT_THRESHOLD}-s_${OPENTARGETS_ASSOCIATION_SCORE_THRESHOLD_STR}.json
opentargets_out_path=${RES_DIR}/01-opentargets-n_${DISGENET_CNT_THRESHOLD}-s_${OPENTARGETS_ASSOCIATION_SCORE_THRESHOLD_STR}.json
$PYTHON_BIN $ASSOCIATIONS_DIR/opentargets.py -o ${ORPHANET_IDS} -n ${OPENTARGETS_CNT_THRESHOLD} -s ${OPENTARGETS_ASSOCIATION_SCORE_THRESHOLD} > ${opentargets_out_path}
echo "Opentargets gene and variant associations stored in ${opentargets_out_path}"
# Merge with ClinVar
genes_variants_out_path=${RES_DIR}/02-genes_variants-id_${ORPHANET_IDS_UNDERSCORE}.log
genes_variants_out_path=${RES_DIR}/02-genes_variants.log
$PYTHON_BIN $ASSOCIATIONS_DIR/merge_with_clinvar.py -v $disgenet_out_path,$opentargets_out_path -c ${ASSOCIATIONS_DATA_DIR}/OrphaHPO_clinvar_variants_summary.tsv -oid ${ORPHANET_IDS} > ${genes_variants_out_path}
echo "Integration with ClinVar stored in ${genes_variants_out_path}"
......@@ -58,7 +60,7 @@ echo "Genes stored in ${genes_out_path}"
#cp ${EXTEND_DIR}/output.txt ${text_mining_out_path}
#echo "Genes list extended"
minerva_genes_out_path=${RES_DIR}/04-minerva-genes-id_${ORPHANET_IDS_UNDERSCORE}.txt
minerva_genes_out_path=${RES_DIR}/04-minerva-genes.txt
$PYTHON_BIN $ASSOCIATIONS_DIR/minerva_genes.py -f ${genes_out_path} > ${minerva_genes_out_path}
var_line=`cat ${genes_variants_out_path} | grep "variants in total"`
......@@ -67,7 +69,7 @@ variants_out_path=${genes_variants_out_path/02-genes_variants/03-variants}
echo ${var_line#*:} | tr ',' '\n' > ${variants_out_path}
echo "Variants stored in ${variants_out_path}"
minerva_variants_out_path=${RES_DIR}/04-minerva-variants-id_${ORPHANET_IDS_UNDERSCORE}.txt
minerva_variants_out_path=${RES_DIR}/04-minerva-variants.txt
$PYTHON_BIN $ASSOCIATIONS_DIR/minerva_variants.py -f ${variants_out_path} > ${minerva_variants_out_path}
if [ ${STOP_AFTER_STAGE} = 1 ]; then
......@@ -77,8 +79,8 @@ fi
# ------------------------------ 2. Obtain pathways ------------------------------
echo "Retrieving enriched pathways"
Rscript --vanilla ${ENRICHMENT_DIR}/enrich_maps.R ${genes_out_path} ${ENRICHMENT_CONFIG}
enriched_maps_out_path=$RES_DIR/05-enriched_disease_maps-id_${ORPHANET_IDS_UNDERSCORE}.txt
enriched_paths_out_path=$RES_DIR/05-enriched_pathways-id_${ORPHANET_IDS_UNDERSCORE}.txt
enriched_maps_out_path=$RES_DIR/05-enriched_disease_maps.txt
enriched_paths_out_path=$RES_DIR/05-enriched_pathways.txt
mv enriched_disease_maps.txt ${enriched_maps_out_path}
mv enriched_pathways.txt ${enriched_paths_out_path}
echo "Enriched pathways obtained"
......@@ -97,7 +99,7 @@ if [ ${BUILD_MAP_GENERATOR} = 1 ]; then
fi
echo "Map generator built"
echo "Assembling the map from pathways ..."
map_out_path=${RES_DIR}/06-minerva_map-id_${ORPHANET_IDS_UNDERSCORE}.xml
map_out_path=${RES_DIR}/06-minerva_map.xml
#java -Xmx4g -jar ${MAP_GENERATOR_DIR}/biohackathon/target/biohackathon-1.0-jar-with-dependencies.jar --enrichr-file ${enriched_paths_out_path} --minerva-file ${enriched_maps_out_path} --text-mining-file ${text_mining_out_path} --output-file ${map_out_path}
java -Xmx4g -jar ${MAP_GENERATOR_DIR}/biohackathon/target/biohackathon-1.0-jar-with-dependencies.jar --enrichr-file ${enriched_paths_out_path} --minerva-file ${enriched_maps_out_path} --output-file ${map_out_path}
echo "Pathways assembled into ${map_out_path}"
......
......@@ -28,6 +28,9 @@ def get_dbsnp(ids: List[str]) -> List[Dict]:
cnt = 0
for res in content:
if "dbsnp" not in res:
continue
genes = res["dbsnp"]["gene"]
if not isinstance(genes, list):
genes = [genes]
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment