Commit e99a228f authored by Valentina Galata's avatar Valentina Galata
Browse files

format fix in rules/Virulence (issue #26)

parent a37ed46c
# Combine Virulence with SignalP
import glob
import os
# Load the workflow configuration and read the "project" and "input_file" entries.
configfile: "config.yaml"

PROJECT = config["project"]
INPUT = config["input_file"]
#################################
# Combine Virulence and SignalP #
#################################
# Join the virulence classifier predictions with the aggregated SignalP results.
rule merge_SignalPVir:
    input:
        Vir="{OUTDIR}/{project}/VIRULENCE/HMM_classifier_virulence/{input_file}_virulence_final_prediction.tsv",
        SignalP="{OUTDIR}/{project}/SignalP/aggregated/{input_file}_SignalP_results.txt"
    output:
        "{OUTDIR}/{project}/VIRULENCE/virulence_merged/{input_file}_virulence_SignalP_prediction.tsv"
    # Both inputs are sorted first because `join` expects sorted input; rows are
    # matched on the first tab-separated column (join's default join field).
    shell:
        """
        join -t $'\t' <(sort {input[0]}) <(sort {input[1]}) > {output[0]}
        """
###########################################################
# Insert a confidence level for the different predictions #
###########################################################
# Prediction: Non-Pathogenic
rule SignalVir_non_pathogenic:
    input:
        "{OUTDIR}/{project}/VIRULENCE/virulence_merged/{input_file}_virulence_SignalP_prediction.tsv"
    output:
        temp("{OUTDIR}/{project}/VIRULENCE/Virulence_prediction/{input_file}_virulence_non_pathogenic.txt")
    message: "The confidence level of Non-Pathogenic is given to query sequences which are negative for both SignalP as Virulence prediction"
    # Keep the rows whose 4th column is "Non-Pathogenic" and set the 6th column
    # to "-". Assigning $6 makes awk rebuild the record with single spaces, so
    # sed afterwards turns every space into a tab.
    # NOTE(review): only column 4 is tested here; the SignalP column is not
    # checked although the message mentions it -- confirm this is intended.
    shell:
        """
        awk '$4 =="Non-Pathogenic"' {input} | awk '$6 ="-"' | sed 's/ /\t/g' > {output}
        """
# Prediction: confidence 1
rule SignalVir_confidence_1:
    input:
        "{OUTDIR}/{project}/VIRULENCE/virulence_merged/{input_file}_virulence_SignalP_prediction.tsv"
    output:
        temp("{OUTDIR}/{project}/VIRULENCE/Virulence_prediction/{input_file}_virulence_confidence_1.txt")
    message:
        "The confidence level of 1 is given to query sequences which are positives for both SignalP as Virulence"
    # Keep the rows with column 4 == "Pathogenic" and column 5 == "Y", then set
    # the 6th column to "1". The $6 assignment makes awk rebuild the record
    # with single spaces, which sed converts to tabs.
    shell:
        """
        awk '$4 =="Pathogenic" && $5 =="Y"' {input} | awk '$6 ="1"' | sed 's/ /\t/g' > {output}
        """
# Prediction: confidence 2
rule SignalVir_confidence_2:
    input:
        "{OUTDIR}/{project}/VIRULENCE/virulence_merged/{input_file}_virulence_SignalP_prediction.tsv"
    output:
        temp("{OUTDIR}/{project}/VIRULENCE/Virulence_prediction/{input_file}_virulence_confidence_2.txt")
    # Keep the rows with column 4 == "Pathogenic" and column 5 == "N", then set
    # the 6th column to "2". The $6 assignment makes awk rebuild the record
    # with single spaces, which sed converts to tabs.
    shell:
        """
        awk '$4 =="Pathogenic" && $5 =="N"' {input} | awk '$6 ="2"' | sed 's/ /\t/g' > {output}
        """
# Prediction: confidence 3
rule SignalVir_confidence_3:
    input:
        "{OUTDIR}/{project}/VIRULENCE/virulence_merged/{input_file}_virulence_SignalP_prediction.tsv"
    output:
        temp("{OUTDIR}/{project}/VIRULENCE/Virulence_prediction/{input_file}_virulence_confidence_3.txt")
    # Keep the rows with column 4 == "-" and column 5 == "Y", then set the 6th
    # column to "3". The $6 assignment makes awk rebuild the record with single
    # spaces, which sed converts to tabs.
    shell:
        """
        awk '$4 =="-" && $5 =="Y"' {input} | awk '$6 ="3"' | sed 's/ /\t/g' > {output}
        """
# Prediction: confidence 4
rule SignalVir_confidence_4:
    input:
        "{OUTDIR}/{project}/VIRULENCE/virulence_merged/{input_file}_virulence_SignalP_prediction.tsv"
    output:
        temp("{OUTDIR}/{project}/VIRULENCE/Virulence_prediction/{input_file}_virulence_confidence_4.txt")
    # Keep the rows with column 4 == "-" and column 5 == "N", then set the 6th
    # column to "4". The $6 assignment makes awk rebuild the record with single
    # spaces, which sed converts to tabs.
    shell:
        """
        awk '$4 =="-" && $5 =="N"' {input} | awk '$6 ="4"' | sed 's/ /\t/g' > {output}
        """
# Concatenate the per-confidence-level files into a single prediction table.
rule SignalVir_virulence_prediction:
    input:
        confidence_1="{OUTDIR}/{project}/VIRULENCE/Virulence_prediction/{input_file}_virulence_confidence_1.txt",
        confidence_2="{OUTDIR}/{project}/VIRULENCE/Virulence_prediction/{input_file}_virulence_confidence_2.txt",
        confidence_3="{OUTDIR}/{project}/VIRULENCE/Virulence_prediction/{input_file}_virulence_confidence_3.txt",
        confidence_4="{OUTDIR}/{project}/VIRULENCE/Virulence_prediction/{input_file}_virulence_confidence_4.txt",
        non_pathogenic="{OUTDIR}/{project}/VIRULENCE/Virulence_prediction/{input_file}_virulence_non_pathogenic.txt"
    output:
        "{OUTDIR}/{project}/VIRULENCE/Virulence_prediction/{input_file}_Confidence_Virulence_ensembled.csv"
    # The inputs are addressed by position, so the output order is
    # confidence 1, 2, 3, 4 followed by the non-pathogenic rows.
    shell:
        "cat {input[0]} {input[1]} {input[2]} {input[3]} {input[4]} > {output[0]}"
This diff is collapsed.
......@@ -4,30 +4,35 @@ import glob
import os
# Load the workflow configuration and read the "project" and "input_file" entries.
configfile: "config.yaml"

PROJECT = config["project"]
INPUT = config["input_file"]
####################################
#           Final Report           #
####################################
# Create the final report by combining all files
# Join the sequence translation table with the ensembled confidence predictions.
rule Virulence_merge_final:
    input:
        translation="{OUTDIR}/{project}/renamed/{input_file}_translation.tsv",
        prediction="{OUTDIR}/{project}/VIRULENCE/Virulence_prediction/{input_file}_Confidence_Virulence_ensembled.csv"
    output:
        temp("{OUTDIR}/{project}/VIRULENCE/Virulence_prediction/{input_file}_Virulence_translation_ensembled.csv")
    # NOTE(review): outdir is not referenced by the shell command below.
    params:
        outdir="{OUTDIR}"
    # Both inputs are sorted first because `join` expects sorted input; rows are
    # matched on the first tab-separated column (join's default join field).
    shell:
        """
        join -t $'\t' <(sort {input[0]}) <(sort {input[1]}) > {output}
        """
# Prepend a header line to the merged table and publish it as the final report.
rule virulence_report:
    input:
        "{OUTDIR}/{project}/VIRULENCE/Virulence_prediction/{input_file}_Virulence_translation_ensembled.csv"
    output:
        "{OUTDIR}/{project}/Virulence_prediction_{input_file}_report.csv"
    # NOTE(review): outdir is not referenced by the shell command below.
    params:
        outdir="{OUTDIR}"
    # NOTE(review): `sed -i` rewrites the input file in place to insert the
    # header before line 1, and only then is the file copied to the output.
    # The two adjacent string literals are concatenated into one command line.
    shell:
        "sed -i $'1 i\\\ Sequence no.\tSequence Query\tHMM prediction\tclassifier prediction\tVirulence_prediction\tSignalP\tConfidence level' {input};"
        "cp {input} {output}"
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment