Commit 0ba12560 authored by Valentina Galata
Browse files

escaping tabs in rule cmds properly (issue #32)

parent f01fdaba
......@@ -40,7 +40,7 @@ rule modify_details:
outdir="{OUTDIR}"
shell:
"""
sed 's/"//g' {input} | sed 's/,/\t/g' > {output}
sed 's/"//g' {input} | sed 's/,/\\t/g' > {output}
"""
rule modify_report:
......@@ -52,5 +52,5 @@ rule modify_report:
outdir="{OUTDIR}"
shell:
"""
sed 's/"//g' {input} | sed 's/,/\t/g' > {output}
sed 's/"//g' {input} | sed 's/,/\\t/g' > {output}
"""
......@@ -56,7 +56,7 @@ rule config_library_2:
outdir="{OUTDIR}"
shell:
"""
cut -f3,4,5,6,7,8,9 -d "#" {input}| sed 's/#/\t/g' >{output}
cut -f3,4,5,6,7,8,9 -d "#" {input} | sed 's/#/\\t/g' > {output}
"""
#Gene table Toxic
......@@ -71,7 +71,7 @@ rule config_toxic:
outdir="{OUTDIR}"
shell:
"""
sed 's/"//g' {input} | sed 's/,/\t/g' | cut -f2,3,4,5 > {output}
sed 's/"//g' {input} | sed 's/,/\\t/g' | cut -f2,3,4,5 > {output}
"""
###########################################################
......@@ -90,7 +90,7 @@ rule non_pathogenic:
outdir="{OUTDIR}"
shell:
"""
awk '$3 =="non-pathogenic"' {input} | awk '$5 ="-"' | sed 's/ /\t/g' > {output}
awk '$3 =="non-pathogenic"' {input} | awk '$5 ="-"' | sed 's/ /\\t/g' > {output}
"""
# Prediction: confidence 1
......@@ -105,7 +105,7 @@ rule confidence_1:
outdir="{OUTDIR}"
shell:
"""
awk '$4 =="Y" && $3 =="pathogenic"' {input}| awk '$5 ="1"' | sed 's/ /\t/g' > {output}
awk '$4 =="Y" && $3 =="pathogenic"' {input}| awk '$5 ="1"' | sed 's/ /\\t/g' > {output}
"""
# Prediction: confidence 2
......@@ -120,7 +120,7 @@ rule confidence_2:
outdir="{OUTDIR}"
shell:
"""
awk '$4 =="N" && $3 =="pathogenic"' {input}| awk '$5 ="2"' | sed 's/ /\t/g' > {output}
awk '$4 =="N" && $3 =="pathogenic"' {input} | awk '$5 ="2"' | sed 's/ /\\t/g' > {output}
"""
# Combine different prediction files
......
......@@ -40,7 +40,7 @@ rule HMM_correct_format:
outdir="{OUTDIR}"
shell:
"""
sed '/^#/ d' {input} | sed 's/ \+/\t/g' > {output}
sed '/^#/ d' {input} | sed 's/ \+/\\t/g' > {output}
"""
def aggregate_hmm(wildcards):
......@@ -62,7 +62,7 @@ rule HMM_correct_format_2:
outdir="{OUTDIR}"
shell:
"""
cut -f 1,3,5,6 {input} |uniq >{output}
cut -f 1,3,5,6 {input} | uniq > {output}
"""
rule HMM_correct_format_3:
......@@ -77,5 +77,5 @@ rule HMM_correct_format_3:
echo "#Toxin" > {wildcards.OUTDIR}/{wildcards.project}/TOXIN/HMM_toxin/{wildcards.input_file}_header
cat {wildcards.OUTDIR}/{wildcards.project}/TOXIN/HMM_toxin/{wildcards.input_file}_header {input} > {output}
rm -rf {wildcards.OUTDIR}/{wildcards.project}/TOXIN/HMM_toxin/{wildcards.input_file}_header
sed -i $'1 i\\\ Query_sequence\tHMM_Name\tSignificance_Evalue\tScore' {output}
sed -i $'1 i\\\ Query_sequence\\tHMM_Name\\tSignificance_Evalue\\tScore' {output}
"""
......@@ -21,7 +21,7 @@ rule merge_final:
outdir="{OUTDIR}"
shell:
"""
join -t $'\t' <(sort {input[0]}) <(sort {input[1]}) >{output}
join -t $'\\t' <(sort {input[0]}) <(sort {input[1]}) > {output}
"""
rule toxin_report:
......@@ -32,5 +32,5 @@ rule toxin_report:
params:
outdir="{OUTDIR}"
shell:
"sed -i $'1 i\\\ Sequence no.\tSequence Query\tNumber of Hits\tHMM prediction\tSignalP\tConfidence level' {input};"
"sed -i $'1 i\\\ Sequence no.\\tSequence Query\\tNumber of Hits\\tHMM prediction\\tSignalP\\tConfidence level' {input};"
"cp {input} {output}"
......@@ -38,7 +38,7 @@ rule generate_translation:
outdir="{OUTDIR}"
shell:
"""
paste {input[0]} {input[1]} | awk 'sub(/^>/,"")' OFS='\t' > {output}
paste {input[0]} {input[1]} | awk 'sub(/^>/,"")' OFS='\\t' > {output}
"""
###############################
......
......@@ -38,5 +38,5 @@ rule generate_ContigTranslation:
outdir="{OUTDIR}"
shell:
"""
paste {input[0]} {input[1]} | awk 'sub(/^>/,"")' OFS='\t' > {output}
paste {input[0]} {input[1]} | awk 'sub(/^>/,"")' OFS='\\t' > {output}
"""
......@@ -33,7 +33,7 @@ rule SignalP_format:
outdir="{OUTDIR}"
shell:
"""
sed '1,2d' {input} >{output}
sed '1,2d' {input} > {output}
"""
rule signalP_modified:
......@@ -45,7 +45,7 @@ rule signalP_modified:
outdir="{OUTDIR}"
shell:
"""
awk '{{print $1"\t"$10}}' {input} > {output}
awk '{{print $1"\\t"$10}}' {input} > {output}
"""
def aggregate_input(wildcards):
......
......@@ -18,7 +18,7 @@ rule merge_SignalPVir:
"{OUTDIR}/{project}/VIRULENCE/virulence_merged/{input_file}_virulence_SignalP_prediction.tsv"
shell:
"""
join -t $'\t' <(sort {input[0]}) <(sort {input[1]}) > {output[0]}
join -t $'\\t' <(sort {input[0]}) <(sort {input[1]}) > {output[0]}
"""
###########################################################
......@@ -34,7 +34,7 @@ rule SignalVir_non_pathogenic:
message: "The confidence level of Non-Pathogenic is given to query sequences which are negative for both SignalP as Virulence prediction"
shell:
"""
awk '$4 =="Non-Pathogenic"' {input} | awk '$6 ="-"' | sed 's/ /\t/g' > {output}
awk '$4 =="Non-Pathogenic"' {input} | awk '$6 ="-"' | sed 's/ /\\t/g' > {output}
"""
# Prediction: confidence 1
......@@ -47,7 +47,7 @@ rule SignalVir_confidence_1:
"The confidence level of 1 is given to query sequences which are positives for both SignalP as Virulence"
shell:
"""
awk '$4 =="Pathogenic" && $5 =="Y"' {input} | awk '$6 ="1"' | sed 's/ /\t/g' > {output}
awk '$4 =="Pathogenic" && $5 =="Y"' {input} | awk '$6 ="1"' | sed 's/ /\\t/g' > {output}
"""
rule SignalVir_confidence_2:
......@@ -57,7 +57,7 @@ rule SignalVir_confidence_2:
temp("{OUTDIR}/{project}/VIRULENCE/Virulence_prediction/{input_file}_virulence_confidence_2.txt")
shell:
"""
awk '$4 =="Pathogenic" && $5 =="N"' {input} | awk '$6 ="2"' | sed 's/ /\t/g' > {output}
awk '$4 =="Pathogenic" && $5 =="N"' {input} | awk '$6 ="2"' | sed 's/ /\\t/g' > {output}
"""
rule SignalVir_confidence_3:
......@@ -67,7 +67,7 @@ rule SignalVir_confidence_3:
temp("{OUTDIR}/{project}/VIRULENCE/Virulence_prediction/{input_file}_virulence_confidence_3.txt")
shell:
"""
awk '$4 =="-" && $5 =="Y"' {input} | awk '$6 ="3"' | sed 's/ /\t/g' > {output}
awk '$4 =="-" && $5 =="Y"' {input} | awk '$6 ="3"' | sed 's/ /\\t/g' > {output}
"""
rule SignalVir_confidence_4:
......@@ -77,7 +77,7 @@ rule SignalVir_confidence_4:
temp("{OUTDIR}/{project}/VIRULENCE/Virulence_prediction/{input_file}_virulence_confidence_4.txt")
shell:
"""
awk '$4 =="-" && $5 =="N"' {input} | awk '$6 ="4"' | sed 's/ /\t/g' > {output}
awk '$4 =="-" && $5 =="N"' {input} | awk '$6 ="4"' | sed 's/ /\\t/g' > {output}
"""
rule SignalVir_virulence_prediction:
......
......@@ -40,7 +40,7 @@ rule HMM_correct_format_vir:
outdir="{OUTDIR}"
shell:
"""
sed '/^#/ d' {input} | sed 's/ \+/\t/g' > {output}
sed '/^#/ d' {input} | sed 's/ \+/\\t/g' > {output}
"""
def aggregate_hmm(wildcards):
......@@ -89,7 +89,7 @@ rule HMM_VIR_nonpath:
temp("{OUTDIR}/{project}/VIRULENCE/HMM_virulence/{input_file}_hmm_non_path.txt")
shell:
"""
awk '$2 == "True" && $3 == "False" && $4 == "False"' {input} | awk '$5 = "Non-Pathogenic"' | sed 's/ /\t/g' > {output}
awk '$2 == "True" && $3 == "False" && $4 == "False"' {input} | awk '$5 = "Non-Pathogenic"' | sed 's/ /\\t/g' > {output}
"""
rule HMM_VIR_pathogenic:
......@@ -99,7 +99,7 @@ rule HMM_VIR_pathogenic:
temp("{OUTDIR}/{project}/VIRULENCE/HMM_virulence/{input_file}_hmm_pathogenic.txt")
shell:
"""
awk '$3 == "True"' {input} | awk '$5 = "Pathogenic"' | sed 's/ /\t/g' > {output}
awk '$3 == "True"' {input} | awk '$5 = "Pathogenic"' | sed 's/ /\\t/g' > {output}
"""
rule HMM_VIR_unclassified:
......@@ -109,7 +109,7 @@ rule HMM_VIR_unclassified:
temp("{OUTDIR}/{project}/VIRULENCE/HMM_virulence/{input_file}_hmm_unclassified.txt")
shell:
"""
awk '$3 == "False" && $4 == "True"' {input} | awk '$5 = "Unclassified"' | sed 's/ /\t/g' > {output}
awk '$3 == "False" && $4 == "True"' {input} | awk '$5 = "Unclassified"' | sed 's/ /\\t/g' > {output}
"""
rule HMM_VIR_report:
......@@ -214,11 +214,11 @@ rule join_matrix:
echo "xjoin: need at least 2 files" >&2
return 1
elif [ "$#" -lt 3 ]; then
join -t $'\t' <($srt "$1") <($srt "$2")
join -t $'\\t' <($srt "$1") <($srt "$2")
else
f=$1
shift
join -t $'\t' <($srt "$f") <(xjoin "$@")
join -t $'\\t' <($srt "$f") <(xjoin "$@")
fi
}}
......@@ -264,7 +264,7 @@ rule format_classifier_2:
"{OUTDIR}/{project}/VIRULENCE/classifier_virulence/{input_file}_classifier_results_formatted.tsv"
shell:
"""
awk '{{$1=sprintf("%010d", $1)}}1' {input} | sed 's/ /\t/g' > {output}
awk '{{$1=sprintf("%010d", $1)}}1' {input} | sed 's/ /\\t/g' > {output}
"""
##############################
......@@ -277,8 +277,8 @@ rule merge_virulence:
output:
"{OUTDIR}/{project}/VIRULENCE/HMM_classifier_virulence/{input_file}_Virulence_prediction.tsv"
shell:
"join -t $'\t' <(sort {input[0]}) <(sort {input[1]}) > {output};"
"sed -i $'1 i\\\ ID\tHMM_prediction\tmodel_prediction' {output}"
"join -t $'\\t' <(sort {input[0]}) <(sort {input[1]}) > {output};"
"sed -i $'1 i\\\ ID\\tHMM_prediction\\tmodel_prediction' {output}"
rule virulence_nonpath:
input:
......@@ -287,7 +287,7 @@ rule virulence_nonpath:
temp("{OUTDIR}/{project}/VIRULENCE/HMM_classifier_virulence/{input_file}_prediction_non_path_1.tsv")
shell:
"""
awk '$2 == "Non-Pathogenic" && $3 == "negative"' {input} | awk '$4 = "Non-Pathogenic"' | sed 's/ /\t/g' > {output}
awk '$2 == "Non-Pathogenic" && $3 == "negative"' {input} | awk '$4 = "Non-Pathogenic"' | sed 's/ /\\t/g' > {output}
"""
rule virulence_nonpath2:
......@@ -297,7 +297,7 @@ rule virulence_nonpath2:
temp("{OUTDIR}/{project}/VIRULENCE/HMM_classifier_virulence/{input_file}_prediction_non_path_2.tsv")
shell:
"""
awk '$2 == "Unclassified" && $3 == "negative"' {input} | awk '$4 = "Non-Pathogenic"' | sed 's/ /\t/g' > {output}
awk '$2 == "Unclassified" && $3 == "negative"' {input} | awk '$4 = "Non-Pathogenic"' | sed 's/ /\\t/g' > {output}
"""
rule virulence_path:
......@@ -307,7 +307,7 @@ rule virulence_path:
temp("{OUTDIR}/{project}/VIRULENCE/HMM_classifier_virulence/{input_file}_prediction_pathogenic_1.tsv")
shell:
"""
awk '$2 == "Pathogenic" && $3 == "pathogenic"' {input} | awk '$4 = "Pathogenic"' | sed 's/ /\t/g' > {output}
awk '$2 == "Pathogenic" && $3 == "pathogenic"' {input} | awk '$4 = "Pathogenic"' | sed 's/ /\\t/g' > {output}
"""
rule virulence_path2:
......@@ -317,7 +317,7 @@ rule virulence_path2:
temp("{OUTDIR}/{project}/VIRULENCE/HMM_classifier_virulence/{input_file}_prediction_pathogenic_2.tsv")
shell:
"""
awk '$2 == "Unclassified" && $3 == "pathogenic"' {input} | awk '$4 = "Pathogenic"' | sed 's/ /\t/g' > {output}
awk '$2 == "Unclassified" && $3 == "pathogenic"' {input} | awk '$4 = "Pathogenic"' | sed 's/ /\\t/g' > {output}
"""
rule virulence_unclassified1:
......@@ -327,7 +327,7 @@ rule virulence_unclassified1:
temp("{OUTDIR}/{project}/VIRULENCE/HMM_classifier_virulence/{input_file}_prediction_unclassified_1.tsv")
shell:
"""
awk '$2 == "Pathogenic" && $3 == "negative"' {input} | awk '$4 = "-"' | sed 's/ /\t/g' > {output}
awk '$2 == "Pathogenic" && $3 == "negative"' {input} | awk '$4 = "-"' | sed 's/ /\\t/g' > {output}
"""
rule virulence_unclassified2:
......@@ -337,7 +337,7 @@ rule virulence_unclassified2:
temp("{OUTDIR}/{project}/VIRULENCE/HMM_classifier_virulence/{input_file}_prediction_unclassified_2.tsv")
shell:
"""
awk '$2 == "Non-Pathogenic" && $3 == "pathogenic"' {input} | awk '$4 = "-"' | sed 's/ /\t/g' > {output}
awk '$2 == "Non-Pathogenic" && $3 == "pathogenic"' {input} | awk '$4 = "-"' | sed 's/ /\\t/g' > {output}
"""
rule merge_combined:
......
......@@ -22,7 +22,7 @@ rule Virulence_merge_final:
outdir="{OUTDIR}"
shell:
"""
join -t $'\t' <(sort {input[0]}) <(sort {input[1]}) > {output}
join -t $'\\t' <(sort {input[0]}) <(sort {input[1]}) > {output}
"""
rule virulence_report:
......@@ -33,5 +33,5 @@ rule virulence_report:
params:
outdir="{OUTDIR}"
shell:
"sed -i $'1 i\\\ Sequence no.\tSequence Query\tHMM prediction\tclassifier prediction\tVirulence_prediction\tSignalP\tConfidence level' {input};"
"sed -i $'1 i\\\ Sequence no.\\tSequence Query\\tHMM prediction\\tclassifier prediction\\tVirulence_prediction\\tSignalP\\tConfidence level' {input};"
"cp {input} {output}"
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment