#Toxin

import glob
import os


# Search one split FASTA file against the toxin HMM library.
# NOTE: the rule/message/file suffix say "HMM scan"/".hmmscan", but the
# command actually executed is `hmmsearch`; the names are kept because other
# rules refer to these paths.
# The tabular hits go to {output} (--tblout); everything hmmsearch prints on
# stdout/stderr is captured in {log}.
rule run_HMM_tox:
    input:
        hmm=config["pathofact"]["tox_hmm"],
        renamed="{datadir}/{project}/splitted/{sample}/{file_i}.fasta"
    output:
        temp("{datadir}/{project}/TOXIN/HMM_toxin/{sample}/{file_i}.hmmscan")
    log:
        "{datadir}/{project}/TOXIN/HMM_toxin/{sample}/{file_i}.log"
    message:
        "Run HMM scan on {input.renamed} to generate {output}"
    params:
        outdir="{datadir}",
        runtime=config["pathofact"]["runtime"]["long"],
        mem=config["pathofact"]["mem"]["normal_mem_per_core_gb"]
    conda:
        "../../envs/HMMER.yaml"
    threads:
        1
    shell:
        """
        hmmsearch --cpu {threads} --noali --notextw --tblout {output} {input.hmm} {input.renamed} &> {log}
        """


# Adjust HMM results to correct format:
#   1. drop the comment lines (those starting with '#'),
#   2. turn every run of spaces into a single tab so the table can be
#      processed with `cut -f` downstream.
rule HMM_correct_format:
    input:
        "{datadir}/{project}/TOXIN/HMM_toxin/{sample}/{file_i}.hmmscan"
    output:
        temp("{datadir}/{project}/TOXIN/HMM_toxin/{sample}/{file_i}.hmm.csv")
    message:
        "Adjust {input} to correct format: {output}"
    params:
        outdir="{datadir}",
        runtime=config["pathofact"]["runtime"]["short"],
        mem=config["pathofact"]["mem"]["normal_mem_per_core_gb"]
    # Backslashes are doubled because this is a regular (non-raw) Python
    # string: the shell receives  sed -e '/^#/d' -e 's/ \+/\t/g'.
    shell:
        """
        sed -e '/^#/d' -e 's/ \\+/\\t/g' {input} > {output}
        """


def aggregate_hmm(wildcards):
    """Input function: list the per-split ``.hmm.csv`` files of one sample.

    Looks up the ``splits`` output of the ``splitting`` checkpoint for the
    current wildcards, globs the ``<i>.fasta`` files found there and returns
    the matching ``TOXIN/HMM_toxin/<sample>/<i>.hmm.csv`` paths, so the
    number of inputs follows whatever the checkpoint produced.
    """
    split_dir = checkpoints.splitting.get(**wildcards).output.splits
    split_ids = glob_wildcards(os.path.join(split_dir, "{i}.fasta")).i
    return expand(
        "{datadir}/{project}/TOXIN/HMM_toxin/{sample}/{file_i}.hmm.csv",
        datadir=wildcards.datadir,
        project=wildcards.project,
        sample=wildcards.sample,
        file_i=split_ids
    )


# Concatenate all per-split tables of a sample, keeping only fields 1, 3, 5
# and 6 (the ones HMM_correct_format_3 labels Query_sequence, HMM_Name,
# Significance_Evalue and Score) and collapsing adjacent duplicate lines.
rule HMM_correct_format_2:
    input:
        aggregate_hmm
    output:
        temp("{datadir}/{project}/TOXIN/HMM_toxin/{sample}.Input_HMM_R_temp.csv")
    params:
        outdir="{datadir}",
        runtime=config["pathofact"]["runtime"]["short"],
        mem=config["pathofact"]["mem"]["normal_mem_per_core_gb"]
    # stdin is redirected from /dev/null so that, should the checkpoint yield
    # no split files ({input} expands to nothing), `cut` sees an empty stream
    # instead of waiting on the job's stdin. With files present the
    # redirection has no effect.
    shell:
        """
        cut -f 1,3,5,6 {input} < /dev/null | uniq > {output}
        """


# Prepend the two header lines expected in the final table:
#   line 1: tab-separated column names
#   line 2: the "#Toxin" marker
# followed by the unchanged content of the temporary table.
rule HMM_correct_format_3:
    input:
        "{datadir}/{project}/TOXIN/HMM_toxin/{sample}.Input_HMM_R_temp.csv"
    output:
        "{datadir}/{project}/TOXIN/HMM_toxin/{sample}.Input_HMM_R.csv"
    params:
        outdir="{datadir}",
        runtime=config["pathofact"]["runtime"]["short"],
        mem=config["pathofact"]["mem"]["normal_mem_per_core_gb"]
    # Written in a single pass: no scratch "<sample>_header" file and no
    # in-place `sed -i` edit of the output. `\\t` / `\\n` reach printf as
    # \t / \n and are expanded there to TAB / newline.
    shell:
        """
        printf 'Query_sequence\\tHMM_Name\\tSignificance_Evalue\\tScore\\n#Toxin\\n' | cat - {input} > {output}
        """