Commit 1b0bca6d authored by Shaman Narayanasamy's avatar Shaman Narayanasamy
Browse files

Remove CPM measure scripts

parent 830f893e
rule find_AL:
input:
"{assm}.processed.coords.filtered"
output:
"{assm}.processed.coords.filtered_AL"
shell:
"""
Rscript
"""
rule find_AL1000:
input:
"{assm}.processed.coords.filtered"
output:
"{assm}.processed.coords.filtered_AL1000"
shell:
"""
Rscript
"""
rule find_C1000:
input:
"{assm}.processed.coords.filtered_AL1000"
output:
"{assm}.processed.coords.filtered_C1000"
shell:
"""
bedtools merge {input} > output
"""
rule process_table:
input:
"%s/{assm}.coords.filtered" % NUCMER_RES
output:
"{assm}.processed.coords.filtered"
shell:
"""
sed -e "s/ | /\t/g" {input} | grep -v "^==========" > {output}
"""
#!/bin/R
require(stringr)
require(bedr)
args <- commandArgs(trailingOnly = TRUE)
#numcer.tab <- args[1]
## Read in nucmer table
nucmer.tab <- "/home/shaman/Work/Data/integrated-omics-pipeline/MS_analysis/metaquast_output/CPM_analysis/IMP.processed.coords.filtered"
nucmer.dat <- read.delim(nucmer.tab, sep="\t", header=F)
colnames(nucmer.dat) <- c("rstart", "rend", "cstart", "cend", "rlen", "clen", "pident", "ref", "contig", "ambiguity")
al <- nucmer.dat
al1000 <- subset(nucmer.dat, clen >= 1000)
bed <- nucmer.dat[, c("contig", "cstart", "cend")]
bed.1 <- subset(bed, cstart < cend)
# Calculate M: sum length of all aligned regions
M <- sum(al$clen)
# Calculate N: sum length of all unaligned regions
macr <-
max(al$clen)
## Write out AL file
write.table(al,
"/home/shaman/Work/Data/integrated-omics-pipeline/MS_analysis/metaquast_output/CPM_analysis/IMP.processed.coords.filtered_AL",
row.names=F,
col.names=F,
sep="\t",
quote=F)
## Write out AL file
write.table(subset(nucmer.dat, clen >= 1000),
"/home/shaman/Work/Data/integrated-omics-pipeline/MS_analysis/metaquast_output/CPM_analysis/IMP.processed.coords.filtered_AL1000",
row.names=F,
col.names=F,
sep="\t",
quote=F)
## Write out bedfile
write.table(bed,
"/home/shaman/Work/Data/integrated-omics-pipeline/MS_analysis/metaquast_output/CPM_analysis/IMP.processed.coords.filtered.bed",
row.names=F,
col.names=F,
sep="\t",
quote=F)
bed.2 <- unique(paste(bed$contig, ":", bed$cstart, "-", bed$cend, sep=""))
bed2index(bed)
bed.3 <- bed.2[is.valid.region(bed.2, check.chr=F)]
convert2bed(bed.2, check.zero.based=F, check.valid=F)
bedr.merge.region(bed.3, check.chr=F, check.valid=T, check.sort=T, check.zero.based=T)
IMP_ENV=/mnt/nfs/projects/ecosystem_biology/local_tools/IMP/dependencies
export PATH=$IMP_ENV/fastuniq/source:$PATH
export PATH=$IMP_ENV/sortmerna-2.0:$PATH
export PATH=$IMP_ENV/sortmerna-2.0/scripts:$PATH
export PATH=$IMP_ENV/bwa-0.7.9a:$PATH
export PATH=$IMP_ENV/idba-1.1.1/bin:$PATH
export PATH=$IMP_ENV/megahit:$PATH
export PATH=$IMP_ENV/CAP3:$PATH
export PATH=$IMP_ENV/prokka/bin:$PATH
export PATH=$IMP_ENV/quast:$PATH
export PATH=$IMP_ENV/quast/libs/genemark/linux_64:$PATH
export PATH=$IMP_ENV/prokka/binaries/linux:$PATH
export PATH=$IMP_ENV/cd-hit-v4.6.1-2012-08-27_OpenMP:$PATH
# Samtools must be full path!
export PATH=/mnt/nfs/projects/ecosystem_biology/local_tools/IMP/dependencies/samtools-0.1.19:$PATH
export PATH=/mnt/nfs/projects/ecosystem_biology/local_tools/IMP/dependencies/bedtools2/bin:$PATH
export PATH=$PATH:/mnt/nfs/projects/ecosystem_biology/local_tools/IMP/dependencies/bedtools2/bin
export PATH=$PATH:$IMP_ENV/bedtools2/bin
export PATH=/mnt/nfs/projects/ecosystem_biology/local_tools/IMP/dependencies/quast:$IMP_ENV:$PATH
#source this file before execution of snakefile
#module load Python
source /mnt/nfs/projects/ecosystem_biology/local_tools/IMP/bin/activate
module load lang/Java/1.7.0_21
#
#module load MEGAHIT
#module load BWA
#module load SAMtools
#module load BEDTools
#module load OpenBLAS
#module load Boost/1.53.0-ictce-5.3.0
#
#export PATH=$PATH:/mnt/nfs/projects/ecosystem_biology/local_tools/idba-1.1.1.icc/bin
#
#module load CAP3
#
##symbolic links for prokka db
#module load prokka
#
#export PATH=$PATH:/mnt/nfs/projects/ecosystem_biology/local_tools/tabix-0.2.6
#export PATH=$PATH:/mnt/nfs/projects/ecosystem_biology/local_tools/gkno_launcher/tools/freebayes/bin
#export PATH=$PATH:/mnt/nfs/projects/ecosystem_biology/local_tools/vcftools/bin
#export PERL5LIB=$PERL5LIB:/mnt/nfs/projects/ecosystem_biology/local_tools/vcftools/perl
#export PATH=$PATH:/mnt/nfs/projects/ecosystem_biology/local_tools/Platypus/Platypus_0.7.9.1
#
#module load R
#Rscript -e "install.packages('beanplot')"
#
#module list
#The Boost C++ Libraries were successfully built!
#
#The following directory should be added to compiler include paths:
#
# /mnt/src_nfs1/projects/ecosystem_biology/local_tools/IMP/dependencies/boost_1_54_0
#
#The following directory should be added to linker library paths:
#
# /mnt/src_nfs1/projects/ecosystem_biology/local_tools/IMP/dependencies/boost_1_54_0/stage/lib
#
import subprocess
#TMPDIR = os.environ.get("TMPDIR", "/tmp")
#SRCDIR = os.environ.get("SRCDIR", "src")
#CONFIG = os.environ.get("CONFIG", "config_normalNode.json")
#DBPATH = os.environ.get("DBPATH", "/mnt/nfs/projects/ecosystem_biology/local_tools/IMP/dependencies/prokka/db")
#
#configfile: CONFIG
#
#MEMCORE = os.environ.get("MEMCORE", config['memory_per_core_gb'])
#THREADS = os.environ.get("THREADS", config['threads'])
#MEMTOTAL = os.environ.get("MEMTOTAL", config['memory_total_gb'])
## Define input directories
# Nucmer results (from metaQUAST)
NUCMER_RES = "/scratch/users/snarayanasamy/IMP_MS_data/metaquast_analysis/SM/combined_reference/contigs_reports/nucmer_output"
# metaQUAST table results
MQ_RES = "/scratch/users/snarayanasamy/IMP_MS_data/metaquast_analysis/SM/combined_reference/contigs_reports/nucmer_output"
## Define samples
ASSMS = [ 'IMP', 'IMP-megahit', 'IMP_MG', 'IMP_MT', 'MetAmos_MG', 'MetAmos_MGMT', 'MOCAT_MG', 'MOCAT_MGMT' ]
## Define output directory
OUTDIR = os.environ.get("OUTDIR", "/scratch/users/snarayanasamy/IMP_MS_data/metaquast_analysis/CPM_analysis")
workdir:
OUTDIR
include:
'../rules/process_nucmer.rule'
#include:
# '../rules/find_AL.rule'
#
rule CPM_ALL:
input:
expand("{assm}.processed.coords.filtered", assm=ASSMS),
#expand("{assm}.processed.coords.filtered_AL", assm=ASSMS)
output:
touch('cpm.done')
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment