Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Sign in
Toggle navigation
Menu
Open sidebar
IMP
IMP_manuscript_analysis
Commits
1b0bca6d
Commit
1b0bca6d
authored
Sep 21, 2016
by
Shaman Narayanasamy
Browse files
Remove CPM measure scripts
parent
830f893e
Changes
7
Hide whitespace changes
Inline
Side-by-side
additional_analyses/CPM_measure_analysis/rules/find_AL.rule
deleted
100644 → 0
View file @
830f893e
rule find_AL:
input:
"{assm}.processed.coords.filtered"
output:
"{assm}.processed.coords.filtered_AL"
shell:
"""
Rscript
"""
additional_analyses/CPM_measure_analysis/rules/find_AL1000.rule
deleted
100644 → 0
View file @
830f893e
rule find_AL1000:
input:
"{assm}.processed.coords.filtered"
output:
"{assm}.processed.coords.filtered_AL1000"
shell:
"""
Rscript
"""
additional_analyses/CPM_measure_analysis/rules/find_C1000.rule
deleted
100644 → 0
View file @
830f893e
rule find_C1000:
input:
"{assm}.processed.coords.filtered_AL1000"
output:
"{assm}.processed.coords.filtered_C1000"
shell:
"""
bedtools merge {input} > output
"""
additional_analyses/CPM_measure_analysis/rules/process_nucmer.rule
deleted
100644 → 0
View file @
830f893e
rule process_table:
input:
"%s/{assm}.coords.filtered" % NUCMER_RES
output:
"{assm}.processed.coords.filtered"
shell:
"""
sed -e "s/ | /\t/g" {input} | grep -v "^==========" > {output}
"""
additional_analyses/CPM_measure_analysis/src/getAL_info.R
deleted
100644 → 0
View file @
830f893e
#!/bin/R
require
(
stringr
)
require
(
bedr
)
args
<-
commandArgs
(
trailingOnly
=
TRUE
)
#numcer.tab <- args[1]
## Read in nucmer table
nucmer.tab
<-
"/home/shaman/Work/Data/integrated-omics-pipeline/MS_analysis/metaquast_output/CPM_analysis/IMP.processed.coords.filtered"
nucmer.dat
<-
read.delim
(
nucmer.tab
,
sep
=
"\t"
,
header
=
F
)
colnames
(
nucmer.dat
)
<-
c
(
"rstart"
,
"rend"
,
"cstart"
,
"cend"
,
"rlen"
,
"clen"
,
"pident"
,
"ref"
,
"contig"
,
"ambiguity"
)
al
<-
nucmer.dat
al1000
<-
subset
(
nucmer.dat
,
clen
>=
1000
)
bed
<-
nucmer.dat
[,
c
(
"contig"
,
"cstart"
,
"cend"
)]
bed.1
<-
subset
(
bed
,
cstart
<
cend
)
# Calculate M: sum length of all aligned regions
M
<-
sum
(
al
$
clen
)
# Calculate N: sum length of all unaligned regions
macr
<-
max
(
al
$
clen
)
## Write out AL file
write.table
(
al
,
"/home/shaman/Work/Data/integrated-omics-pipeline/MS_analysis/metaquast_output/CPM_analysis/IMP.processed.coords.filtered_AL"
,
row.names
=
F
,
col.names
=
F
,
sep
=
"\t"
,
quote
=
F
)
## Write out AL file
write.table
(
subset
(
nucmer.dat
,
clen
>=
1000
),
"/home/shaman/Work/Data/integrated-omics-pipeline/MS_analysis/metaquast_output/CPM_analysis/IMP.processed.coords.filtered_AL1000"
,
row.names
=
F
,
col.names
=
F
,
sep
=
"\t"
,
quote
=
F
)
## Write out bedfile
write.table
(
bed
,
"/home/shaman/Work/Data/integrated-omics-pipeline/MS_analysis/metaquast_output/CPM_analysis/IMP.processed.coords.filtered.bed"
,
row.names
=
F
,
col.names
=
F
,
sep
=
"\t"
,
quote
=
F
)
bed.2
<-
unique
(
paste
(
bed
$
contig
,
":"
,
bed
$
cstart
,
"-"
,
bed
$
cend
,
sep
=
""
))
bed2index
(
bed
)
bed.3
<-
bed.2
[
is.valid.region
(
bed.2
,
check.chr
=
F
)]
convert2bed
(
bed.2
,
check.zero.based
=
F
,
check.valid
=
F
)
bedr.merge.region
(
bed.3
,
check.chr
=
F
,
check.valid
=
T
,
check.sort
=
T
,
check.zero.based
=
T
)
additional_analyses/CPM_measure_analysis/src/preload_modules.sh
deleted
100644 → 0
View file @
830f893e
IMP_ENV
=
/mnt/nfs/projects/ecosystem_biology/local_tools/IMP/dependencies
export
PATH
=
$IMP_ENV
/fastuniq/source:
$PATH
export
PATH
=
$IMP_ENV
/sortmerna-2.0:
$PATH
export
PATH
=
$IMP_ENV
/sortmerna-2.0/scripts:
$PATH
export
PATH
=
$IMP_ENV
/bwa-0.7.9a:
$PATH
export
PATH
=
$IMP_ENV
/idba-1.1.1/bin:
$PATH
export
PATH
=
$IMP_ENV
/megahit:
$PATH
export
PATH
=
$IMP_ENV
/CAP3:
$PATH
export
PATH
=
$IMP_ENV
/prokka/bin:
$PATH
export
PATH
=
$IMP_ENV
/quast:
$PATH
export
PATH
=
$IMP_ENV
/quast/libs/genemark/linux_64:
$PATH
export
PATH
=
$IMP_ENV
/prokka/binaries/linux:
$PATH
export
PATH
=
$IMP_ENV
/cd-hit-v4.6.1-2012-08-27_OpenMP:
$PATH
# Samtools must be full path!
export
PATH
=
/mnt/nfs/projects/ecosystem_biology/local_tools/IMP/dependencies/samtools-0.1.19:
$PATH
export
PATH
=
/mnt/nfs/projects/ecosystem_biology/local_tools/IMP/dependencies/bedtools2/bin:
$PATH
export
PATH
=
$PATH
:/mnt/nfs/projects/ecosystem_biology/local_tools/IMP/dependencies/bedtools2/bin
export
PATH
=
$PATH
:
$IMP_ENV
/bedtools2/bin
export
PATH
=
/mnt/nfs/projects/ecosystem_biology/local_tools/IMP/dependencies/quast:
$IMP_ENV
:
$PATH
#source this file before execution of snakefile
#module load Python
source
/mnt/nfs/projects/ecosystem_biology/local_tools/IMP/bin/activate
module load lang/Java/1.7.0_21
#
#module load MEGAHIT
#module load BWA
#module load SAMtools
#module load BEDTools
#module load OpenBLAS
#module load Boost/1.53.0-ictce-5.3.0
#
#export PATH=$PATH:/mnt/nfs/projects/ecosystem_biology/local_tools/idba-1.1.1.icc/bin
#
#module load CAP3
#
##symbolic links for prokka db
#module load prokka
#
#export PATH=$PATH:/mnt/nfs/projects/ecosystem_biology/local_tools/tabix-0.2.6
#export PATH=$PATH:/mnt/nfs/projects/ecosystem_biology/local_tools/gkno_launcher/tools/freebayes/bin
#export PATH=$PATH:/mnt/nfs/projects/ecosystem_biology/local_tools/vcftools/bin
#export PERL5LIB=$PERL5LIB:/mnt/nfs/projects/ecosystem_biology/local_tools/vcftools/perl
#export PATH=$PATH:/mnt/nfs/projects/ecosystem_biology/local_tools/Platypus/Platypus_0.7.9.1
#
#module load R
#Rscript -e "install.packages('beanplot')"
#
#module list
#The Boost C++ Libraries were successfully built!
#
#The following directory should be added to compiler include paths:
#
# /mnt/src_nfs1/projects/ecosystem_biology/local_tools/IMP/dependencies/boost_1_54_0
#
#The following directory should be added to linker library paths:
#
# /mnt/src_nfs1/projects/ecosystem_biology/local_tools/IMP/dependencies/boost_1_54_0/stage/lib
#
additional_analyses/CPM_measure_analysis/workflow/cpm_workflow
deleted
100644 → 0
View file @
830f893e
import subprocess
#TMPDIR = os.environ.get("TMPDIR", "/tmp")
#SRCDIR = os.environ.get("SRCDIR", "src")
#CONFIG = os.environ.get("CONFIG", "config_normalNode.json")
#DBPATH = os.environ.get("DBPATH", "/mnt/nfs/projects/ecosystem_biology/local_tools/IMP/dependencies/prokka/db")
#
#configfile: CONFIG
#
#MEMCORE = os.environ.get("MEMCORE", config['memory_per_core_gb'])
#THREADS = os.environ.get("THREADS", config['threads'])
#MEMTOTAL = os.environ.get("MEMTOTAL", config['memory_total_gb'])
## Define input directories
# Nucmer results (from metaQUAST)
NUCMER_RES = "/scratch/users/snarayanasamy/IMP_MS_data/metaquast_analysis/SM/combined_reference/contigs_reports/nucmer_output"
# metaQUAST table results
MQ_RES = "/scratch/users/snarayanasamy/IMP_MS_data/metaquast_analysis/SM/combined_reference/contigs_reports/nucmer_output"
## Define samples
ASSMS = [ 'IMP', 'IMP-megahit', 'IMP_MG', 'IMP_MT', 'MetAmos_MG', 'MetAmos_MGMT', 'MOCAT_MG', 'MOCAT_MGMT' ]
## Define output directory
OUTDIR = os.environ.get("OUTDIR", "/scratch/users/snarayanasamy/IMP_MS_data/metaquast_analysis/CPM_analysis")
workdir:
OUTDIR
include:
'../rules/process_nucmer.rule'
#include:
# '../rules/find_AL.rule'
#
rule CPM_ALL:
input:
expand("{assm}.processed.coords.filtered", assm=ASSMS),
#expand("{assm}.processed.coords.filtered_AL", assm=ASSMS)
output:
touch('cpm.done')
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment