Commit 3b812953 authored by Shaman Narayanasamy's avatar Shaman Narayanasamy
Browse files

Merge branch 'master' of...

Merge branch 'master' of ssh://git-r3lab-server.uni.lu:8022/shaman.narayanasamy/IMP_manuscript_analysis
parents 517dfe00 b4e0907e
......@@ -24,3 +24,60 @@ additional_analyses/preprocessing/metAmos_preprocessing.xlsx
additional_analyses/preprocessing/preprocessing.xlsx
additional_analyses/preprocessing/preprocessing.xlsz.ods
additional_analyses/preprocessing/preprocessing_analysis.R~
additional_analyses/preprocessing/preprocessing_analysis.R~
metaquast_analysis/*.stdout
metaquast_analysis/*.stderr
iterative_assembly/MT_assembly/*.stderr
iterative_assembly/MT_assembly/*.stdout
iterative_assembly/MG_assembly/*.stderr
iterative_assembly/MG_assembly/*.stdout
iterative_assembly/*/*~
additional_analyses/single_omicVsIntegrated/metAmos/*.stderr
additional_analyses/single_omicVsIntegrated/metAmos/*.stdout
additional_analyses/single_omicVsIntegrated/MOCAT/*.stderr
additional_analyses/single_omicVsIntegrated/MOCAT/*.stdout
additional_analyses/single_omicVsIntegrated/IMP/*.stderr
additional_analyses/single_omicVsIntegrated/IMP/*.stdout
additional_analyses/*/*.stderr
additional_analyses/*/*.stdout
additional_analyses/*/*~
additional_analyses/*/*/*~
additional_analyses/*/*.tsv
additional_analyses/*/*/.snakemake/
metaquast_analysis/*~
prodigal_analysis/*~
.gitignore~
.gitignore.swp
additional_analyses/HMP_gene_catalog/.snakemake/
additional_analyses/HMP_gene_catalog/config_mapping.json
additional_analyses/HMP_gene_catalog/index_IGC.sh
additional_analyses/HMP_gene_catalog/launcher.sh
additional_analyses/HMP_gene_catalog/map_IMP.rules
additional_analyses/HMP_gene_catalog/map_MOCAT.rules
additional_analyses/HMP_gene_catalog/map_metAmos.rules
additional_analyses/HMP_gene_catalog/preload_modules.sh
additional_analyses/HMP_gene_catalog/test.sh
additional_analyses/MT_genomes/A02_MT_contigs.ids
additional_analyses/MT_genomes/X310763260_MT_contigs.ids
additional_analyses/preprocessing/calculate_reads-MOCAT.sh
additional_analyses/preprocessing/calculate_reads-MetAmos.sh
additional_analyses/single_omicVsIntegrated/config_mapping.json
additional_analyses/single_omicVsIntegrated/preload_modules.sh
iterative_assembly/MG_assembly/.snakemake/
iterative_assembly/MG_assembly/execution.sh
iterative_assembly/MG_assembly/execution_D32.sh
iterative_assembly/MG_assembly/execution_D36.sh
iterative_assembly/MG_assembly/execution_D49.sh
iterative_assembly/MG_assembly/execution_PRJEB8813.sh
iterative_assembly/MG_assembly/execution_X311245214.sh
iterative_assembly/MG_assembly/execution_X316192082.sh
iterative_assembly/MG_assembly/execution_X316701492.sh
iterative_assembly/MG_assembly/execution_X317690558.sh
iterative_assembly/MG_assembly/execution_simDat_v2.sh
iterative_assembly/MT_assembly/.snakemake/
iterative_assembly/MT_assembly/collapsed_contig_gene_stats.tsv
iterative_assembly/MT_assembly/execution.sh
iterative_assembly/MT_assembly/launcher_back.sh
prodigal_analysis/oarlogs/
prodigal_analysis/prodigal_summary.tsv
quast_analysis/
......@@ -43,7 +43,7 @@ get_expressed <- function(MG.file, MT.file, workspace){
hypo.num <- nrow(hypo[hypo$MT_depth > 5,])
return(c(mg.only, mgmt, mt.only, hypo.num, all,
return(c(all, mg.only, mgmt, mt.only,
mg.only/all*100, mgmt/all*100, mt.only/all*100))
}
......@@ -54,29 +54,72 @@ contig_summary <- function(workspace){
mg.only <- length(which(is.na(all.dat$MT_depth)))
mt.only <- length(which(is.na(all.dat$MG_depth)))
all <- nrow(all.dat)
return(c(mg.only, mgmt, mt.only, all,
return(c(all, mg.only, mgmt, mt.only,
mg.only/all*100, mgmt/all*100, mt.only/all*100))
#rm(list=ls())
}
### Read in flagstat data
### Read in all the different Rdat workspaces
samples <- c("SM", "HF1", "HF2", "HF3", "HF4", "HF5", "WW1", "WW2", "WW3", "WW4", "BG")
indir <- "/scratch/users/snarayanasamy/IMP_MS_data/IMP_analysis"
dat <- data.frame(Dataset=as.character(),
contigs <- data.frame(Dataset=as.character(),
ALL_contigs=as.numeric(),
MG_contigs=as.numeric(),
MGMT_contigs=as.numeric(),
MT_contigs=as.numeric(),
MG_fraction=as.numeric(),
MGMT_fraction=as.numeric(),
MT_fraction=as.numeric()
MG_contigs_fraction=as.numeric(),
MGMT_contigs_fraction=as.numeric(),
MT_contigs_fraction=as.numeric()
)
for(i in seq <- along(samples)){
dat <- rbind(dat, c(samples[i],
contig_summary(paste(indir, samples[i],
"Analysis/result/MGMT_results.Rdat", sep="/"))
)
)
### Bind the contig summaries together, one row per sample
for (i in seq_along(samples)) {
  # The saved workspace carries the ".Rdat" extension (the same file the gene
  # summary loads); it lives under <indir>/<sample>/Analysis/results.
  workspace <- file.path(indir, samples[i], "Analysis/results/MGMT_results.Rdat")

  # contig_summary() returns, in order: total, MG-only, MG+MT and MT-only
  # contig counts followed by the three matching percentages.
  # The values are used as numbers directly instead of being pasted into a
  # character vector together with the sample name and parsed back.
  stats <- contig_summary(workspace)

  contigs <- rbind.data.frame(
    contigs,
    data.frame(
      Dataset = as.character(samples[i]),
      ALL_contigs = as.numeric(stats[1]),
      MG_contigs = as.numeric(stats[2]),
      MGMT_contigs = as.numeric(stats[3]),
      MT_contigs = as.numeric(stats[4]),
      MG_contigs_fraction = as.numeric(stats[5]),
      MGMT_contigs_fraction = as.numeric(stats[6]),
      MT_contigs_fraction = as.numeric(stats[7])
    )
  )
}
# Empty gene summary table: one label column plus seven numeric columns,
# filled row by row for each data set.
genes <- data.frame(
  Dataset = character(),
  ALL_genes = numeric(),
  MG_genes = numeric(),
  MGMT_genes = numeric(),
  MT_genes = numeric(),
  MG_genes_fraction = numeric(),
  MGMT_genes_fraction = numeric(),
  MT_genes_fraction = numeric()
)
# Append one gene summary row per data set.
for (i in seq_along(samples)) {
  analysis_dir <- file.path(indir, samples[i], "Analysis")

  # Sample name followed by the values returned by get_expressed(); c()
  # coerces everything to character, so the numbers are parsed back below.
  line <- c(
    samples[i],
    get_expressed(
      file.path(analysis_dir, "MG.gene_depth.avg"),
      file.path(analysis_dir, "MT.gene_depth.avg"),
      file.path(analysis_dir, "results/MGMT_results.Rdat")
    )
  )
  gene_values <- as.numeric(line[2:8])

  genes <- rbind.data.frame(
    genes,
    data.frame(
      Dataset = as.character(line[1]),
      ALL_genes = gene_values[1],
      MG_genes = gene_values[2],
      MGMT_genes = gene_values[3],
      MT_genes = gene_values[4],
      MG_genes_fraction = gene_values[5],
      MGMT_genes_fraction = gene_values[6],
      MT_genes_fraction = gene_values[7]
    )
  )
}
# Export the summary as a tab-separated file without quoting or row names.
# NOTE(review): this writes `dat`, while the loops above accumulate their rows
# in `contigs` and `genes` - confirm that `dat` is still defined at this point
# and is the table intended for export.
write.table(dat, "/scratch/users/snarayanasamy/IMP_MS_data/IMP_data_usage/IMP_data_usage.tsv", sep="\t", quote=F, row.names=F)
#!/bin/bash -l
OARSUB="oarsub --notify "mail:shaman.narayanasamy@uni.lu" -l nodes=1,walltime=120"
OARSUB="oarsub --notify "mail:shaman.narayanasamy@uni.lu" -l nodes=1,walltime=24 -t besteffort -t idempotent"
declare -a SAMPLES=("HF1" "HF2" "HF3" "HF4" "HF5")
......@@ -33,7 +33,7 @@ do
METAMOS_MT="/scratch/users/snarayanasamy/IMP_MS_data/metAmosAnalysis/${S1}/MT/default/${S}/Assemble/out/soapdenovo.31.asm.contig"
MOCAT_MT="/scratch/users/snarayanasamy/IMP_MS_data/MOCAT_analysis/MT/${S1}/${S}_MOCAT_MT"
OUTDIR="/scratch/users/snarayanasamy/IMP_MS_data/metaquast_analysis/${S}"
OUTDIR="/scratch/users/snarayanasamy/IMP_MS_data/metaquast_analysis/HF_ref/${S}"
${OARSUB} -n "${S}_ref_metaquast" "./execution_HF_ref.sh $IMP $IMP_MEGAHIT $METAMOS $MOCAT $IMP_MG $METAMOS_MG $MOCAT_MG $IMP_MT $METAMOS_MT $MOCAT_MT $OUTDIR"
done
#!/bin/bash -l
# Gather the last line of every per-assembly gene_count.Prodigal.tsv into one
# tab-separated summary, one row per (data set, assembly) pair.

OUTFILE="prodigal_summary.tsv"
ROOT="/scratch/users/snarayanasamy/IMP_MS_data/prodigal_analysis"

declare -a SAMPLES=("SM" "HF1" "HF2" "HF3" "HF4" "HF5" "WW1" "WW2" "WW3" "WW4" "BG")

# One entry per output row, in output order: "<label>:<sub-directory>:<suffix>".
# The suffix holds the trailing slash that two of the directories carry.
declare -a ASSEMBLIES=(
    "IMP:IMP_MGMT:/"
    "IMP-megahit:IMP-megahit:/"
    "MOCAT_MGMT:MOCAT_MGMT:"
    "MetAMOS_MGMT:metAmos_MGMT:"
    "IMP_MG:IMP_MG:"
    "MOCAT_MG:MOCAT_MG:"
    "MetAMOS_MG:metAmos_MG:"
    "IMP_MT:IMP_MT:"
    "MOCAT_MT:MOCAT_MT:"
    "MetAMOS_MT:metAmos_MT:"
)

# Header row; the plain redirect starts the summary file from scratch
echo -e "Dataset\tAssembly\tProdigal_total_genes\tProdigal_complete_genes\tProdigal_incomplete_genes" > ${OUTFILE}

### Repeat for all the data sets
for S in "${SAMPLES[@]}"
do
    for ENTRY in "${ASSEMBLIES[@]}"
    do
        IFS=":" read -r LABEL SUBDIR SUFFIX <<< "${ENTRY}"
        COUNTS="${ROOT}/${SUBDIR}/${S}${SUFFIX}/gene_count.Prodigal.tsv"
        paste <(echo ${S}) <(echo ${LABEL}) <(tail -n1 ${COUNTS}) >> ${OUTFILE}
    done
done
#!/bin/bash -l
# Submit one Prodigal gene-prediction job per data set for the IMP-megahit
# assemblies.

# The oarsub invocation is stored as an array so every option stays a separate
# word without relying on nested quotes and unquoted expansion.
OARSUB=(oarsub --notify "mail:shaman.narayanasamy@uni.lu" -t bigsmp -t idempotent -t besteffort -l core=12/nodes=1,walltime=120)

declare -a SAMPLES=("SM" "HF1" "HF2" "HF3" "HF4" "HF5" "WW1" "WW2" "WW3" "WW4" "BG")

### Repeat for all the data sets
for S in "${SAMPLES[@]}"
do
    MGMT_REF="/scratch/users/snarayanasamy/IMP_MS_data/IMP_analysis/${S}_megahit/Assembly/MGMT.assembly.merged.fa"
    OUTDIR_MGMT="/scratch/users/snarayanasamy/IMP_MS_data/prodigal_analysis/IMP-megahit/${S}"

    # The job name carries "megahit" so these jobs can be told apart from
    # other Prodigal submissions for the same data set.
    "${OARSUB[@]}" -n "${S}_IMP_megahit_prodigal" "./makeGenePredictionsProdigal.sh $MGMT_REF $OUTDIR_MGMT"
done
#!/bin/bash -l
# Submit Prodigal gene-prediction jobs for the IMP assemblies of every data
# set: the co-assembly (MGMT) first, then the MG-only and MT-only assemblies.

OARSUB=(oarsub --notify "mail:shaman.narayanasamy@uni.lu" -t bigsmp -t idempotent -t besteffort -l core=12/nodes=1,walltime=120)

DATA_ROOT="/scratch/users/snarayanasamy/IMP_MS_data"

declare -a SAMPLES=("SM" "HF1" "HF2" "HF3" "HF4" "HF5" "WW1" "WW2" "WW3" "WW4" "BG")

# submit_prodigal <kind> <reference fasta>
# Queues one job for the current data set ${S}; the output directory and the
# job name are derived from <kind> (MGMT, MG or MT).
submit_prodigal() {
    local kind=$1
    local reference=$2
    local outdir="${DATA_ROOT}/prodigal_analysis/IMP_${kind}/${S}"
    "${OARSUB[@]}" -n "${S}_IMP_${kind}_prodigal" "./makeGenePredictionsProdigal.sh ${reference} ${outdir}"
}

### Repeat for all the data sets
for S in "${SAMPLES[@]}"
do
    submit_prodigal MGMT "${DATA_ROOT}/IMP_analysis/${S}/Assembly/MGMT.assembly.merged.fa"
    submit_prodigal MG "${DATA_ROOT}/iterative_assemblies/MG_assemblies/${S}/MG_contigs_merged_2.fa"
    submit_prodigal MT "${DATA_ROOT}/iterative_assemblies/MT_assemblies/${S}/MT_contigs_merged_2.fa"
done
#!/bin/bash -l
# Submit Prodigal gene-prediction jobs for the MOCAT assemblies of every data
# set, in the order MGMT, MG, MT.

OARSUB=(oarsub --notify "mail:shaman.narayanasamy@uni.lu" -t bigsmp -t idempotent -t besteffort -l core=12/nodes=1,walltime=120)
# Alternative resource request: -t idempotent -t besteffort -l nodes=1,walltime=48

DATA_ROOT="/scratch/users/snarayanasamy/IMP_MS_data"

declare -a SAMPLES=("SM" "HF1" "HF2" "HF3" "HF4" "HF5" "WW1" "WW2" "WW3" "WW4" "BG")

### Repeat for all the data sets
for S in "${SAMPLES[@]}"
do
    # Three-character names lose their last character to form the parent
    # directory name (S1); every other name is used unchanged.
    check=${#S}
    echo $check
    case $check in
        3)
            S1="${S%?}"
            echo "$S1"
            ;;
        *)
            S1="$S"
            echo "Continue"
            ;;
    esac

    for KIND in MGMT MG MT
    do
        # The co-assembly is stored under "Combined" rather than "MGMT"
        if [ "$KIND" = "MGMT" ]
        then
            SRC="Combined"
        else
            SRC="$KIND"
        fi

        REF="${DATA_ROOT}/MOCAT_analysis/${SRC}/${S1}/${S}_MOCAT_${KIND}"
        OUTDIR="${DATA_ROOT}/prodigal_analysis/MOCAT_${KIND}/${S}"
        "${OARSUB[@]}" -n "${S}_MOCAT_${KIND}_prodigal" "./makeGenePredictionsProdigal.sh $REF $OUTDIR"
    done
done
#!/bin/bash -l
# Submit Prodigal gene-prediction jobs for the metAmos assemblies of every
# data set, in the order MGMT, MG, MT.

OARSUB=(oarsub --notify "mail:shaman.narayanasamy@uni.lu" -t bigsmp -t idempotent -t besteffort -l core=12/nodes=1,walltime=120)
# Alternative resource request: -t idempotent -t besteffort -l nodes=1,walltime=48

DATA_ROOT="/scratch/users/snarayanasamy/IMP_MS_data"

declare -a SAMPLES=("SM" "HF1" "HF2" "HF3" "HF4" "HF5" "WW1" "WW2" "WW3" "WW4" "BG")

### Repeat for all the data sets
for S in "${SAMPLES[@]}"
do
    # Three-character names lose their last character to form the parent
    # directory name (S1); every other name is used unchanged.
    check=${#S}
    echo $check
    case $check in
        3)
            S1="${S%?}"
            echo "$S1"
            ;;
        *)
            S1="$S"
            echo "Continue"
            ;;
    esac

    for KIND in MGMT MG MT
    do
        REF="${DATA_ROOT}/metAmosAnalysis/${S1}/${KIND}/default/${S}/Assemble/out/soapdenovo.31.asm.contig"
        OUTDIR="${DATA_ROOT}/prodigal_analysis/metAmos_${KIND}/${S}"
        "${OARSUB[@]}" -n "${S}_metAmos_${KIND}_prodigal" "./makeGenePredictionsProdigal.sh $REF $OUTDIR"
    done
done
#!/bin/bash
# Run Prodigal (-p meta) on one assembly, post-process the predictions with
# the MOCAT helper script, run fetchMG in extraction mode and tabulate the
# total / complete / incomplete gene counts.
#
# Usage: makeGenePredictionsProdigal.sh <contig_fasta> <output_dir>
#
# All outputs are written inside <output_dir>, which is created if needed.

prodigal=/work/projects/ecosystem_biology/local_tools/MOCAT/bin/prodigal
mocat_aux=/work/projects/ecosystem_biology/local_tools/MOCAT/src/MOCATGenePredictionProdigal_aux.pl
fetchmg=/work/projects/ecosystem_biology/local_tools/MOCAT/fetchMG/fetchMG.pl

# Single log file shared by the prediction and the post-processing step
log=gene_prediction.log

if [ $# -lt 2 ]
then
    echo "Usage: $0 <contig_fasta> <output_dir>" >&2
    exit 1
fi

contig=$1
gpdir=$2

# Stop early if the output directory is unusable; otherwise every file below
# would be written into whatever directory the script was started from.
mkdir -p "${gpdir}" || exit 1
cd "${gpdir}" || exit 1

"${prodigal}" -f gff -a contig.Prodigal.faa.tmp -d contig.Prodigal.fna.tmp -p meta -o contig.Prodigal.gff -i "${contig}" -q >> "${log}" 2>&1

"${mocat_aux}" contig.Prodigal.fna.tmp contig.Prodigal.faa.tmp contig.Prodigal.tab >> "${log}" 2>&1

perl "${fetchmg}" -o marker_genes -t 4 -d contig.Prodigal.fna -m extraction contig.Prodigal.faa

## Aggregate complete and incomplete genes in a table
total=$(wc -l < contig.Prodigal.tab)
# -w matches whole words only, so "complete" does not also count "incomplete"
complete=$(grep -wc "complete" contig.Prodigal.tab)
incomplete=$(grep -wc "incomplete" contig.Prodigal.tab)

echo -e "Prodigal_all\tProdigal_complete\tProdigal_incomplete" > gene_count.Prodigal.tsv
printf '%s\t%s\t%s\n' "${total}" "${complete}" "${incomplete}" >> gene_count.Prodigal.tsv
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment