Commit 07b7ab80 authored by Shaman Narayanasamy's avatar Shaman Narayanasamy
Browse files

Modify R script

parent 7645826c
#!/bin/R
require(stringr)
require(bedr)
args <- commandArgs(trailingOnly = TRUE)
#numcer.tab <- args[1]
nucmer.tab <- "/scratch/users/snarayanasamy/IMP_MS_data/metaquast_analysis/CPM_analysis/IMP.processed.coords.filtered"
nucmer.dat <- read.table(nucmer.tab, sep="\t", header=T)
colnames(nucmer.dat) <- c("rstart", "rend", "cstart", "cend", "rlen", "clen", "pident", "tags")
## Read in nucmer table
nucmer.tab <- "/home/shaman/Work/Data/integrated-omics-pipeline/MS_analysis/metaquast_output/CPM_analysis/IMP.processed.coords.filtered"
nucmer.dat <- read.delim(nucmer.tab, sep="\t", header=F)
colnames(nucmer.dat) <- c("rstart", "rend", "cstart", "cend", "rlen", "clen", "pident", "ref", "contig", "ambiguity")
al <- nucmer.dat
al1000 <- subset(nucmer.dat, clen >= 1000)
bed <- nucmer.dat[, c("contig", "cstart", "cend")]
bed.1 <- subset(bed, cstart < cend)
# Calculate M: sum length of all aligned regions
M <- sum(al$clen)
# Calculate N: sum length of all unaligned regions
macr <-
max(al$clen)
## Write out AL file
write.table(al,
"/home/shaman/Work/Data/integrated-omics-pipeline/MS_analysis/metaquast_output/CPM_analysis/IMP.processed.coords.filtered_AL",
row.names=F,
col.names=F,
sep="\t",
quote=F)
## Write out AL file
write.table(subset(nucmer.dat, clen >= 1000),
"/home/shaman/Work/Data/integrated-omics-pipeline/MS_analysis/metaquast_output/CPM_analysis/IMP.processed.coords.filtered_AL1000",
row.names=F,
col.names=F,
sep="\t",
quote=F)
## Write out bedfile
write.table(bed,
"/home/shaman/Work/Data/integrated-omics-pipeline/MS_analysis/metaquast_output/CPM_analysis/IMP.processed.coords.filtered.bed",
row.names=F,
col.names=F,
sep="\t",
quote=F)
bed.2 <- unique(paste(bed$contig, ":", bed$cstart, "-", bed$cend, sep=""))
bed2index(bed)
bed.3 <- bed.2[is.valid.region(bed.2, check.chr=F)]
convert2bed(bed.2, check.zero.based=F, check.valid=F)
bedr.merge.region(bed.3, check.chr=F, check.valid=T, check.sort=T, check.zero.based=T)
......@@ -113,5 +113,32 @@ pdf("/home/shaman/Documents/Publications/IMP-manuscript/figures/IMP-vizbin_lengt
grid.draw(legend)
dev.off()
## Binning plot for third revision
maxbin.res <- read.table("/home/shaman/Work/Data/integrated-omics-pipeline/MS_analysis/datasets/IMP_analysis/X310763260_20151004-idba/Binning/MaxBin/maxbin_res.summary", sep="\t", header=T)
maxbin.res$Completeness <- gsub("%", "", maxbin.res$Completeness)
maxbin.res$Completeness <- as.numeric(maxbin.res$Completeness)
maxbin.res$Bin.name <- gsub("maxbin_res.", "", maxbin.res$Bin.name)
maxbin.res$Bin.name <- gsub(".fasta", "", maxbin.res$Bin.name)
maxbin.res$Bin.name <- gsub("^0", "", maxbin.res$Bin.name)
maxbin.res$Bin.name <- gsub("^0", "", maxbin.res$Bin.name)
maxbin.res$Bin.name <- as.numeric(maxbin.res$Bin.name)
#maxbin.res <- maxbin.res[order(maxbin.res$Completeness, decreasing=T),]
maxbin.res <- transform(maxbin.res, Bin.name = reorder(Bin.name, -Abundance))
cols <- colorRampPalette(brewer.pal(9,"Blues"))(100)
bin.plot <- ggplot(maxbin.res, aes(x=Bin.name, y=Abundance)) +
geom_bar(stat="identity", aes(fill=Completeness)) +
scale_fill_gradient(low="lightgreen", high="darkgreen") +
xlab("Bin number") +
ylab("Bin abundance (%)") +
theme_classic(base_size=40) +
theme(axis.text.x = element_text(angle = 90, hjust = 1),
legend.position=c(0.95,0.85)
)
pdf("/home/shaman/Documents/Publications/IMP-manuscript/figures/third_iteration/IMP-BinCompleteness-v2.pdf",
width=28, height=13)
bin.plot
dev.off()
......@@ -8,9 +8,10 @@ library(plyr)
library(tidyr)
library(graphics)
library(cowplot)
library(stringr)
### Load data usage workspace
load("/home/shaman/Work/Data/integrated-omics-pipeline/MS_analysis/comparison/data_usage/data_usage.Rdat")
#load("/home/shaman/Work/Data/integrated-omics-pipeline/MS_analysis/comparison/data_usage/data_usage.Rdat")
######################################################################################################
##### Initialize functions
......@@ -35,20 +36,24 @@ makeTransparent = function(..., alpha=0.5) {
###############################################################################################################################
### FUNCTION: 6-axis radar chart
plot.6axis <- function(dat, cols, dens, font, linetype,
plot.7axis <- function(dat, cols, dens, font, linetype,
fsize, linewd, mcex, lwd, plwd, title = "") {
dat <- dat[,c("Assembly",
"X..contigs.....1000.bp.",
"contigs 1000.bp.",
"N50",
"X..predicted.genes..unique.",
#"X..misassemblies",
"predicted.genes..unique.",
"Genome.fraction....",
"MG_properly_paired",
"MT_properly_paired"
"MT_properly_paired",
"CPM"
)]
colnames(dat) = c("Assembly", "contigs \u2265 1kb", "N50 length", "no. of genes", #"misassemblies",
"genome fraction (%)", "MG properly paired", "MT properly paired")
colnames(dat) = c("Assembly", "contigs \u2265 1kb",
"N50 length", "no. of genes",
"genome fraction (%)", "MG properly paired",
"MT properly paired", "CPM")
dat$Assembly <- factor(dat$Assembly, levels=c(as.character(dat$Assembly), "max", "min"))
dat <- rbind(
c("max", apply(dat[, 2:ncol(dat)], 2, max)),
......@@ -77,9 +82,11 @@ par(mar=c(0,0,8,0))
radarchart(dat[,-1], pcol=cols, pfcol=dens, plty=linetype, plwd=plwd,
cglwd=lwd, palcex=font, calcex=font, vlcex=font, cglcol="darkgray",
paxislabels = c("Assembly", "contigs \u2265 1kb", "N50 length",
"no. of genes", #"misassemblies",
"no. of genes",
"genome fraction (%)",
"MG mapped", "MG properly paired", "MT mapped", "MT properly paired"),
"MG mapped", "MG properly paired",
"MT mapped", "MT properly paired",
"CPM"),
oma=c(0,0,0,0))#, title = title, cex.main = 6)
title(title, outer = FALSE, line = 2, font = 2, cex.main = 7.5)
}
......@@ -99,6 +106,7 @@ dat <- dat[,c("Assembly",
colnames(dat) <- c("Assembly", "contigs \u2265 1kb", "N50 length",
"no. of genes", "MG properly paired", "MT properly paired")
dat$Assembly <- factor(dat$Assembly, levels=c(as.character(dat$Assembly), "max", "min"))
dat <- rbind(
c("max", apply(dat[, 2:ncol(dat)], 2, max)),
c("min", apply(dat[, 2:ncol(dat)], 2, min)),
......@@ -154,11 +162,6 @@ lwd=16
plwd=15
pdf("/home/shaman/Documents/Publications/IMP-manuscript/figures/second_iteration/SM_radarChart_v7.pdf",
width=38, height=25)
plot.6axis(SM_quast[SM_quast$Assembly%in%assm_mgmt,], cols, dens, font, linetype, fsize, linewd, mcex, lwd, plwd)
dev.off()
pdf("/home/shaman/Documents/Publications/IMP-manuscript/figures/second_iteration/HF1_radarChart_v6.pdf",
width=38, height=25)
plot.5axis(HF1_quast[HF1_quast$Assembly%in%assm_mgmt,], cols, dens, font, linetype, fsize, linewd, mcex, lwd, plwd)
......@@ -225,22 +228,6 @@ for(i in seq_along(samples_1)){
ref.dat <- SM_quast[,!colnames(SM_quast)%in%colnames(all.dat)]
### Prepare for data summary radar chart
all.dat$Assembly <- as.factor(all.dat$Assembly)
all.dat.sum <- aggregate(all.dat[,-c(1:2, 28:47)], by=list(all.dat$Assembly), FUN=sum)
colnames(all.dat.sum)[1] <- "Assembly"
all.dat.mean <- aggregate(all.dat[,c(28:31)], by=list(all.dat$Assembly), FUN=mean)
colnames(all.dat.mean)[1] <- "Assembly"
all.dat.agg <- cbind.data.frame(all.dat.sum, all.dat.mean, ref.dat[1:10,])
## Generate summary/cummulative radar chart
pdf("/home/shaman/Documents/Publications/IMP-manuscript/figures/second_iteration/ALL_radarChart_v7.pdf",
width=38, height=25)
plot.6axis(all.dat.agg[all.dat.agg$Assembly%in%assm_mgmt,], cols, dens, font, linetype, fsize, linewd, mcex, lwd, plwd)
dev.off()
## Generate legend for figure
legend_labels <- c("IMP","IMP-megahit","MOCAT_MGMT","MetAMOS_MGMT")
......@@ -268,3 +255,51 @@ write.table(as.data.frame(all.dat), "/home/shaman/Documents/Publications/IMP-man
#save.image("/home/shaman/Work/Data/integrated-omics-pipeline/MS_analysis/comparison/data_usage/comparison.Rdat")
#load("/home/shaman/Work/Data/integrated-omics-pipeline/MS_analysis/comparison/data_usage/comparison.Rdat")
### Obtain composite performance metric (CPM) from data
sm.dat <- subset(all.dat, Dataset=="SM")
cpm.dat <- cbind.data.frame(str_split_fixed(sm.dat$unaligned.contigs, " \\+ ", 2))
colnames(cpm.dat) <- c("fully.unaligned.contigs", "partially.unaligned.contigs")
cpm.dat$fully.unaligned.contigs <- as.numeric(cpm.dat$fully.unaligned.contigs)
cpm.dat$partially.unaligned.contigs <- as.numeric(gsub(" part", "", cpm.dat$partially.unaligned.contigs))
cpm.dat <- cbind.data.frame(cpm.dat, C500 = sm.dat$contigs - (cpm.dat$fully.unaligned.contigs + cpm.dat$partially.unaligned.contigs))
cpm.dat <- cbind.data.frame(cpm.dat,Aligned.length = sm.dat$Total.length - sm.dat$Unaligned.length)
cpm.dat <- cbind.data.frame(cpm.dat, MACR = sm.dat$Largest.alignment/sm.dat$Reference.length)
cpm.dat <- cbind.data.frame(cpm.dat, MACRP = cpm.dat$MACR * 100)
cpm.dat <- cbind.data.frame(cpm.dat, Chimeric.index = sm.dat$Unaligned.length/(sm.dat$Unaligned.length + cpm.dat$Aligned.length) * 100)
cpm.dat <- cbind.data.frame(cpm.dat, nC500 = 5 * cpm.dat$MACRP / max(cpm.dat$C500))
cpm.dat <- cbind.data.frame(cpm.dat, nMACR = 5 * sm.dat$Largest.alignment / max(sm.dat$Largest.alignment))
cpm.dat <- cbind.data.frame(cpm.dat, n.Chimeric.index = 5 * cpm.dat$Chimeric.index / max(cpm.dat$Chimeric.index))
cpm.dat <- cbind.data.frame(cpm.dat, CPM = 0.25 * (cpm.dat$nMACR + cpm.dat$nC500) + 0.5 * cpm.dat$n.Chimeric.index)
empty <- as.data.frame(matrix(NA,100,ncol(cpm.dat)))
colnames(empty) <- colnames(cpm.dat)
cpm.dat <- rbind.data.frame(cpm.dat, empty)
all.dat <- cbind.data.frame(all.dat, cpm.dat)
## Plot new version version of SM data
pdf("/home/shaman/Documents/Publications/IMP-manuscript/figures/third_iteration/SM_radarChart_v8.pdf",
width=38, height=25)
plot.7axis(subset(all.dat[all.dat$Assembly%in%assm_mgmt,], Dataset=="SM"), cols, dens, font, linetype, fsize, linewd, mcex, lwd, plwd)
dev.off()
### Prepare for data summary radar chart
all.dat$Assembly <- as.factor(all.dat$Assembly)
all.dat$Total.length <- as.numeric(all.dat$Total.length)
all.dat.sum <- aggregate(all.dat[,-c(1:2, 28:31, 37)], by=list(all.dat$Assembly), FUN=sum, na.rm=T, na.action=NULL)
colnames(all.dat.sum)[1] <- "Assembly"
all.dat.mean <- aggregate(all.dat[,c(28:31)], by=list(all.dat$Assembly), FUN=mean, na.rm=T, na.action=NULL)
colnames(all.dat.mean)[1] <- "Assembly"
all.dat.agg <- cbind.data.frame(all.dat.sum, all.dat.mean)
## Generate new version of summary/cummulative radar chart
pdf("/home/shaman/Documents/Publications/IMP-manuscript/figures/third_iteration/ALL_radarChart_v8.pdf",
width=38, height=25)
plot.7axis(all.dat.agg[all.dat.agg$Assembly%in%assm_mgmt,], cols, dens, font, linetype, fsize, linewd, mcex, lwd, plwd)
dev.off()
#save.image("/home/shaman/Work/Data/integrated-omics-pipeline/MS_analysis/comparison/data_usage/comparison_revision.Rdat")
#load("/home/shaman/Work/Data/integrated-omics-pipeline/MS_analysis/comparison/data_usage/comparison_revision.Rdat")
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment