Skip to content
Snippets Groups Projects
Commit 11aebe6f authored by Patrycja Mulica's avatar Patrycja Mulica
Browse files

Update Figure3-4-5/rnaseq_analyses_patrycja.R

parent cd71637e
No related branches found
No related tags found
No related merge requests found
#
# RNA-seq data pre-processing and analysis
#
library('Rsubread')
# Rsubread 2.0.1
############################
# Step 1: aligning the reads
############################
# retrieve fastq files
fastq.files <- list.files(path = "/mnt/data/AnneProjects", pattern = ".fq.gz$", full.names = TRUE, recursive = TRUE)
# check that all files are present
print(fastq.files)
# replace fq.gz by bam in file name
targets = data.frame(inputfile=fastq.files, outputfile=gsub("fq.gz","bam",fastq.files))
r1files = as.matrix(targets$inputfile)[grep("\\_1\\.fq",as.matrix(targets$inputfile))]
r2files = as.matrix(targets$inputfile)[grep("\\_2\\.fq",as.matrix(targets$inputfile))]
outfiles = gsub("fq.gz","bam", gsub("\\_1\\.fq","\\.fq",r1files))
align(index="new_index",readfile1=r1files, readfile2=r2files, output_file=as.character(outfiles),output_format="BAM", nthreads=10)
############################
# Step 2: Quality check
############################
bam.files <- list.files(path = "/mnt/data/AnneProjects", pattern = ".bam$", full.names = TRUE, recursive = TRUE)
print(bam.files)
props <- propmapped(files=bam.files)
print(props)
# Extract quality scores for individual fastq.files (one example)
qs <- qualityScores(filename=fastq.files[1],nreads=100)
#############################
# Step 3: Determine counts
#############################
# version:
gtf <- list.files("./",pattern=".*gtf$",full.names = T)
#
# ispairedEnd = TRUE
#
fc <- featureCounts(files = as.character(outfiles), GTF.featureType="exon",GTF.attrType="gene_name", annot.ext=gtf, isGTFAnnotationFile=TRUE, isPairedEnd=TRUE, nthreads=10)
##############################
# Functions for data analysis
##############################
#
# PLS-DA ploot
#
plsdaplot = function(combdat, phenotype, filestr="plsda_plot.pdf"){
comblab = match(phenotype, unique(phenotype))
comblabunq = unique(comblab)
const = which(apply(combdat, 1, var)==0)
combfilt = combdat
if(length(const))
combfilt = combdat[-const,]
# variance filter
vars = apply(combfilt, 1, var)
var1000 = order(vars, decreasing=T)[1:1000]
source("osplda_rainbow.R")
require('bitops')
require('pls')
mods <- make.OSC.PLS.model(comblab, pls.data = as.data.frame(t((combfilt[var1000,]))), comp = 2, OSC.comp = 0, validation = "LOO", method = "oscorespls", cv.scale = FALSE, progress = FALSE)
pdf(filestr)
plot.OSC.results(mods, plot = "scores", groups = phenotype)
dev.off()
}
#
# Volcano plot
#
plotvolcano = function(dat, logfc, fdr, logfc_thresh1 = 1, fdr_thresh = 0.05, lablogfc_thresh = 1.5, labfdr_thresh = 0.001, cexlab=2, filestr="volcano_plot_with_labels.pdf")
{
mat = data.frame(x=logfc, y=-log10(fdr))
matorange = data.frame(x=logfc, y=-log10(fdr))[which(abs(logfc)>logfc_thresh1),]
matgreen = data.frame(x=logfc, y=-log10(fdr))[intersect(which(fdr<fdr_thresh),which(abs(logfc)>logfc_thresh1)),]
matred =data.frame(x=logfc, y=-log10(fdr))[which(fdr<fdr_thresh),]
matlab = data.frame(x=logfc, y=-log10(fdr))[intersect(which(fdr<labfdr_thresh),which(abs(logfc)>lablogfc_thresh)),]
topgenes = rownames(dat)
matlabels = topgenes[intersect(which(fdr<labfdr_thresh),which(abs(logfc)>lablogfc_thresh))]
if(!require('ggrepel'))
{
install.packages("ggrepel")
}
require('ggplot2')
# write volcano plot to pdf
pdf(filestr)
p = ggplot(mat, aes(x, y)) + geom_point(color = 'gray', size=0.8, fill="white") + theme_classic(base_size = 16) + geom_point(data = matorange, color="orange") + geom_point(data = matred, color="red") + geom_point(data = matgreen, color="green") + geom_text_repel(data = matlab, label=matlabels, nudge_y=0.1, size=cexlab) + xlab('Log fold change') + ylab('-log10(P-value)') + theme(axis.text=element_text(size=12), axis.title=element_text(size=12,face="bold"))
print(p)
dev.off()
}
#
# Differential expression analysis
#
differential_analyses <- function(countsdat, condition1_ids, condition2_ids, condition1_name, condition2_name, filename_prefix="case_vs_control", logfc_thresh1 = 1, use_nominal = FALSE, fdr_thresh = 0.05, lablogfc_thresh = 0.5, labfdr_thresh = 0.05, cex.axis = 1, filtmean=NULL){
require('DESeq2')
# convert outcome variable into factor (e.g. "PD" vs. "control")
outcome = c(rep(condition1_name,length(condition1_ids)),rep(condition2_name,length(condition2_ids)))
cts = countsdat[,c(condition1_ids,condition2_ids)]
colnames(cts)=outcome
coldata = factor(outcome, levels=c(condition1_name, condition2_name))
names(coldata)=outcome
dds = DESeqDataSetFromMatrix(countData = cts, colData = DataFrame(coldata), design = ~coldata)
library("edgeR")
dge <- DGEList(counts=cts)
keep <- filterByExpr(dge, design=model.matrix(~0+outcome))
dds_filt <- dds[keep,]
topgenes_rnaseq <- DESeq(dds_filt)
res_case_vs_control <- results(topgenes_rnaseq)
res_case_vs_control <- as.matrix(res_case_vs_control[order(res_case_vs_control$pvalue),])
if(!is.null(filtmean)){
res_case_vs_control = res_case_vs_control[-which(res_case_vs_control[,1] < filtmean),]
}
pdf( paste(filename_prefix,"_boxplots_top100.pdf",collapse=""), width=7, height=7)
par(mfrow=c(2,2))
for(i in 1:100)
boxplot(as.matrix(cts[match(rownames(res_case_vs_control),rownames(cts))[i],]) ~ outcome, col=c("lightblue","pink"), main=rownames(res_case_vs_control)[i], ylab="preprocessed expression count", xlab="", cex.axis=cex.axis, outline=T)
dev.off()
# ranking table
write.xlsx(cbind(round(res_case_vs_control[,1:4], 3), format(res_case_vs_control[,5:6], digits=3)), paste(filename_prefix,"_ranking.xlsx",collapse=""))
if(!use_nominal){
plotvolcano(res_case_vs_control, res_case_vs_control[,"log2FoldChange"], res_case_vs_control[,"padj"], logfc_thresh1 = logfc_thresh1, fdr_thresh = fdr_thresh, lablogfc_thresh = lablogfc_thresh, labfdr_thresh = labfdr_thresh, filestr=paste(filename_prefix,"_volcano_plot_with_labels.pdf",collapse=""))
} else {
plotvolcano(res_case_vs_control, res_case_vs_control[,"log2FoldChange"], res_case_vs_control[,"pvalue"], logfc_thresh1 = logfc_thresh1, fdr_thresh = fdr_thresh, lablogfc_thresh = lablogfc_thresh, labfdr_thresh = labfdr_thresh, filestr=paste(filename_prefix,"_volcano_plot_with_labels.pdf",collapse=""))
}
return(res_case_vs_control)
}
##############################
# Astrocyte analysis
##############################
library(xlsx)
astrocytes_annot <- xlsx::read.xlsx("samples_astrocytes.xlsx", 1)
astrocytes_samp = grep("Oksanen|Palm",colnames(fc$counts))
phenotype = rep("oksanen_t129",length(astrocytes_samp))
phenotype[grep("48.*Oksanen",colnames(fc$counts)[astrocytes_samp])] = rep("oksanen_48",length(grep("48.*Oksanen",colnames(fc$counts)[astrocytes_samp])))
phenotype[grep("T12.*Palm",colnames(fc$counts)[astrocytes_samp])] = rep("palm_t129",length(grep("T12.*Palm",colnames(fc$counts)[astrocytes_samp])))
phenotype[grep("48.*Palm",colnames(fc$counts)[astrocytes_samp])] = rep("palm_48",length(grep("48.*Palm",colnames(fc$counts)[astrocytes_samp])))
print(table(phenotype))
#phenotype
# oksanen_48 oksanen_t129 palm_48 palm_t129
# 3 3 3 3
oksanen_t129 = which(phenotype=="oksanen_t129")
oksanen_48 = which(phenotype=="oksanen_48")
palm_48 = which(phenotype=="palm_48")
palm_t129 = which(phenotype=="palm_t129")
plsdaplot(fc$counts[,astrocytes_samp], phenotype, filestr="plsda_rnaseq_astrocytes_2022.pdf")
##############################
# DESeq2 analysis
##############################
res_oksanen_vs_palm_all = differential_analyses(fc$counts[,astrocytes_samp], c(oksanen_t129,oksanen_48), c(palm_t129,palm_48), "oksanen", "palm", filename_prefix="oksanen_vs_palm_deseq2_2022", logfc_thresh1 = 1, use_nominal=F, fdr_thresh = 0.05, lablogfc_thresh = 0.5, labfdr_thresh = 0.05)
res_t129_vs_48_all = differential_analyses(fc$counts[,astrocytes_samp], c(oksanen_t129,palm_t129), c(oksanen_48,palm_48), "T12.9", "#48", filename_prefix="t129_vs_48_deseq2_2022", logfc_thresh1 = 1, use_nominal=F, fdr_thresh = 0.05, lablogfc_thresh = 0.5, labfdr_thresh = 0.05)
res_t129_oksanen_vs_palm_all = differential_analyses(fc$counts[,astrocytes_samp], oksanen_t129, palm_t129, "T12.9_oksanen", "T12.9_palm", filename_prefix="t129_oksanen_vs_palm_deseq2_2022", logfc_thresh1 = 1, use_nominal=F, fdr_thresh = 0.05, lablogfc_thresh = 0.5, labfdr_thresh = 0.05)
res_48_oksanen_vs_palm_all = differential_analyses(fc$counts[,astrocytes_samp], oksanen_48, palm_48, "#48_oksanen", "#48_palm", filename_prefix="48_oksanen_vs_palm_deseq2_2022", logfc_thresh1 = 1, use_nominal=F, fdr_thresh = 0.05, lablogfc_thresh = 0.5, labfdr_thresh = 0.05)
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment