--- title: "R Notebook" output: html_notebook --- ```{r} library(ggplot2) library(pheatmap) library(RColorBrewer) #display.brewer.all() library(tidyverse) library(ggpubr) library(stringr) library(ggsignif) library(openxlsx) library(viridis) library(xgboost) library(data.table) library(readxl) library(janitor) library(dplyr) library(jcolors) library(stringr) library(rstatix) library(FSA) library(dunn.test) ``` Data import ```{r} setwd("//atlas.uni.lux/users/isabel.rosety/GBA/Stainings/Plots/488Ki67_568Edu_647Sox2/") data1<-read.csv("//atlas.uni.lux/users/isabel.rosety/GBA/Stainings/Plots/488Ki67_568Edu_647Sox2/IR_20220915_488Ki67_568Edu_647Sox2_RS_e51e52e54d30Edu8h_20220915_175904.csv",sep = ";") data2<-read.csv("//atlas.uni.lux/users/isabel.rosety/GBA/Stainings/Plots/488Ki67_568Edu_647Sox2/IR_20220915_488Ki67_568Edu_647Sox2_RS_e53d30edu8h_20220919_134031.csv",sep = ";") #data<-read.csv("//atlas.uni.lux/users/isabel.rosety/GBA/Stainings/Plots/488Ki67_568Edu_647Sox2/IR_20220915_488Ki67_568Edu_647Sox2_RS_e52to54d30edu48h_20220915_175904.csv",sep = ";") data3<-read.csv("//atlas.uni.lux/users/isabel.rosety/GBA/Stainings/Plots/488Ki67_568Edu_647Sox2/IR_20230523_568Edu_647Sox2_RS_b55b56b57d30_20230521_183416.csv",sep = ";") data = bind_rows(data1,data2,data3) tmp=do.call(rbind,strsplit(data$AreaName,"_")) #stringsplit will take the column Areaname and will split this depending on the underscore, then we put them otgeter one ofter the other data$Condition = tmp[,1] data %>% mutate(Condition = str_replace_all(Condition, pattern = "MUT", replacement = "GBA_PD")) %>% mutate(Condition = str_replace_all(Condition, pattern = "WT", replacement = "CTRL")) ->data data %>% mutate(CellLine = AreaName) ->data data %>% mutate(CellLine = str_replace_all(CellLine, pattern = "WT_56", replacement = "CTRL1")) %>% mutate(Condition = str_replace_all(Condition, pattern = "WT_39", replacement = "CTRL2")) %>% mutate(CellLine = str_replace_all(CellLine, pattern = "WT_68", replacement = "CTRL3")) %>% mutate(CellLine = str_replace_all(CellLine, pattern = "MUT_309", replacement = "PD1")) %>% mutate(CellLine = str_replace_all(CellLine, pattern = "MUT_KTI6", replacement = "PD2")) %>% mutate(CellLine = str_replace_all(CellLine, pattern = "MUT_SGO1", replacement = "PD3")) ->data data$TimepointEdu<-paste0(data$Condition,"_",data$Timepoint) #stringr package needed toselect = c("Barcode","CellLine","Condition","TimepointEdu", "Batch","OrganoidIdx","Day","Batch","EduInSox2MFI","Sox2ByNucAlive","EduByNucAlive","EduAndSox2BySox2") #toselect = c("Barcode","AreaName","Condition","Batch","OrganoidIdx","Day","Batch","Sox2ByNucAlive","Ki67ByNucAlive","EduByNucAlive","EduAndSox2BySox2","Ki67AndSox2BySox2","Ki67AndEduAndSox2BySox2") data %>% #same dplyr::select(toselect)->data feature_names<-colnames(data[,8:ncol(data)]) #feature_names<-"TUNELByNuc" #Mean of replicates data%>% pivot_longer(feature_names,names_to = "Features", values_to = "Measure") -> data_all_long data2 = drop_na(data_all_long) #Getting the mean of the technical replicates data2 %>% group_by(Condition,CellLine,TimepointEdu,Batch,Features, Barcode,Day) %>% summarize(MeanFeatures = mean(Measure))-> data_Mean #Normalizing to mean of controls data_Mean %>% dplyr::filter(Condition=="CTRL") %>% #group_by(Condition, Day, Features,Batch) %>% group_by(Condition,Features,Batch) %>% #View summarise(baseline = mean(MeanFeatures)) %>% #View ungroup() %>% dplyr::select(-Condition) %>% #full_join(data_Mean, by=c("Day","Features","Batch")) %>% full_join(data_Mean, by=c("Features","Batch")) %>% mutate(Foldchange = MeanFeatures /baseline) ->data_all_based included_vars =c("CellLine", "Condition","Batch","Barcode","Day") # here we already have the mean of each section data_all_based %>% pivot_wider(id_cols=all_of(included_vars),names_from = Features,values_from = Foldchange)->data_all_based_wide2 #If I want to get the wide table without normalizing to mean of controls: data_Mean %>% full_join(data_Mean, by=c("Features","MeanFeatures","Batch","CellLine", "TimepointEdu","Barcode", "Condition","Day")) ->data_Mean included_vars =c("CellLine", "Condition","Batch","TimepointEdu","Barcode","Day") data_Mean %>% pivot_wider(all_of(included_vars),names_from = Features,values_from = MeanFeatures)->data_Mean_wide #write.csv(data_Mean_wide, file = 'data_all_combined.csv') ``` KW test loop ```{r} apply(data_Mean_wide [7:ncol(data_Mean_wide)],2,shapiro.test)->shapiro_test #normality hypothesis rejected when p-value <0.05 -> use non parametric kruskal.test(MeanFeatures ~ Features, data = data_Mean) kruskal.test(EduAndSox2BySox2 ~ TimepointEdu, data = data_Mean_wide ) # if p-value <0.05, then there is a statistically significant difference between the groups #Dunn test #apply(data_Mean_wide [7:ncol(data_Mean_wide)],2,kruskal.test) assign_significance = function(pvals){ significance = rep(NA,length(pvals)) significance[pvals >= 0.05] = 'ns' significance[pvals <0.05 & pvals >0.01] = '*' significance[pvals <=0.01 & pvals >0.001] = '**' significance[pvals <=0.001 & pvals >0.0001] = '***' significance[pvals <=0.0001] = '****' return(significance) } Features=c("EduAndSox2BySox2") wanted =c("CTRL_T0 - CTRL_T7","CTRL_T0 - GBA_PD_T0", "CTRL_T7 - GBA_PD_T7","GBA_PD_T0 - GBA_PD_T7") results = list() for(feat in Features){ kpvals = kruskal.test(data_Mean_wide[[feat]],as.factor(data_Mean_wide$TimepointEdu))$p.value #first time run this line and next line to see the order of the features, define feat tmp = dunn.test(data_Mean_wide[[feat]],as.factor(data_Mean_wide$TimepointEdu), list=TRUE) pvals = tmp$P[match(wanted,tmp$comparisons)] padjusted =p.adjust(pvals,method="BH",n=length(wanted)*length(Features)) results[[feat]] = cbind(wanted,feat,kpvals,pvals,padjusted) } table_results = as.data.table(do.call(rbind,results)) table_results$significancekw = assign_significance(as.double(table_results$kpvals)) table_results$significancedt = assign_significance(as.double(table_results$pvals)) table_results$significancedtadj = assign_significance(as.double(table_results$padjusted)) table_results = dplyr::select(table_results,"wanted", "feat", "kpvals","significancekw","pvals","significancedt","padjusted","significancedtadj") #write.csv(table_results, file = 'KW_table_results.csv') ``` Add group1 and group2 ```{r} #table_results|>column_to_rownames(var="wanted")->table_results tmp <- str_split_fixed(table_results$wanted, ' - ', 2) table_results$group1 = tmp[,1] table_results$group2 = tmp[,2] ``` Test to add significance bars with Kruskal Wallis (loop) ```{r} for (i in 1:length(Features)) { data_Mean_wide %>% pivot_longer(cols=Features, names_to = "feature", values_to = "value") %>% filter(feature %in% Features[i]) %>% ggplot( aes(x=factor(TimepointEdu, level = c("CTRL_T0","CTRL_T7", "GBA_PD_T0","GBA_PD_T7")), y=value)) + #geom_violin( aes(fill=Condition),show.legend = T, trim=T), geom_violin( aes(fill=TimepointEdu),show.legend = T,scale = "width", trim=T)+ geom_dotplot(binaxis = "y",stackdir = "center",dotsize=0.5)+ scale_fill_manual(values= c("#1565C0",alpha(("#1565C0"),0.4),"#CC0000",alpha(("#CC0000"),0.4)),name = "TimepointEdu")+ #scale_y_continuous(expand = c(0, 0))+ ylim(0, 0.09)+ scale_color_jcolors("pal7")+ theme(legend.key=element_blank()) + stat_pvalue_manual(table_results, xmin = "group1", label="significancedtadj", y.position = 0.07, step.increase = 0.09,size=8, hide.ns =F)+ #facet_grid(~fct_relevel(Day, "d30","d60"), scales="free") + labs(x = "", y = paste(Features[i]), #y = paste(names[i]), fill = "Condition", #title = paste(feature_names[i])) + title = "" )+ theme_bw() + theme( axis.line = element_line(colour = 'black', linewidth = 0.5) , axis.title.x = element_blank(), axis.text.x = element_text(size=8, color="black"), axis.title.y = element_text(size = 12), axis.text.y = element_text(size=10, color="black"), axis.ticks.y = element_line(), axis.ticks.length=unit(.25, "cm"), #change legend text font size) #legend.key.size = unit(0.7, "cm"), #legend.key.width = unit(0.6,"cm"), legend.key=element_blank(), panel.grid.major = element_blank(), panel.grid.minor = element_blank(), panel.border = element_blank(), plot.title = element_text(size = 20, hjust=0.5, vjust= 1, face = "bold"), plot.subtitle = element_blank(),#element_text(size = 2, hjust=0.5) strip.text = element_text(size=12, vjust=0.5), strip.background = element_rect(fill="lightgray"), # panel.border = element_rect(fill = NA, color = "black"), panel.spacing.y = unit(0.8, "lines"), strip.switch.pad.wrap=unit(20, "lines"), legend.position="right", legend.text = element_text(size=17), legend.title = element_text(size=19) ) -> p print(p) #t<- cowplot::ggdraw(cowplot::add_sub(p, "Wilcox-test, ***p=0.001, **p=0.01, *p=0.05",hjust=-0.2, size=13)) ##ggsave(paste0(Sys.Date(),"_", names[i], ".pdf"), plot=t) #ggsave(paste0(Sys.Date(),(sprintf("Plot_%s_Edu_All_Timepoints_test_ViolinTrimmed.pdf",feature_names[i]))),height=3,width=5) } ```