Commit 2bb2e901 authored by Marek Ostaszewski's avatar Marek Ostaszewski
Browse files

Merge branch 'repo_update' into 'master'

Repo update

See merge request !305
parents 8e876051 f11162e7
##########################################################
### Normalization of GSE152075 SARS-CoV-2 dataset #####
########################################################
library(pacman)
#### Normalize Data #####
### Load necessary packages
pacman::p_load("here", "rentrez","reutils","hipathia", "biomaRt", "utils",
"stringr", "AnnotationDbi", "org.Hs.eg.db","dplyr","tidyr",
"openxlsx", "preprocessCore", "edgeR")
### Download the file if it doesn't exist
if(!file.exists("GSE152075_raw_counts_GEO.txt.gz")) {
download.file(url = "https://www.ncbi.nlm.nih.gov/geo/download/?acc=GSE152075&format=file&file=GSE152075%5Fraw%5Fcounts%5FGEO%2Etxt%2Egz",
destfile = "GSE152075_raw_counts_GEO.txt.gz")
}
### Load and clean RNA-seq expression set from raw counts file.
expreset_raw <- read.delim(file = gzfile("GSE152075_raw_counts_GEO.txt.gz"), header = T, sep = " ")
identifiers <- rownames(expreset_raw)
identifiers_df <- data.frame(id = identifiers,
entrez = mapIds(org.Hs.eg.db, keys = identifiers, column = "ENTREZID", keytype = "SYMBOL"),
stringsAsFactors = F) %>% .[!is.na(.$entrez),] ## identify weird codes which do not have entrez ID
expreset_raw <- expreset_raw[rownames(expreset_raw) %in% identifiers_df$id, ] ## here we clean the dataset from pseudogenes and rows with none entrez ids which will not be used by Hipathia
print("read and clean from non entrez codes ...done")
# Explore how our data is organized
hist(as.numeric(expreset_raw[2,]),breaks=100)
var(as.numeric(expreset_raw[2,]))
getVari <- apply(expreset_raw, 1, var)
hist(getVari,100)
# Normalise by TMM with "edgeR" package
dge <- DGEList(counts=expreset_raw)
print("dge...done")
tmm <- calcNormFactors(dge, method="TMM")
print("tmm...done")
logcpm <- cpm(tmm, prior.count=3, log=TRUE)
print("logcpm...done")
### Normalise by Hipathia
gExp = logcpm
gExp = normalize.quantiles(gExp)
rownames(gExp)<- rownames(logcpm)
colnames(gExp) <- colnames(logcpm)
### Construct the metadata file with the info from GEO GSE152075 (Virus vs Control)
metadata <- data.frame(fileName = colnames(gExp),
type = c(rep("Virus",430), rep("Control",54)))
### Translate by Hipathia
trans_data <- translate_data(gExp, "hsa")
#### Export results table for the Cov-Hipathia Disease Maps differential signaling example.
trans_data_export <- tibble::rownames_to_column(as.data.frame(trans_data), var ="X")
write.table(trans_data_export, file = "GSE152075_input_data.tsv", col.names = T, row.names = F, quote = F, sep = "\t")
write.table(metadata, file = "GSE152075_design_matrix.tsv", row.names = F, col.names = F, quote = F, sep = "\t" )
......@@ -7,7 +7,7 @@ output:
df_print: paged
---
```{r setup, cache=TRUE}
```{r setup, cache=FALSE}
source("overrepresentation_analysis_helper_functions.R")
library(dplyr)
library(Seurat)
......@@ -28,7 +28,7 @@ if(!file.exists("eils-lung.cells.ucsc.edu_meta.tsv")) {
meta <- read.table("eils-lung.cells.ucsc.edu_meta.tsv", header=T, sep="\t", as.is=T, row.names=1)
### Folder for writing out the results. Modify if necessary
results.folder = "./lung_rmd_res/"
results.folder <- "./lung_rmd_res/"
if(!dir.exists(results.folder)) { dir.create(results.folder) }
```
......@@ -50,7 +50,7 @@ I do not know whether we need to do QC steps on the dataset or is it done.
I will use the dataset as it is I expect that the quality filtering is already done.
Just to make sure let's make some QC plots.
```{r plot Seurat features, cache=TRUE}
```{r plot Seurat features, cache=FALSE}
Seurat::VlnPlot(lung_sc_rna_seq, features = c("nFeature_RNA", "nCount_RNA", "percent.mt"), ncol = 3)
plot1 <- Seurat::FeatureScatter(lung_sc_rna_seq, feature1 = "nCount_RNA", feature2 = "percent.mt")
......@@ -97,9 +97,11 @@ Let's set the meta data specific identity :-)
```{r process identities(cntd)}
Seurat::Idents(object = lung_sc_rna_seq) <- "new.ident"
```
```{r calculate and write down markers, cache=TRUE}
```{r calculate markers, cache=TRUE}
lung_sc_rna_seq.markers <- Seurat::FindAllMarkers(lung_sc_rna_seq, only.pos = TRUE, min.pct = 0.25,
logfc.threshold = 0.25)
```
```{r write down markers, cache=FALSE}
write.table(lung_sc_rna_seq.markers, file = paste0(results.folder, "Lung_cell_specific_markers.txt"),
sep = "\t", quote=F, row.names = F)
```
......
......@@ -80,7 +80,7 @@ degs_inf_vs_bys_12 <- get_degs(colon, 'Colon_12h_', "_Infected", 'Colon_12h_', "
degs_inf_vs_bys_24 <- get_degs(colon, 'Colon_24h_', "_Infected", 'Colon_12h_', "_Bystander")
```
```{r colon: write results, cache=TRUE}
```{r colon: write results, cache=FALSE}
write.table(degs_12_vs_mock, file = file.path(results.folder,"comparing_all_cells_12h.txt"),
sep = "\t", quote=F, row.names = F)
write.table(degs_24_vs_mock, file = file.path(results.folder, "comparing_all_cells_24h.txt"),
......@@ -111,25 +111,33 @@ unique(Idents(illeum))
```{r ileum: calculate DEGs per cell type and write them down, cache=TRUE}
degs_inf_vs_mock_12_illeum <- get_degs(illeum, 'Illeum_12h_', "_Infected", 'Illeum_Mock_', "_Non-Infected")
write.table(degs_inf_vs_mock_12_illeum,
file = file.path(results.folder, "degs_inf_vs_mock_illeum_12h.txt"), sep = "\t", quote=F, row.names = F)
degs_inf_vs_mock_24_illeum <- get_degs(illeum, 'Illeum_24h_', "_Infected", 'Illeum_Mock_', "_Non-Infected")
write.table(degs_inf_vs_mock_24_illeum,
file = file.path(results.folder, "degs_inf_vs_mock_illeum_24h.txt"), sep = "\t", quote=F, row.names = F)
degs_bys_vs_mock_12_illeum <- get_degs(illeum, 'Illeum_12h_', "_Bystander", 'Illeum_Mock_', "_Non-Infected")
write.table(degs_bys_vs_mock_12_illeum,
file = file.path(results.folder, "degs_bys_vs_mock_12h_illeum.txt"), sep = "\t", quote=F, row.names = F)
degs_bys_vs_mock_24_illeum <- get_degs(illeum, 'Illeum_24h_', "_Bystander", 'Illeum_Mock_', "_Non-Infected")
write.table(degs_bys_vs_mock_24_illeum,
file = file.path(results.folder, "degs_bys_vs_mock_24h_illeum.txt"), sep = "\t", quote=F, row.names = F)
degs_inf_vs_bys_12_illeum <- get_degs(illeum, 'Illeum_12h_', "_Infected", 'Illeum_12h_', "_Bystander")
write.table(degs_inf_vs_bys_12_illeum,
file = file.path(results.folder, "degs_inf_vs_bys_12h_illeum.txt"), sep = "\t", quote=F, row.names = F)
degs_inf_vs_bys_24_illeum <- get_degs(illeum, 'Illeum_24h_', "_Infected", 'Illeum_24h_', "_Bystander")
```
```{r ileum: write results, cache=FALSE}
write.table(degs_inf_vs_mock_12_illeum,
file = file.path(results.folder, "degs_inf_vs_mock_illeum_12h.txt"),
sep = "\t", quote=F, row.names = F)
write.table(degs_inf_vs_mock_24_illeum,
file = file.path(results.folder, "degs_inf_vs_mock_illeum_24h.txt"),
sep = "\t", quote=F, row.names = F)
write.table(degs_bys_vs_mock_12_illeum,
file = file.path(results.folder, "degs_bys_vs_mock_12h_illeum.txt"),
sep = "\t", quote=F, row.names = F)
write.table(degs_bys_vs_mock_24_illeum,
file = file.path(results.folder, "degs_bys_vs_mock_24h_illeum.txt"),
sep = "\t", quote=F, row.names = F)
write.table(degs_inf_vs_bys_12_illeum,
file = file.path(results.folder, "degs_inf_vs_bys_12h_illeum.txt"),
sep = "\t", quote=F, row.names = F)
write.table(degs_inf_vs_bys_24_illeum,
file = file.path(results.folder, "degs_inf_vs_bys_24h_illeum.txt"), sep = "\t", quote=F, row.names = F)
file = file.path(results.folder, "degs_inf_vs_bys_24h_illeum.txt"),
sep = "\t", quote=F, row.names = F)
```
Writing down the cell counts for each type of data. This can be useful to investigate the data.
......
##################################################
## Helper Functions of over-represetation analysis
## Date: 13.04.2021
## Author: Marek Ostaszewski, Dezso Modos
##################################################
source("https://git-r3lab.uni.lu/covid/models/-/raw/master/Integration/MINERVA_access/minerva_access_functions.R")
### A function retrieving element identifiers from MINERVA
get_mnv_refs <- function(fcomponents, id_type) {
### Get MINERVA HGNCs as a named list
message("Retrieving MINERVA HGNCs collection")
mnv_refs <- sapply(fcomponents$map_elements, function(x) get_annotation(x$references, "HGNC_SYMBOL"))
names(mnv_refs) <- fcomponents$models$name
return(mnv_refs)
}
### For handling Reactome pathways
### Get HGNCs of UniProts
translate_up_hgnc <- function(upid, hard_filter = T) {
resp <- ask_GET(paste0("https://www.ebi.ac.uk/proteins/api/genecentric/", upid))
if(is.null(resp)) { return("") }
res <- fromJSON(resp)
if(is.null(res$relatedGene)) { return("") }
### This is a patchy reference; the correct way would be to get HGNC ID (numeric) and retrieve the symbol based on this
### but I'd like to avoid yet another API call
hgncs <- with(res$relatedGene, geneName[accession == upid & geneNameType == "Gene name"])
### Gene symbols retrieved this way may not be HGNCs; we hard filter out non-uppercased or too short ids
if(hard_filter) {
hgncs <- hgncs[(nchar(hgncs) > 2) & (toupper(hgncs) == hgncs)]
}
return(hgncs)
}
### Get participants of a pathway and translate them into HGNCs
get_reactome_pathway_contents <- function(reactome_path_id) {
res <- fromJSON(ask_GET(paste0("https://reactome.org/ContentService/data/participants/", reactome_path_id)))
ids <- sapply(res$refEntities, "[[", "identifier")
symbols <- sapply(unique(unlist(ids)), translate_up_hgnc)
return(unlist(unique(symbols)))
}
### A function retrieving element identifiers from WikiPathway diagrams
get_wp_refs <- function() {
message("Retrieving WP collection")
### This file contains all the pathways in the WP COVID-19 repo
wpids <- readLines("https://raw.githubusercontent.com/wikipathways/SARS-CoV-2-WikiPathways/master/pathways.txt")
require(rWikiPathways)
message("Retrieving WP HGNCs")
wp_hgncs <- sapply(wpids, getXrefList, "H")
names(wp_hgncs) <- wpids
return(wp_hgncs)
}
### A function retrieving element identifiers from Reactome diagrams
get_rct_refs <- function() {
message("Retrieving Reactome collection")
rct_cov_hgncs <- sapply(c("R-HSA-9678110", "R-HSA-9679504", "R-HSA-9679514", "R-HSA-9683701", "R-HSA-9679509"),
get_reactome_pathway_contents)
rct_cov2_hgncs <- sapply(c("R-HSA-9694614", "R-HSA-9694676", "R-HSA-9694682", "R-HSA-9694635", "R-HSA-9694322"),
get_reactome_pathway_contents)
return(c(rct_cov_hgncs, rct_cov2_hgncs))
}
get_hypergeometric_pvalue <- function(signature, genesets, background,
verbose = F) {
### This function is a modified version of the hypergeometric function from the R package hyperR
### (https://github.com/montilab/hypeR/blob/master/R/hyper_enrichment.R).
### The used background in this case is a vector of gene IDs. It checks whether certain genes are in the background.
### Without such check thehypergeomteric tests can understimate the enrichmnet.
if (!is(signature, "vector")) stop("Expected signature to be a vector of symbols\n")
if (!is(genesets, "list")) stop("Expected genesets to be a list of genesets\n")
if (!is(background, "vector")) stop("Expected background to be a vector of gene symbols\n")
if (length(intersect(signature, background))==0) stop ("The singiture is not in the background. Some mapping misstake.\n")
signature <- unique(signature)
genesets <- lapply(genesets, unique)
background <- unique(background)
signature.found <- signature[signature %in% unique(unlist(genesets))]
n.hits <- sapply(genesets, function(x, y) length(intersect(x, y)), signature.found)
n.drawn <- length(signature)
n.genesets <- sapply(genesets, function(x, y) length(intersect(x, y)), background)
n.left <- length(background)-n.genesets
print (list(n.hits, n.drawn, n.genesets, n.left))
pvals <- suppressWarnings(stats::phyper(q=n.hits-1,
m=n.genesets,
n=n.left,
k=n.drawn,
lower.tail=FALSE))
if(verbose) { message(pvals)}
data <- data.frame(label=names(genesets),
pval=signif(pvals, 2),
fdr=signif(stats::p.adjust(pvals, method="fdr"), 2),
signature=length(signature),
geneset=n.genesets,
overlap=n.hits,
left=n.left,
hits=sapply(genesets, function(x, y) paste(intersect(x, y), collapse=','), signature.found),
stringsAsFactors=FALSE)
return(data)
}
group_refs <- function(refs, groups) {
### Using by-name groupings provided, group unique HGNCs and return
grefs <- sapply(groups, function(x) setdiff(unique(unlist(refs[x])), c("", NA)))
names(grefs) <- names(groups)
return(grefs)
}
get_degs <- function(data, ident1a, ident1b, ident2a, ident2b){
#'Get diferretailly expressed genes comapring two idnetifiers in the data.
#'The identifiers are either at the beginning (ident1a ident2a) or at the end
#'of the cell tyep (ident2a, ident2b)
#'data is asureat object
#'ident1a, ident1b, ident2a, ident2b are strigns.
# Empty table fo DEG results
all_degs <- data.frame()
# Calculate DEGs between colon 24 and mock for each cell type
for (i in unique(data$CellTypes)){
print(i)
identity1 <- paste0(ident1a, i, ident1b)
identity2 <- paste0(ident2a, i, ident2b)
identifyers <- c(Idents(data))
selection1 <- Idents(data) == identity1
selection2 <- Idents(data) == identity2
if (length(identifyers[selection1])>2 & length(identifyers[selection2])>2){
# Get DEGs for all cell
degs <- FindMarkers(data, ident.1 = identity1, ident.2 = identity2, assay='RNA',test.use = "MAST")
# Put gene names as column not row names
degs$gene <- rownames(degs)
# Add comparison as column
degs <- degs %>% mutate(comparison = paste0(paste0(identity1, '_v_',identity2)))
# Append to previous results
all_degs <- rbind(all_degs, degs)
}
}
return(all_degs)
}
# Filter for p adj <= 0.05 and lfc >= 0.5
filtering_for_degs <- function (degsf, fdr=0.05, fc=0.5){
degsf <- degsf %>% dplyr::filter((p_val_adj <= fdr) & (abs(avg_log2FC>=fc)))
return(degsf)
}
plot_group_intersect <- function(overrepresentation, map_grefs, title, file_name) {
edge_table <- data.frame("Source" = character(0),"Target" = character(0),
"Weight" = integer(0), "Overlap percetage" = numeric(0))
node_table <- data.frame("Function" = character(0), "fdr" = numeric(0),
"Overlap percentage" = numeric(0), "Size" = integer(0))
#DEGs hits in the over-representation
all_deg_hits <- unique(unlist(strsplit(overrepresentation$hits, split = ",")))
#Creating the graph tables
for (i in 1:length(map_grefs)) {
for (j in 1:length(map_grefs)) {
if (i != j & i > j ) {
groups_isect <- intersect(map_grefs[[i]], map_grefs[[j]])
my.row <- data.frame("Source" = names(map_grefs)[i], "Target" = names(map_grefs)[j], "Weight" = length(groups_isect),
"Overlap percetage" = length(intersect(groups_isect,all_deg_hits))/length(groups_isect))
edge_table <- rbind(edge_table, my.row)
}
}
function_size <- length(all_deg_hits)
if(function_size == 0) { function_size <- 1 }
node_table_row <- data.frame("Function" = names(map_grefs)[i],
"fdr" = overrepresentation[names(map_grefs)[i], "fdr"],
"Overlap percentage" = length(intersect(map_grefs[[i]], all_deg_hits))/function_size,
"Size" = function_size)
node_table <- rbind(node_table, node_table_row)
}
g <- graph_from_data_frame(edge_table, directed=FALSE, vertices=node_table)
png(file_name, 3000, 3000)
g$layout <- layout_in_circle
#The palette is optimized to overlap percentage. If you want it to FDR you need to use the second palette.
g_palette = multiRamp(rbind(c(0,0.9),c(0.9,1)),list(c("blue","red"),c("red", "red")))
V(g)$color <- g_palette(V(g)$Overlap.percentage)
E(g)$color <- g_palette(edge.attributes(g)$Overlap.percetage)
plot.igraph(g,
main = title,
vertex.label = c("Virus replication cycle","ER stress and \nunfolded protein response",
"Autophagy and \nprotein degradation","Apoptosis\n pathway","Renin-angiotensin system",
"Coagulopathy\n pathway","PAMP signalling",
"Induction of interferons\n and the cytokine storm",
"Altered host\n metabolism", "Non-mechanistic diagrams"),
edge.width= 0.4*edge_attr(g,"Weight")+3,
vertex.size = 60, vertex.size2 = 15,
vertex.label.color ="white",
edge.curved = FALSE,
vertex.label.cex = 4,
edge.arrow.size = 6,
vertex.shape = "rectangle")
dev.off()
}
creating_overrepresetation_graphs_per_condtions <- function(data, map_grefs, root_dir = "") {
results <- list()
filterd_condtions <- filtering_for_degs(data)
background <- unique(data$gene)
cell_types <- unique(filterd_condtions["comparison"])$comparison
for (comparison in cell_types) {
print(comparison)
#Doing the enrichment
filterd_condtions <- filtering_for_degs(data, fdr=0.1, fc=0.3)
degs <- filterd_condtions[filterd_condtions["comparison"]==comparison, "gene"]
result <- get_hypergeometric_pvalue(degs, map_grefs, background)
results[[comparison]] <- result
plot_group_intersect(result, map_grefs, title = comparison, file_name = paste0(root_dir, comparison,"plot_05_05.png"))
write.table(result, paste0(root_dir, comparison,".txt"), quote=FALSE, sep ="\t")
}
names(results) <- cell_types
return(results)
}
\ No newline at end of file
......@@ -5,71 +5,34 @@
## Author: Marek Ostaszewski
##################################################
create_minerva_overlay <- function(deg_table, map_components,
min_lfc = 0.3, upper_cap = 3, lower_cap = -3,
to_file = NULL) {
# Remove low lfc genes
res <- deg_table[abs(deg_table$avg_log2FC) >= min_lfc,]
# Reduce to minimal data frame
res <- data.frame(name = res$gene, value = res$avg_log2FC)
res <- res[!is.na(res$name),]
tres <- table(res$name)
res <- res[!(res$name %in% names(tres[tres > 1])),]
# Cap to upper threshold and normalise positive values
res$value[res$value > upper_cap] <- upper_cap
res$value[res$value > 0] <- res$value[res$value > 0]/upper_cap
# Cap to upper threshold and normalise negative values
res$value[res$value < lower_cap] <- lower_cap
res$value[res$value < 0] <- res$value[res$value < 0]/abs(lower_cap)
hgnc_refs <- get_components_annotations(map_components, "HGNC_SYMBOL")
all_named_hgncs <- list()
for(h in 1:length(hgnc_refs)) {
names(hgnc_refs[[h]]) <- map_components$map_elements[[h]]$name
hgnc_refs[[h]] <- hgnc_refs[[h]][!is.na(hgnc_refs[[h]]) & sapply(hgnc_refs[[h]], length) > 0]
for(n in names(hgnc_refs[[h]])) {
all_named_hgncs[[n]] <- unique(c(all_named_hgncs[[n]], hgnc_refs[[h]][[n]]))
}
}
overlay <- data.frame(name = names(all_named_hgncs), value = 0)
for(i in 1:nrow(overlay)) {
overlay[i,2] <- mean(res[res$name %in% all_named_hgncs[[i]],"value"])
}
library(dplyr)
### A helper function to flatten the distribution by assigning values over max = max and under min = min
trim_tails <- function(x, max, min) {
x[x > max] <- max
x[x < min] <- min
return(x)
}
create_minerva_overlay <- function(deg_table, name_hgnc_map,
cap = 3, to_file = NULL) {
### Reduce to minimal data frame, cap to thresholds, normalise
res <- transmute(deg_table, name = gene, value = trim_tails(avg_log2FC, cap, -cap)) %>%
filter(!is.na(name)) %>%
mutate(value = value/cap)
overlay <- overlay[!is.na(overlay$value),]
### Calculate mean value for multiple HGNCs
overlay <- sapply(name_hgnc_map, function(x) mean(res[res$name %in% x,]$value))
overlay <- overlay[!is.nan(overlay)]
if(!is.null(to_file)) {
# Write a MINERVA-friendly version of the table, return processing-friendly version
write.table(overlay, file = to_file, sep = "\t", col.names = T, row.names = F, quote = F)
write.table(data.frame(name = names(overlay), value = overlay),
file = to_file, sep = "\t", col.names = T, row.names = F, quote = F)
}
return(res)
}
filter_data <- function(scdf, adjp = 0.1, minlfc = 0.3) {
scdf[abs(scdf$avg_log2FC) > minlfc & scdf$p_val_adj < adjp,]
}
pick_genes <- lapply(system("ls _notgit/rmd_res/all_*", intern = T), read.table, header = T, sep = "\t")
filter_genes <- lapply(pick_genes, filter_data)
mod_names <- sapply(filter_genes, function(x) x$comparison[1])
mod_names <- gsub("_Non-Infected", "_Mock", mod_names)
mod_names <- gsub("_Mock_", "_", mod_names)
mod_names <- gsub("_Illeum_", "_", mod_names)
mod_names <- gsub("_Colon_", "_", mod_names)
mod_names <- gsub("_v_Enterocyte 1_", "_vs_", mod_names)
mod_names <- gsub("_v_Inmature Enterocyte 2_", "_vs_", mod_names)
mod_names <- gsub("_v_Enterocyte 2_", "_vs_", mod_names)
mod_names <- gsub("_v_12h_Enterocyte 2_", "_vs_", mod_names)
mod_names <- gsub("_v_12h_Enterocyte 1_", "_vs_", mod_names)
mod_names <- gsub("_v_24h_Inmature Enterocyte 2_", "_vs_", mod_names)
mod_names <- gsub("_Enterocyte 1_", "_Enterocyte_1_", mod_names)
mod_names <- gsub("_Enterocyte 2_", "_Enterocyte_2_", mod_names)
mod_names <- gsub("_Inmature Enterocyte 2_", "_Imm_Enterocyte_2_", mod_names)
names(filter_genes) <- mod_names
### Helper functions to access and retrieve content from MINERVA-hosted diagrams
source("https://git-r3lab.uni.lu/covid/models/-/raw/master/Integration/MINERVA_access/minerva_access_functions.R")
......@@ -78,24 +41,60 @@ map <- "https://covid19map.elixir-luxembourg.org/minerva/api/"
### Get the components of MINERVA and WikiPathways (see minerva_access.R)
map_components <- get_map_components(map)
### Detailed lists
mnv_refs <- get_components_annotations(map_components, "HGNC_SYMBOL")
mnv_refs <- sapply(mnv_refs, function(x) unique(unlist(x)))
scores <- data.frame(sapply(filter_genes, function(x) sapply(mnv_refs, function(y) length(intersect(y, x$gene)))))
colnames(scores) <- sapply(filter_genes, function(x) x$comparison[1])
scores["overview",] <- colSums(scores)
hgnc_refs <- get_components_annotations(map_components, "HGNC_SYMBOL")
### An element in MINERVA can have many HGNCs; to solve these conflicts
### create a list of name - HGNC mappings; each named element gets a list of associated HGNCs
### these are passed to "create_minerva_overlay"
name_hgncs <- list()
for(h in 1:length(hgnc_refs)) {
names(hgnc_refs[[h]]) <- map_components$map_elements[[h]]$name
hgnc_refs[[h]] <- hgnc_refs[[h]][!is.na(hgnc_refs[[h]]) & sapply(hgnc_refs[[h]], length) > 0]
for(n in names(hgnc_refs[[h]])) {
name_hgncs[[n]] <- unique(c(name_hgncs[[n]], hgnc_refs[[h]][[n]]))
}
}
selected_overlays <- c("Illeum_24h_Imm_Enterocyte_2_Bystander_vs_Mock",
"Illeum_24h_Imm_Enterocyte_2_Infected_vs_Mock")
### Read airway DEG files (generated by "SC_analysis_airways.Rmd"), filter and create overlay
### Airway cells' DEGs are all in the same file, with "cluster" defining their label
if(!file.exists("./lung_rmd_res/Lung_cell_specific_markers.txt")) {
message("File './lung_rmd_res/Lung_cell_specific_markers.txt' does not exist, run 'SC_analysis_airways.Rmd' to generate it.")
} else {
mnv_airway_cells <- read.table(file = "./lung_rmd_res/Lung_cell_specific_markers.txt",
sep = "\t", header = T) %>%
filter(abs(avg_log2FC) > 0.3 & p_val_adj < 0.1)
### We select three "secretory" subtypes to create overlays
ariw_sec1_cells <- create_minerva_overlay(filter(mnv_airway_cells, cluster == "Secretory1"),
name_hgnc_map = name_hgncs, to_file = "airway_secretory1.txt")
ariw_sec2_cells <- create_minerva_overlay(filter(mnv_airway_cells, cluster == "Secretory2"),
name_hgnc_map = name_hgncs, to_file = "airway_secretory2.txt")
ariw_sec3_cells <- create_minerva_overlay(filter(mnv_airway_cells, cluster == "Secretory3"),
name_hgnc_map = name_hgncs, to_file = "airway_secretory3.txt")
}
map_components$map_elements[[2]]
if(!file.exists("./gut_rmd_res/degs_bys_vs_mock_24h_illeum.txt")) {
message("File './gut_rmd_res/degs_bys_vs_mock_24h_illeum.txt' does not exist, run 'SC_analysis_gut.Rmd' to generate it.")
} else {
### We select one bystander cell type from the gut organoids
mnv_bys_mock_24h_il <- read.table("./gut_rmd_res/degs_bys_vs_mock_24h_illeum.txt",
header = T, sep = "\t") %>%
filter(abs(avg_log2FC) > 0.3 & p_val_adj < 0.1)
sel_bys <- "Illeum_24h_Inmature Enterocyte 2_Bystander_v_Illeum_Mock_Inmature Enterocyte 2_Non-Infected"
bys_mock_24h_il_imm_ent <- create_minerva_overlay(filter(mnv_bys_mock_24h_il, comparison == sel_bys),
name_hgnc_map = name_hgncs,
to_file = "Ileum_24h_Imm_Enterocyte_2_Bystander_vs_Mock.txt")
}
bys_mock_24h_il_imm_ent <- create_minerva_overlay(filter_genes[[selected_overlays[1]]], map_components,
to_file = paste0(selected_overlays[1],".txt"))
inf_mock_24h_il_imm_ent <- create_minerva_overlay(filter_genes[[selected_overlays[2]]], map_components,
to_file = paste0(selected_overlays[2],".txt"))
mnv_airway_cells <- filter_data(read.table("./Lung_cell_specific_markers.txt"))
create_minerva_overlay(mnv_airway_cells[grep("Secretory", mnv_airway_cells$cluster),],
map_components = map_components,
to_file = "airway_secretory.txt")
if(!file.exists("./gut_rmd_res/degs_inf_vs_mock_illeum_24h.txt")) {
message("File './gut_rmd_res/degs_inf_vs_mock_illeum_24h.txt' does not exist, run 'SC_analysis_gut.Rmd' to generate it.")
} else {
### We select one infected cell type from the gut organoids
mnv_inf_mock_24h_il <- read.table("./gut_rmd_res/degs_inf_vs_mock_illeum_24h.txt",
header = T, sep = "\t") %>%
filter(abs(avg_log2FC) > 0.3 & p_val_adj < 0.1)
sel_inf <- "Illeum_24h_Inmature Enterocyte 2_Infected_v_Illeum_Mock_Inmature Enterocyte 2_Non-Infected"
inf_mock_24h_il_imm_ent <- create_minerva_overlay(filter(mnv_inf_mock_24h_il, comparison == sel_inf),
name_hgnc_map = name_hgncs,
to_file = "Illeum_24h_Imm_Enterocyte_2_Infected_vs_Mock.txt")
}