Commit 20973bf6 authored by Marek Ostaszewski's avatar Marek Ostaszewski
Browse files

csv configs, errors from OmniPath

parent 079b19ff
......@@ -25,7 +25,7 @@ input <- readLines(infile)
config <- "config.txt"
if(length(args) > 1) { config <- args[2] }
if(!file.exists(config)) { message("No correct config given!")}
config <- read.table(config, sep = "\t", stringsAsFactors = F, header = T)
config <- read.table(config, sep = ",", stringsAsFactors = F, header = T)
outputFile <-config[config$param == "output_file", "value"]
n <-config[config$param == "n", "value"]
......@@ -91,8 +91,16 @@ retrieve_omnipath <- function(finput, only_between_inputs = T) {
resp <- httr::GET(url = url,
httr::add_headers('Content-Type' = "application/x-www-form-urlencoded"))
if(httr::status_code(resp) != 200) {
message("OmniPathDB is not available...")
### Omnipath not functioning, return empty table
return(setNames(data.frame(matrix(ncol = 5, nrow = 0)),
c("source_hgnc_symbol","target_hgnc_symbol","is_directed", "consensus_direction", "references")))
}
result <- read.table(text = httr::content(resp, as = "text", encoding = "UTF-8"),
sep = "\t", header = T)
result <- result[result$is_directed == 1,]
colnames(result)[which(names(result) == "source_genesymbol")] <- "source_hgnc_symbol"
colnames(result)[which(names(result) == "target_genesymbol")] <- "target_hgnc_symbol"
......
### Map and pathway enrichment analysis.
### Requires commandline input or file "input.txt" to be present in the script directory.
required.packages <- c("jsonlite", "httr", "enrichR")
new.packages <- required.packages[!(required.packages %in% installed.packages()[,"Package"])]
if(length(new.packages)) install.packages(new.packages, repos='http://cran.us.r-project.org')
library(jsonlite)
library(httr)
ask_GET <- function(furl, fask) {
resp <- httr::GET(url = paste0(furl, fask),
httr::add_headers('Content-Type' = "application/x-www-form-urlencoded"))
return(httr::content(resp, as = "text"))
}
args = commandArgs(trailingOnly=TRUE)
infile <- "input.txt"
if(length(args) > 0) { infile <- args[1] }
if(!file.exists(infile)) { message("No correct input given!")}
input <- readLines(infile)
config <- "config.txt"
if(length(args) > 0) { config <- args[2] }
if(!file.exists(config)) { message("No correct config given!")}
config <- read.table(config, sep = "\t", stringsAsFactors = F, header = T)
message("Enrichment of disease maps in config")
cat(file = "enriched_disease_maps.txt",
"minerva_instance\tproject_id\tmodel_id\tname\tbounds.height\tbounds.width\tbounds.x\tbounds.y\n",
append = F)
for(map in config[config$type == "map","resource"]) {
message(paste0("Querying ", map))
cfg <- fromJSON(ask_GET(map, "configuration/"))
project_id <- cfg$options[cfg$options$type == "DEFAULT_MAP","value"]
mnv_base <- paste0(map,"projects/",project_id,"/")
### Ask for models
message(paste0("Asking for models: ", mnv_base, "models/"))
models <- fromJSON(ask_GET(mnv_base, "models/"), flatten = F)
model_elements <- lapply(models$idObject,
function(x) fromJSON(ask_GET(paste0(mnv_base,"models/",x,"/"),
"bioEntities/elements/?columns=id,name,type,references,bounds"),
flatten = F))
all_hgncs <- c()
message(paste0("Setting up enrichment parameters..."))
### Prep run, go through all model elements and:
for(me in model_elements) {
### Get all HGNCs (universe for enrichment)
all_hgncs <- c(all_hgncs, sapply(me$references,
function(x) x[x$type == "HGNC_SYMBOL", "resource"]))
all_hgncs <- all_hgncs[sapply(all_hgncs,
function(x) ifelse(is.character(x) & length(x) > 0, TRUE, FALSE))]
}
N <- length(all_hgncs)
n <- length(input)
message(paste0("Going through individual models..."))
for(i in 1:length(model_elements)) {
me <- model_elements[[i]]
if(length(unique(me[me$type == "Pathway","name"])) == 0) { next }
path_pVals <- data.frame(pname = unique(me[me$type == "Pathway","name"]), pVal = 1)
### Calculating the inclusion of the elements per bioentity set
for(up in 1:nrow(path_pVals)) {
res <- apply(me[me$name == path_pVals$pname[up],"bounds"], 1,
function(x) me$bounds$x >= x["x"] & me$bounds$x <= (x["x"] + x["width"]) &
me$bounds$y >= x["y"] & me$bounds$y <= (x["y"] + x["height"]))
hgncs <- unlist(sapply(me$references[res & me$type %in% c("Protein", "RNA", "Gene")],
function(x) x[x$type == "HGNC_SYMBOL", "resource"]))
m <- length(hgncs)
k <- length(intersect(input,hgncs))
if (k > 0) {
path_pVals$pVal[up] <- (((m * (n - k)) / (k * (N - m))) ** k) * (((n * (N - m)) / (N * (n - k))) ** n)
}
}
### Bonferroni correction
# path_pVals$pVal <- path_pVals$pVal*nrow(path_pVals)
path_pVals <- path_pVals[path_pVals$pVal < 0.1, ]
if(nrow(path_pVals) > 0) {
path_pVals <- path_pVals[order(path_pVals$pVal), ]
if(!is.null(config[config$resource == "max_areas_per_map",]$value)) {
path_pVals <- path_pVals[1:config[config$resource == "max_areas_per_map",]$value, ]
}
pbounds <- cbind(name = me[me$name %in% path_pVals$pname,]$name,
me[me$name %in% path_pVals$pname,]$bounds)
pathways_to_pull <- cbind(minerva_instance = map,
project_id = project_id,
model_id = models$idObject[i],
pbounds, stringsAsFactors = F)
write.table(pathways_to_pull, file = "enriched_disease_maps.txt",
sep = "\t", quote = F, col.names = F, row.names = F, append = T)
message(paste0("Output file updated..."))
}
}
message(paste0("Done."))
}
library(enrichR)
message("Enrichment of pathways")
### Header of the enrichment table, it's consecutive segments will be written
### later below
cat(file = "enriched_pathways.txt",
"pathway_db\tTerm\tAdjusted.P.value\n",
append = F)
pathway_dbs <- config[config$type == "pathway_db","resource"]
### Run Enrichr
enriched_pathways <- enrichr(input, databases = pathway_dbs)
### Leave only the pathways with adjusted p lower than 0.1
adj_pathways <- lapply(enriched_pathways,
function(x) {
selected <- which(x$Adjusted.P.value < 0.1)
### Cap the amount of pathways to the predefined max in config
cap <- min(length(selected), config[config$resource == "max_areas_per_pathway_db",]$value)
selected <- head(selected, cap)
x[selected, ]
})
### Clean empty results
adj_pathways <- adj_pathways[sapply(adj_pathways, dim)[1,] > 0]
### Write results down in the file
for(aep in names(adj_pathways)) {
write.table(cbind(pathway_db = aep,
adj_pathways[[aep]][,c("Term","Adjusted.P.value")],
stringsAsFactors = F),
file = "enriched_pathways.txt", sep = "\t", quote = F, col.names = F, row.names = F, append = T)
}
message("Enrichment finished.")
### Map and pathway enrichment analysis.
### Requires commandline input or file "input.txt" to be present in the script directory.
required.packages <- c("jsonlite", "httr", "enrichR")
new.packages <- required.packages[!(required.packages %in% installed.packages()[,"Package"])]
if(length(new.packages)) install.packages(new.packages, repos='http://cran.us.r-project.org')
library(jsonlite)
library(httr)
ask_GET <- function(furl, fask) {
resp <- httr::GET(url = paste0(furl, fask),
httr::add_headers('Content-Type' = "application/x-www-form-urlencoded"))
return(httr::content(resp, as = "text"))
}
args = commandArgs(trailingOnly=TRUE)
infile <- "input.txt"
if(length(args) > 0) { infile <- args[1] }
if(!file.exists(infile)) { message("No correct input given!")}
input <- readLines(infile)
config <- "config.txt"
if(length(args) > 0) { config <- args[2] }
if(!file.exists(config)) { message("No correct config given!")}
config <- read.table(config, sep = ",", stringsAsFactors = F, header = T)
message("Enrichment of disease maps in config")
cat(file = "enriched_disease_maps.txt",
"minerva_instance\tproject_id\tmodel_id\tname\tbounds.height\tbounds.width\tbounds.x\tbounds.y\n",
append = F)
for(map in config[config$type == "map","resource"]) {
message(paste0("Querying ", map))
cfg <- fromJSON(ask_GET(map, "configuration/"))
project_id <- cfg$options[cfg$options$type == "DEFAULT_MAP","value"]
mnv_base <- paste0(map,"projects/",project_id,"/")
### Ask for models
message(paste0("Asking for models: ", mnv_base, "models/"))
models <- fromJSON(ask_GET(mnv_base, "models/"), flatten = F)
model_elements <- lapply(models$idObject,
function(x) fromJSON(ask_GET(paste0(mnv_base,"models/",x,"/"),
"bioEntities/elements/?columns=id,name,type,references,bounds"),
flatten = F))
all_hgncs <- c()
message(paste0("Setting up enrichment parameters..."))
### Prep run, go through all model elements and:
for(me in model_elements) {
### Get all HGNCs (universe for enrichment)
all_hgncs <- c(all_hgncs, sapply(me$references,
function(x) x[x$type == "HGNC_SYMBOL", "resource"]))
all_hgncs <- all_hgncs[sapply(all_hgncs,
function(x) ifelse(is.character(x) & length(x) > 0, TRUE, FALSE))]
}
N <- length(all_hgncs)
n <- length(input)
message(paste0("Going through individual models..."))
for(i in 1:length(model_elements)) {
me <- model_elements[[i]]
if(length(unique(me[me$type == "Pathway","name"])) == 0) { next }
path_pVals <- data.frame(pname = unique(me[me$type == "Pathway","name"]), pVal = 1)
### Calculating the inclusion of the elements per bioentity set
for(up in 1:nrow(path_pVals)) {
res <- apply(me[me$name == path_pVals$pname[up],"bounds"], 1,
function(x) me$bounds$x >= x["x"] & me$bounds$x <= (x["x"] + x["width"]) &
me$bounds$y >= x["y"] & me$bounds$y <= (x["y"] + x["height"]))
hgncs <- unlist(sapply(me$references[res & me$type %in% c("Protein", "RNA", "Gene")],
function(x) x[x$type == "HGNC_SYMBOL", "resource"]))
m <- length(hgncs)
k <- length(intersect(input,hgncs))
if (k > 0) {
path_pVals$pVal[up] <- (((m * (n - k)) / (k * (N - m))) ** k) * (((n * (N - m)) / (N * (n - k))) ** n)
}
}
### Bonferroni correction
# path_pVals$pVal <- path_pVals$pVal*nrow(path_pVals)
path_pVals <- path_pVals[path_pVals$pVal < 0.1, ]
if(nrow(path_pVals) > 0) {
path_pVals <- path_pVals[order(path_pVals$pVal), ]
if(!is.null(config[config$resource == "max_areas_per_map",]$value)) {
path_pVals <- path_pVals[1:config[config$resource == "max_areas_per_map",]$value, ]
}
pbounds <- cbind(name = me[me$name %in% path_pVals$pname,]$name,
me[me$name %in% path_pVals$pname,]$bounds)
pathways_to_pull <- cbind(minerva_instance = map,
project_id = project_id,
model_id = models$idObject[i],
pbounds, stringsAsFactors = F)
write.table(pathways_to_pull, file = "enriched_disease_maps.txt",
sep = "\t", quote = F, col.names = F, row.names = F, append = T)
message(paste0("Output file updated..."))
}
}
message(paste0("Done."))
}
library(enrichR)
message("Enrichment of pathways")
### Header of the enrichment table, it's consecutive segments will be written
### later below
cat(file = "enriched_pathways.txt",
"pathway_db\tTerm\tAdjusted.P.value\n",
append = F)
pathway_dbs <- config[config$type == "pathway_db","resource"]
### Run Enrichr
enriched_pathways <- enrichr(input, databases = pathway_dbs)
### Leave only the pathways with adjusted p lower than 0.1
adj_pathways <- lapply(enriched_pathways,
function(x) {
selected <- which(x$Adjusted.P.value < 0.1)
### Cap the amount of pathways to the predefined max in config
cap <- min(length(selected), config[config$resource == "max_areas_per_pathway_db",]$value)
selected <- head(selected, cap)
x[selected, ]
})
### Clean empty results
adj_pathways <- adj_pathways[sapply(adj_pathways, dim)[1,] > 0]
### Write results down in the file
for(aep in names(adj_pathways)) {
write.table(cbind(pathway_db = aep,
adj_pathways[[aep]][,c("Term","Adjusted.P.value")],
stringsAsFactors = F),
file = "enriched_pathways.txt", sep = "\t", quote = F, col.names = F, row.names = F, append = T)
}
message("Enrichment finished.")
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment