02_Carnival_Transcriptomics_phosphoprotemicsMann.md

library(readr)
library(DESeq2)
library(CARNIVAL)
library(OmnipathR)
library(progeny)
library(dorothea)
library(dplyr)
library(readr)
library(tidyr)
library(tibble)
library(ggplot2)

## We also define a function to format the CARNIVAL output to cytoscape
OutputCyto <- function(CarnivalResults, outputFile) {
    CarnivalNetwork <-
        as.data.frame(CarnivalResults$weightedSIF, stringsAsFactors = FALSE) %>%
        dplyr::mutate(Sign = as.numeric(Sign), Weight = as.numeric(Weight)) %>%
        dplyr::mutate(Weight = Sign * Weight) %>%
        dplyr::select(Node1, Weight, Node2)

    CarnivalNetworkNodes <-
        unique(c(CarnivalNetwork$Node1,CarnivalNetwork$Node2))

    CarnivalAttributes <- CarnivalResults$nodesAttributes %>%
        as.data.frame() %>%
        dplyr::filter(Node %in% CarnivalNetworkNodes) %>%
        dplyr::mutate(NodeType = as.character(NodeType)) %>%
        dplyr::mutate(NodeType=if_else(NodeType =="", "I", NodeType))

    nameOutputNetwork <- paste0(outputFile, "Network.sif")
    nameOutputAttributes <-  paste0(outputFile, "Attributes.txt")

    write.table(CarnivalNetwork, file = nameOutputNetwork,
        quote = FALSE, row.names = FALSE, col.names = FALSE, sep = " ")

    write.table(CarnivalAttributes, file = nameOutputAttributes,
        quote = FALSE, row.names = FALSE, col.names = TRUE, sep = "\t")
}
## Raw counts table
GSE147507_raw_counts <-
    read.csv("TranscriptomicsData/GSE147507_RawReadCounts_Human.tsv", sep = "\t")

count_A549vsCOV2_df <- GSE147507_raw_counts[,c(22:27)]
row.names(count_A549vsCOV2_df) <- GSE147507_raw_counts$X

## Define conditions
targets_A549vsCOV2 <-
    as.data.frame(matrix(NA,length(names(count_A549vsCOV2_df)),1))
names(targets_A549vsCOV2) <- c("condition")
row.names(targets_A549vsCOV2) <- names(count_A549vsCOV2_df)
targets_A549vsCOV2$condition <-
    gsub("Series5_", "", row.names(targets_A549vsCOV2))
targets_A549vsCOV2$condition <-
    factor(gsub("_[1-3]$", "", targets_A549vsCOV2$condition))
targets_A549vsCOV2
##                                 condition
## Series5_A549_Mock_1             A549_Mock
## Series5_A549_Mock_2             A549_Mock
## Series5_A549_Mock_3             A549_Mock
## Series5_A549_SARS.CoV.2_1 A549_SARS.CoV.2
## Series5_A549_SARS.CoV.2_2 A549_SARS.CoV.2
## Series5_A549_SARS.CoV.2_3 A549_SARS.CoV.2
## Create deseq2 object
dds_A549vsCOV2 <-
    DESeqDataSetFromMatrix(countData = as.matrix(count_A549vsCOV2_df),
    colData = targets_A549vsCOV2, design = ~ condition)

## Set control
dds_A549vsCOV2$condition <- relevel(dds_A549vsCOV2$condition,
    ref = levels(targets_A549vsCOV2$condition)[1])

## Carry out diff exp
dds_A549vsCOV2 <- DESeq(dds_A549vsCOV2)

## See the comparisons carried out
comparison_A549vsCOV2 <- resultsNames(dds_A549vsCOV2)

## Get results table
results_A549vsCOV2 <-
    results(dds_A549vsCOV2, name=comparison_A549vsCOV2[2])
dds_A549vsCOV2_df <- as.data.frame(results_A549vsCOV2) %>%
    rownames_to_column(var = "GeneID") %>%
    dplyr::select(GeneID, stat) %>%
    dplyr::filter(!is.na(stat)) %>%
    column_to_rownames(var = "GeneID")

pathways_A549vsCOV2_zscore <- t(progeny(as.matrix(dds_A549vsCOV2_df),
    scale=TRUE, organism="Human", top = 100, perm = 10000, z_scores = TRUE))
colnames(pathways_A549vsCOV2_zscore) <- "NES"

## I also need to run progeny in such a way to have values between 1 and -1 to
## use as CARNIVAL input
pathways_A549vsCOV2_zscore_inputCarnival <-
  t(progeny(as.matrix(dds_A549vsCOV2_df),
    scale=TRUE, organism="Human", top = 100, perm = 10000, z_scores = FALSE))
colnames(pathways_A549vsCOV2_zscore_inputCarnival) <- "Activity"
pathways_A549vsCOV2_zscore_df <- as.data.frame(pathways_A549vsCOV2_zscore) %>%
    rownames_to_column(var = "Pathway") %>%
    dplyr::arrange(NES) %>%
    dplyr::mutate(Pathway = factor(Pathway))

ggplot(pathways_A549vsCOV2_zscore_df,aes(x = reorder(Pathway, NES), y = NES)) +
    geom_bar(aes(fill = NES), stat = "identity") +
    scale_fill_gradient2(low = "darkblue", high = "indianred",
        mid = "whitesmoke", midpoint = 0) +
    theme_minimal() +
    theme(axis.title = element_text(face = "bold", size = 12),
        axis.text.x =
            element_text(angle = 45, hjust = 1, size =10, face= "bold"),
        axis.text.y = element_text(size =10, face= "bold"),
        panel.grid.major = element_blank(),
        panel.grid.minor = element_blank()) +
    xlab("Pathways")
## We load Dorothea Regulons
data(dorothea_hs, package = "dorothea")
regulons <- dorothea_hs %>%
  dplyr::filter(confidence %in% c("A", "B","C"))
dds_A549vsCOV2_stat <-  as.data.frame(results_A549vsCOV2) %>%
    rownames_to_column(var = "GeneID") %>%
    dplyr::select(GeneID, stat) %>%
    dplyr::filter(!is.na(stat)) %>%
    column_to_rownames(var = "GeneID") %>%
    as.matrix()

tf_activities_A549vsCOV2_stat <-
    dorothea::run_viper(as.matrix(dds_A549vsCOV2_stat), regulons,
    options =  list(minsize = 5, eset.filter = FALSE,
    cores = 1, verbose = FALSE, nes = TRUE))
tf_activities_A549vsCOV2_top25 <- tf_activities_A549vsCOV2_stat %>%
    as.data.frame() %>%
    rownames_to_column(var = "GeneID") %>%
    dplyr::rename(NES = "stat") %>%
    dplyr::top_n(25, wt = abs(NES)) %>%
    dplyr::arrange(NES) %>%
    dplyr::mutate(GeneID = factor(GeneID))

ggplot(tf_activities_A549vsCOV2_top25,aes(x = reorder(GeneID, NES), y = NES)) +
    geom_bar(aes(fill = NES), stat = "identity") +
    scale_fill_gradient2(low = "darkblue", high = "indianred",
        mid = "whitesmoke", midpoint = 0) +
    theme_minimal() +
    theme(axis.title = element_text(face = "bold", size = 12),
        axis.text.x =
            element_text(angle = 45, hjust = 1, size =10, face= "bold"),
        axis.text.y = element_text(size =10, face= "bold"),
        panel.grid.major = element_blank(),
        panel.grid.minor = element_blank()) +
    xlab("Transcription Factors")
## We take the kinase activity after 24 hours to be in line with the transcriptomics
## data
DIA_kinase_activity <- read_csv("Mann/diaCovidKinaseActMat.csv") %>%
    dplyr::rename(kinase = "X1") %>%
    dplyr::select(kinase, `24h`)
## Warning: Missing column names filled in: 'X1' [1]
ia_omnipath <- import_omnipath_interactions() %>% as_tibble()
## Downloaded 36684 interactions.
ia_pwextra <- import_pathwayextra_interactions() %>% as_tibble()
## Downloaded 44955 interactions.
ia_kinaseextra <- import_kinaseextra_interactions() %>% as_tibble()
## Downloaded 22338 interactions.
## We bind the datasets
interactions <- as_tibble(
    bind_rows(
        ia_omnipath %>% mutate(type = 'ppi'),
        ia_pwextra %>% mutate(type = 'ppi'),
        ia_kinaseextra %>% mutate(type = 'ppi')))

signed_directed_interactions <-
    dplyr::filter(interactions, consensus_direction==1) %>%
    filter(consensus_stimulation == 1 | consensus_inhibition == 1) %>%
    dplyr::mutate(sign = if_else(consensus_stimulation==1,1,-1))  %>%
    dplyr::select(source_genesymbol, sign,  target_genesymbol) %>%
    dplyr::rename(source ="source_genesymbol", target ="target_genesymbol")

carnival_pkn <- signed_directed_interactions %>%
    dplyr::distinct(source, target, .keep_all = TRUE)

all_source_nodes <- unique(carnival_pkn$source)
all_target_nodes <- unique(carnival_pkn$target)
all_nodes_network <- unique(c(all_source_nodes,all_target_nodes))
top_10_kinases <- DIA_kinase_activity %>%
    dplyr::filter(kinase %in% all_source_nodes) %>%
    dplyr::top_n(10,abs(`24h`)) %>%
    tibble::column_to_rownames(var = "kinase") %>%
    t() %>% sign() %>% as.data.frame()

top_30_tf <- tf_activities_A549vsCOV2_stat %>%
  as.data.frame() %>%
  rownames_to_column(var = "GeneID") %>%
  dplyr::filter(GeneID %in% all_nodes_network) %>%
  dplyr::arrange(desc(abs(stat))) %>%
  dplyr::top_n(30, wt = abs(stat)) %>%
  column_to_rownames(var = "GeneID") %>%
  t() %>% as.data.frame()

progeny_weigths <- pathways_A549vsCOV2_zscore_inputCarnival %>%
    as.data.frame() %>% t()
carnival_results_noprogeny <-runCARNIVAL(
    solverPath="/opt/ibm/ILOG/CPLEX_Studio129/cplex/bin/x86-64_linux/cplex",
    netObj=carnival_pkn,
    measObj=top_30_tf,
    inputObj = top_10_kinases,
    # dir_name="Carnival_Results",
    # weightObj=as.data.frame(t(Kin_activity_LNCaP_noInhib_t1_EGF)),
    # nodeID = 'gene',
    timelimit = 900,
    solver = "cplex")
saveRDS(carnival_results_noprogeny,
    file = "Carnival_Results/carnival_results_noprogeny.rds")
OutputCyto(carnival_results_noprogeny,
    outputFile="Carnival_Results/carnival_results_noprogeny")
carnival_results_withprogeny <-runCARNIVAL(
    solverPath="/opt/ibm/ILOG/CPLEX_Studio129/cplex/bin/x86-64_linux/cplex",
    netObj=carnival_pkn,
    measObj=top_30_tf,
    inputObj = top_10_kinases,
    # dir_name="Carnival_Results",
    weightObj=progeny_weigths,
    # nodeID = 'gene',
    timelimit = 900,
    solver = "cplex")
saveRDS(carnival_results_withprogeny,
    file = "Carnival_Results/carnival_results_withprogeny.rds")
OutputCyto(carnival_results_withprogeny,
    outputFile="Carnival_Results/carnival_results_withprogeny")
## R version 4.0.1 (2020-06-06)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Ubuntu 19.10
##
## Matrix products: default
## BLAS:   /usr/lib/x86_64-linux-gnu/openblas/libblas.so.3
## LAPACK: /usr/lib/x86_64-linux-gnu/libopenblasp-r0.3.7.so
##
## locale:
##  [1] LC_CTYPE=en_GB.UTF-8       LC_NUMERIC=C
##  [3] LC_TIME=en_GB.UTF-8        LC_COLLATE=en_GB.UTF-8
##  [5] LC_MONETARY=en_GB.UTF-8    LC_MESSAGES=en_GB.UTF-8
##  [7] LC_PAPER=en_GB.UTF-8       LC_NAME=C
##  [9] LC_ADDRESS=C               LC_TELEPHONE=C
## [11] LC_MEASUREMENT=en_GB.UTF-8 LC_IDENTIFICATION=C
##
## attached base packages:
## [1] parallel  stats4    stats     graphics  grDevices utils     datasets
## [8] methods   base
##
## other attached packages:
##  [1] ggplot2_3.3.1               tibble_3.0.1
##  [3] tidyr_1.1.0                 dplyr_1.0.0
##  [5] dorothea_1.0.0              progeny_1.11.1
##  [7] OmnipathR_1.3.2             jsonlite_1.6.1
##  [9] igraph_1.2.5                CARNIVAL_1.0.1
## [11] DESeq2_1.28.1               SummarizedExperiment_1.18.1
## [13] DelayedArray_0.14.0         matrixStats_0.56.0
## [15] Biobase_2.48.0              GenomicRanges_1.40.0
## [17] GenomeInfoDb_1.24.0         IRanges_2.22.2
## [19] S4Vectors_0.26.1            BiocGenerics_0.34.0
## [21] readr_1.3.1
##
## loaded via a namespace (and not attached):
##  [1] segmented_1.1-0        Category_2.54.0        bitops_1.0-6
##  [4] bit64_0.9-7            doParallel_1.0.15      RColorBrewer_1.1-2
##  [7] httr_1.4.1             tools_4.0.1            R6_2.4.1
## [10] KernSmooth_2.23-17     DBI_1.1.0              colorspace_1.4-1
## [13] withr_2.2.0            gridExtra_2.3          tidyselect_1.1.0
## [16] bit_1.1-15.2           curl_4.3               compiler_4.0.1
## [19] graph_1.66.0           labeling_0.3           scales_1.1.1
## [22] genefilter_1.70.0      RBGL_1.64.0            rappdirs_0.3.1
## [25] stringr_1.4.0          digest_0.6.25          mixtools_1.2.0
## [28] rmarkdown_2.2          XVector_0.28.0         pkgconfig_2.0.3
## [31] htmltools_0.4.0        bcellViper_1.24.0      dbplyr_1.4.4
## [34] rlang_0.4.6            RSQLite_2.2.0          farver_2.0.3
## [37] generics_0.0.2         BiocParallel_1.22.0    viper_1.22.0
## [40] RCurl_1.98-1.2         magrittr_1.5           GenomeInfoDbData_1.2.3
## [43] Matrix_1.2-18          Rcpp_1.0.4.6           munsell_0.5.0
## [46] lifecycle_0.2.0        stringi_1.4.6          yaml_2.2.1
## [49] UniProt.ws_2.28.0      MASS_7.3-51.6          zlibbioc_1.34.0
## [52] BiocFileCache_1.12.0   grid_4.0.1             blob_1.2.1
## [55] ggrepel_0.8.2          crayon_1.3.4           lattice_0.20-41
## [58] splines_4.0.1          annotate_1.66.0        hms_0.5.3
## [61] locfit_1.5-9.4         knitr_1.28             pillar_1.4.4
## [64] geneplotter_1.66.0     codetools_0.2-16       lpSolve_5.6.15
## [67] XML_3.99-0.3           glue_1.4.1             evaluate_0.14
## [70] vctrs_0.3.1            foreach_1.5.0          gtable_0.3.0
## [73] purrr_0.3.4            kernlab_0.9-29         assertthat_0.2.1
## [76] xfun_0.14              xtable_1.8-4           e1071_1.7-3
## [79] class_7.3-17           survival_3.1-12        iterators_1.0.12
## [82] AnnotationDbi_1.50.0   memoise_1.1.0          ellipsis_0.3.1
## [85] GSEABase_1.50.1