diff --git a/02_Carnival_Transcriptomics_phosphoprotemicsMann.Rmd b/02_Carnival_Transcriptomics_phosphoprotemicsMann.Rmd new file mode 100644 index 0000000000000000000000000000000000000000..4b5ff83cf6b09c402ce5f6a2c4c6653872973890 --- /dev/null +++ b/02_Carnival_Transcriptomics_phosphoprotemicsMann.Rmd @@ -0,0 +1,363 @@ +--- +title: "CARNIVAL for causal integration of Phospho and transcriptomics data after SARS-CoV-2 infection" +author: "Alberto Valdeolivas: alberto.valdeolivas@bioquant.uni-heidelberg.de; Date:" +date: "09/07/2020" +output: github_document +--- + +```{r setup, include=FALSE} +knitr::opts_chunk$set(echo = TRUE) +``` + +### License Info + +This program is free software: you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free Software +Foundation, either version 3 of the License, or (at your option) any later +version. + +This program is distributed in the hope that it will be useful, but WITHOUT ANY +WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR +A PARTICULAR PURPOSE. See the GNU General Public License for more details. + +Please check http://www.gnu.org/licenses/. + +## Introduction + +The present script computes TF activity from transcriptomics data on the A549 +human lung derived cell line after SARS-CoV-2 infection. It also takes kinase +activity derived from phosphoproteomics data in the same cell line. Both +experiments were conducted 24 hours after SARS-CoV-2 infection, but they come +from two different independent publications (see references) + +## Getting the inputs for CARNIPHAL + +We first load the required libraries and define a function to export CARNIVAL +results to cytoscape. + +```{r, message=FALSE, warning=FALSE} +library(readr) +library(DESeq2) +library(CARNIVAL) +library(OmnipathR) +library(progeny) +library(dorothea) +library(dplyr) +library(readr) +library(tidyr) +library(tibble) +library(ggplot2) + +## We also define a function to format the CARNIVAL output to cytoscape +OutputCyto <- function(CarnivalResults, outputFile) { + CarnivalNetwork <- + as.data.frame(CarnivalResults$weightedSIF, stringsAsFactors = FALSE) %>% + dplyr::mutate(Sign = as.numeric(Sign), Weight = as.numeric(Weight)) %>% + dplyr::mutate(Weight = Sign * Weight) %>% + dplyr::select(Node1, Weight, Node2) + + CarnivalNetworkNodes <- + unique(c(CarnivalNetwork$Node1,CarnivalNetwork$Node2)) + + CarnivalAttributes <- CarnivalResults$nodesAttributes %>% + as.data.frame() %>% + dplyr::filter(Node %in% CarnivalNetworkNodes) %>% + dplyr::mutate(NodeType = as.character(NodeType)) %>% + dplyr::mutate(NodeType=if_else(NodeType =="", "I", NodeType)) + + nameOutputNetwork <- paste0(outputFile, "Network.sif") + nameOutputAttributes <- paste0(outputFile, "Attributes.txt") + + write.table(CarnivalNetwork, file = nameOutputNetwork, + quote = FALSE, row.names = FALSE, col.names = FALSE, sep = " ") + + write.table(CarnivalAttributes, file = nameOutputAttributes, + quote = FALSE, row.names = FALSE, col.names = TRUE, sep = "\t") +} +``` + +### Differential expression analysis on the transcriptomics data + +We take the series 5 from the following dataset (A549 mock treated versus +SARS-CoV-2 infected): + +<https://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?acc=GSE147507> + +which is related to the following publication: + +<https://www.cell.com/cell/pdf/S0092-8674(20)30489-X.pdf?_returnURL=https%3A%2F%2Flinkinghub.elsevier.com%2Fretrieve%2Fpii%2FS009286742030489X%3Fshowall%3Dtrue> + +```{r, message=FALSE} +## Raw counts table +GSE147507_raw_counts <- + read.csv("TranscriptomicsData/GSE147507_RawReadCounts_Human.tsv", sep = "\t") + +count_A549vsCOV2_df <- GSE147507_raw_counts[,c(22:27)] +row.names(count_A549vsCOV2_df) <- GSE147507_raw_counts$X + +## Define conditions +targets_A549vsCOV2 <- + as.data.frame(matrix(NA,length(names(count_A549vsCOV2_df)),1)) +names(targets_A549vsCOV2) <- c("condition") +row.names(targets_A549vsCOV2) <- names(count_A549vsCOV2_df) +targets_A549vsCOV2$condition <- + gsub("Series5_", "", row.names(targets_A549vsCOV2)) +targets_A549vsCOV2$condition <- + factor(gsub("_[1-3]$", "", targets_A549vsCOV2$condition)) +targets_A549vsCOV2 + +## Create deseq2 object +dds_A549vsCOV2 <- + DESeqDataSetFromMatrix(countData = as.matrix(count_A549vsCOV2_df), + colData = targets_A549vsCOV2, design = ~ condition) + +## Set control +dds_A549vsCOV2$condition <- relevel(dds_A549vsCOV2$condition, + ref = levels(targets_A549vsCOV2$condition)[1]) + +## Carry out diff exp +dds_A549vsCOV2 <- DESeq(dds_A549vsCOV2) + +## See the comparisons carried out +comparison_A549vsCOV2 <- resultsNames(dds_A549vsCOV2) + +## Get results table +results_A549vsCOV2 <- + results(dds_A549vsCOV2, name=comparison_A549vsCOV2[2]) +``` + +### Pathway activity estimation using Progeny + +We first estimate the pathway activity using the Progeny package. In particular, +we compute the normalised enriched score (NEs) of the different pathways by +running Progeny using the statistic from the differential express analysis. + +```{r} +dds_A549vsCOV2_df <- as.data.frame(results_A549vsCOV2) %>% + rownames_to_column(var = "GeneID") %>% + dplyr::select(GeneID, stat) %>% + dplyr::filter(!is.na(stat)) %>% + column_to_rownames(var = "GeneID") + +pathways_A549vsCOV2_zscore <- t(progeny(as.matrix(dds_A549vsCOV2_df), + scale=TRUE, organism="Human", top = 100, perm = 10000, z_scores = TRUE)) +colnames(pathways_A549vsCOV2_zscore) <- "NES" + +## I also need to run progeny in such a way to have values between 1 and -1 to +## use as CARNIVAL input +pathways_A549vsCOV2_zscore_inputCarnival <- + t(progeny(as.matrix(dds_A549vsCOV2_df), + scale=TRUE, organism="Human", top = 100, perm = 10000, z_scores = FALSE)) +colnames(pathways_A549vsCOV2_zscore_inputCarnival) <- "Activity" +``` + +We now display the normalized enrichment scores (NES) in a bar plot. + +```{r, dpi=300} +pathways_A549vsCOV2_zscore_df <- as.data.frame(pathways_A549vsCOV2_zscore) %>% + rownames_to_column(var = "Pathway") %>% + dplyr::arrange(NES) %>% + dplyr::mutate(Pathway = factor(Pathway)) + +ggplot(pathways_A549vsCOV2_zscore_df,aes(x = reorder(Pathway, NES), y = NES)) + + geom_bar(aes(fill = NES), stat = "identity") + + scale_fill_gradient2(low = "darkblue", high = "indianred", + mid = "whitesmoke", midpoint = 0) + + theme_minimal() + + theme(axis.title = element_text(face = "bold", size = 12), + axis.text.x = + element_text(angle = 45, hjust = 1, size =10, face= "bold"), + axis.text.y = element_text(size =10, face= "bold"), + panel.grid.major = element_blank(), + panel.grid.minor = element_blank()) + + xlab("Pathways") +``` + +### Transcription Factor activity with Dorothea and Viper + +Now, we estimate the transcription factor (TF) activity using the dorothea +package. We select Dorothea interactions with confidence level A, B and C. + +```{r} +## We load Dorothea Regulons +data(dorothea_hs, package = "dorothea") +regulons <- dorothea_hs %>% + dplyr::filter(confidence %in% c("A", "B","C")) +``` + +We run Viper using the statistic from the different expression analysis. + +```{r, message=FALSE} +dds_A549vsCOV2_stat <- as.data.frame(results_A549vsCOV2) %>% + rownames_to_column(var = "GeneID") %>% + dplyr::select(GeneID, stat) %>% + dplyr::filter(!is.na(stat)) %>% + column_to_rownames(var = "GeneID") %>% + as.matrix() + +tf_activities_A549vsCOV2_stat <- + dorothea::run_viper(as.matrix(dds_A549vsCOV2_stat), regulons, + options = list(minsize = 5, eset.filter = FALSE, + cores = 1, verbose = FALSE, nes = TRUE)) +``` + +We now display the top 25 normalized enrichment scores (NES) for the TF in a +bar plot. + +```{r, dpi=300} +tf_activities_A549vsCOV2_top25 <- tf_activities_A549vsCOV2_stat %>% + as.data.frame() %>% + rownames_to_column(var = "GeneID") %>% + dplyr::rename(NES = "stat") %>% + dplyr::top_n(25, wt = abs(NES)) %>% + dplyr::arrange(NES) %>% + dplyr::mutate(GeneID = factor(GeneID)) + +ggplot(tf_activities_A549vsCOV2_top25,aes(x = reorder(GeneID, NES), y = NES)) + + geom_bar(aes(fill = NES), stat = "identity") + + scale_fill_gradient2(low = "darkblue", high = "indianred", + mid = "whitesmoke", midpoint = 0) + + theme_minimal() + + theme(axis.title = element_text(face = "bold", size = 12), + axis.text.x = + element_text(angle = 45, hjust = 1, size =10, face= "bold"), + axis.text.y = element_text(size =10, face= "bold"), + panel.grid.major = element_blank(), + panel.grid.minor = element_blank()) + + xlab("Transcription Factors") +``` + +### Reading Kinase Activity + +We here read the kinase activity computed by Danish Memon <memon@ebi.ac.uk> from +the phosphoproteomics data extracted from the following publication: + +<https://www.biorxiv.org/content/10.1101/2020.06.17.156455v1> + +```{r, message=FALSE} +## We take the kinase activity after 24 hours to be in line with the transcriptomics +## data +DIA_kinase_activity <- read_csv("Mann/diaCovidKinaseActMat.csv") %>% + dplyr::rename(kinase = "X1") %>% + dplyr::select(kinase, `24h`) +``` + + +### Generating the Prior Knowledge Network from Omnipath + +We use the OmnipathR package to fetch the Omnipath database and generate the +prior knowledge network. We take the signed and directed protein-protein +interactions. + +```{r} +ia_omnipath <- import_omnipath_interactions() %>% as_tibble() +ia_pwextra <- import_pathwayextra_interactions() %>% as_tibble() +ia_kinaseextra <- import_kinaseextra_interactions() %>% as_tibble() + +## We bind the datasets +interactions <- as_tibble( + bind_rows( + ia_omnipath %>% mutate(type = 'ppi'), + ia_pwextra %>% mutate(type = 'ppi'), + ia_kinaseextra %>% mutate(type = 'ppi'))) + +signed_directed_interactions <- + dplyr::filter(interactions, consensus_direction==1) %>% + filter(consensus_stimulation == 1 | consensus_inhibition == 1) %>% + dplyr::mutate(sign = if_else(consensus_stimulation==1,1,-1)) %>% + dplyr::select(source_genesymbol, sign, target_genesymbol) %>% + dplyr::rename(source ="source_genesymbol", target ="target_genesymbol") + +carnival_pkn <- signed_directed_interactions %>% + dplyr::distinct(source, target, .keep_all = TRUE) + +all_source_nodes <- unique(carnival_pkn$source) +all_target_nodes <- unique(carnival_pkn$target) +all_nodes_network <- unique(c(all_source_nodes,all_target_nodes)) +``` + +## Results + +We are going to run CARNIVAL twice using the TF activity as the CARNIVAL +measurements and the kinase activities as perturbations. In the second run, +we will additionally use pathway scores from progeny as weights for the network. +For these runs, we are going to select the top 30 TFs and the top 10 kinases. + +```{r} +top_10_kinases <- DIA_kinase_activity %>% + dplyr::filter(kinase %in% all_source_nodes) %>% + dplyr::top_n(10,abs(`24h`)) %>% + tibble::column_to_rownames(var = "kinase") %>% + t() %>% sign() %>% as.data.frame() + +top_30_tf <- tf_activities_A549vsCOV2_stat %>% + as.data.frame() %>% + rownames_to_column(var = "GeneID") %>% + dplyr::filter(GeneID %in% all_nodes_network) %>% + dplyr::arrange(desc(abs(stat))) %>% + dplyr::top_n(30, wt = abs(stat)) %>% + column_to_rownames(var = "GeneID") %>% + t() %>% as.data.frame() + +progeny_weigths <- pathways_A549vsCOV2_zscore_inputCarnival %>% + as.data.frame() %>% t() +``` + +### CARNIVAL without progeny weights + +```{r, message=FALSE, warning=FALSE, eval=FALSE} +carnival_results_noprogeny <-runCARNIVAL( + solverPath="/opt/ibm/ILOG/CPLEX_Studio129/cplex/bin/x86-64_linux/cplex", + netObj=carnival_pkn, + measObj=top_30_tf, + inputObj = top_10_kinases, + # dir_name="Carnival_Results", + # weightObj=as.data.frame(t(Kin_activity_LNCaP_noInhib_t1_EGF)), + # nodeID = 'gene', + timelimit = 900, + solver = "cplex") +saveRDS(carnival_results_noprogeny, + file = "Carnival_Results/carnival_results_noprogeny.rds") +OutputCyto(carnival_results_noprogeny, + outputFile="Carnival_Results/carnival_results_noprogeny") +``` + +Network with the results: Rectangles are the most active transcription factors +after infection and the inverse triangles are the most active kinases. Ellipses +are signaling intermediates proteins linking those kinsases and TFs. +Red means positive activity or overphosphorylation after infection and blue the +opposite. + + +<br><br> + +<br><br> + +### CARNIVAL with progeny weights + +```{r, message=FALSE, warning=FALSE, eval=FALSE} +carnival_results_withprogeny <-runCARNIVAL( + solverPath="/opt/ibm/ILOG/CPLEX_Studio129/cplex/bin/x86-64_linux/cplex", + netObj=carnival_pkn, + measObj=top_30_tf, + inputObj = top_10_kinases, + # dir_name="Carnival_Results", + weightObj=progeny_weigths, + # nodeID = 'gene', + timelimit = 900, + solver = "cplex") +saveRDS(carnival_results_withprogeny, + file = "Carnival_Results/carnival_results_withprogeny.rds") +OutputCyto(carnival_results_withprogeny, + outputFile="Carnival_Results/carnival_results_withprogeny") +``` + +<br><br> + +<br><br> + +## Session Info Details + +```{r, echo=FALSE, eval=TRUE} +sessionInfo() +``` \ No newline at end of file