From ff55acb8cb544d475478894e9f67bc8563bf6cf7 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Todor=20Kondi=C4=87?= <todor.kondic@uni.lu>
Date: Sun, 23 Jun 2019 22:23:03 +0200
Subject: [PATCH] Add RMB_EIC_prescreen_df

* R/mix.R(RMB_EIC_prescreen_df): New function. Instead of pdf file
  with plots, this one records the data.frames to laters serve as
  inputs to plotting stages.
---
 R/mix.R                        | 101 ++++++++++++++++++++++++++++++---
 man/RMB_EIC_prescreen_df.Rd    |  29 ++++++++++
 man/RMB_EIC_prescreen_intrn.Rd |   2 +-
 3 files changed, 123 insertions(+), 9 deletions(-)
 create mode 100644 man/RMB_EIC_prescreen_df.Rd

diff --git a/R/mix.R b/R/mix.R
index e2ede63..3d64763 100644
--- a/R/mix.R
+++ b/R/mix.R
@@ -314,7 +314,7 @@ v<-function(fn_data,stgs_alist,wd,fn_cmpd_list,mode,readMethod="mzR",archdir="ar
 ##' @param cmpd_list ...
 ##' @param ppm_limit_fine ...
 ##' @param EIC_limit ...
-##' @author Emma Schymanski
+##' @author Emma Schymanski, Todor Kondić
 RMB_EIC_prescreen_intrn <- function (archive_name, RMB_mode, FileList, cmpd_list,
                                ppm_limit_fine = 10, EIC_limit = 0.001) {
     pdf_title <- paste(archive_name, "_EICscan.pdf", sep = "")
@@ -377,6 +377,91 @@ RMB_EIC_prescreen_intrn <- function (archive_name, RMB_mode, FileList, cmpd_list
 
 
 
+##' Prescreens. Writes data out. Adapted from ReSOLUTION
+##'
+##' 
+##' @title Prescreen
+##' @param wd Absolute path to the directory that will contain the
+##'     resulting data frame.
+##' @param RMB_mode ...
+##' @param FileList ...
+##' @param cmpd_list ...
+##' @param ppm_limit_fine ...
+##' @param EIC_limit ...
+##' @author Emma Schymanski, Todor Kondić
+RMB_EIC_prescreen_df <- function (wd, RMB_mode, FileList, cmpd_list,
+                                  ppm_limit_fine = 10, EIC_limit = 0.001) {
+
+
+    n_spec <- 0
+    cmpd_RT_maxI <- ""
+    msms_found <- ""
+    rts <- 0
+    max_I_prec <- ""
+    cmpd_RT_maxI_min <- ""
+    file_list <- read.csv(FileList, stringsAsFactors = FALSE)
+    cmpd_info <- read.csv(cmpd_list, stringsAsFactors = FALSE)
+    ncmpd <- nrow(cmpd_info)
+    odir=file.path(wd,"prescreen")
+    no_drama_mkdir(odir)
+    get_width <- function(maxid) {log10(maxid)+1}
+    id_field_width <- get_width(ncmpd)
+
+    fn_out<- function(id,suff) {file.path(odir,paste(formatC(id,width=id_field_width,flag=0),suff,".csv",sep=''))}
+    f <- mzR::openMSfile(file_list$Files[1])
+    for (i in 1:length(file_list$ID)) {
+        cpdID <- file_list$ID[i]
+        n_spec <- n_spec + 1
+        smiles <- tryCatch(RMassBank::findSmiles(cpdID), error = function(e) NA)
+        if (!is.na(smiles)) {
+            mz <- as.numeric(RMassBank::findMz(cpdID, RMB_mode)[3])
+        }
+        else {
+            mz <- as.numeric(RMassBank::findMz(cpdID, RMB_mode, retrieval = "unknown")[3])
+        }
+        eic <- RMassBank::findEIC(f, mz, limit = EIC_limit)
+        msms_found[n_spec] <- FALSE
+        msms <- RMassBank::findMsMsHR.mass(f, mz, 0.5, RMassBank::ppm(mz, ppm_limit_fine, 
+                                                           p = TRUE))
+        max_I_prec_index <- which.max(eic$intensity)
+        cmpd_RT_maxI[n_spec] <- eic[max_I_prec_index, 1]
+        max_I_prec[n_spec] <- eic[max_I_prec_index, 2]
+        cmpd_RT_maxI_min[n_spec] <- as.numeric(cmpd_RT_maxI[n_spec])/60
+        ## plot.new()
+        ## plot.window(range(eic$rt), range(eic$intensity))
+        ## box()
+        ## lines(eic$intensity ~ eic$rt)
+        write.csv(x=eic[c("rt","intensity")],file=fn_out(cpdID,".eic"),row.names=F)
+        for (specs in msms) {
+            if (specs@found == TRUE) {
+                
+                df <- do.call(rbind, lapply(specs@children, function(sp) c(sp@rt, 
+                                                                           intensity = max(sp@intensity))))
+                names(df) <- c("rt","intensity")
+                write.csv(x=df,file=fn_out(cpdID,".kids"),row.names=F)
+                ## lines(intensity ~ retentionTime, data = df, type = "h", 
+                ##       col = "blue")
+                msms_found[n_spec] <- TRUE
+            }
+        }
+        ## title(main = cpdID, xlab = "RT (sec)", ylab = "Intensity")
+        ## text(as.numeric(cmpd_RT_maxI[n_spec]), as.numeric(max_I_prec[n_spec]), 
+        ##      labels = as.numeric(cmpd_RT_maxI_min[n_spec]), pos = 4)
+        ## axis(1)
+        ## axis(2)
+        ## gc()
+        rts[i] <- (cmpd_RT_maxI[n_spec])
+    }
+    # dev.off()
+    write.csv(cbind(file_list$ID, cmpd_info$mz, cmpd_info$Name, 
+                    cmpd_RT_maxI, cmpd_RT_maxI_min, max_I_prec, msms_found), 
+              file = file.path(odir,"RTs_wI.csv"), 
+              row.names = F)
+}
+
+
+
+
 
 
 
@@ -396,6 +481,7 @@ RMB_EIC_prescreen_intrn <- function (archive_name, RMB_mode, FileList, cmpd_list
 ##' @author Todor Kondić
 ##' @export
 presc.single <- function(fn_data,stgs_alist,wd,mode,fn_cmpd_l,ppm_lim_fine=10,EIC_limit=0.001,nm_arch="archive") {
+    wd <- normalizePath(wd)
     gen_stgs_and_load(fn_data,stgs_alist,wd)
     
     ## Generate and load the compound list.
@@ -406,13 +492,12 @@ presc.single <- function(fn_data,stgs_alist,wd,mode,fn_cmpd_l,ppm_lim_fine=10,EI
     ## Generate file table.
     fn_table <- gen_file_table(fn_data,n_cmpd,wd)
 
-    curd <- setwd(wd)
-    res <-RMB_EIC_prescreen_intrn(archive_name=nm_arch,RMB_mode=mode,
-                                  FileList=fn_table,
-                                  cmpd_list=fn_comp,
-                                  ppm_limit_fine=ppm_lim_fine,
-                                  EIC_limit=EIC_limit)
-    setwd(curd)
+    #curd <- setwd(wd)
+    res <-RMB_EIC_prescreen_df(wd=wd,RMB_mode=mode, FileList=fn_table,
+                               cmpd_list=fn_comp,
+                               ppm_limit_fine=ppm_lim_fine,
+                               EIC_limit=EIC_limit)
+    #setwd(curd)
     res
 
 }
diff --git a/man/RMB_EIC_prescreen_df.Rd b/man/RMB_EIC_prescreen_df.Rd
new file mode 100644
index 0000000..13016b1
--- /dev/null
+++ b/man/RMB_EIC_prescreen_df.Rd
@@ -0,0 +1,29 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/mix.R
+\name{RMB_EIC_prescreen_df}
+\alias{RMB_EIC_prescreen_df}
+\title{Prescreen}
+\usage{
+RMB_EIC_prescreen_df(wd, RMB_mode, FileList, cmpd_list,
+  ppm_limit_fine = 10, EIC_limit = 0.001)
+}
+\arguments{
+\item{wd}{Absolute path to the directory that will contain the
+resulting data frame.}
+
+\item{RMB_mode}{...}
+
+\item{FileList}{...}
+
+\item{cmpd_list}{...}
+
+\item{ppm_limit_fine}{...}
+
+\item{EIC_limit}{...}
+}
+\description{
+Prescreens. Writes data out. Adapted from ReSOLUTION
+}
+\author{
+Emma Schymanski, Todor Kondić
+}
diff --git a/man/RMB_EIC_prescreen_intrn.Rd b/man/RMB_EIC_prescreen_intrn.Rd
index 2374aac..ef62fcf 100644
--- a/man/RMB_EIC_prescreen_intrn.Rd
+++ b/man/RMB_EIC_prescreen_intrn.Rd
@@ -24,5 +24,5 @@ RMB_EIC_prescreen_intrn(archive_name, RMB_mode, FileList, cmpd_list,
 Prescreens. Ripped off from ReSOLUTION
 }
 \author{
-Emma Schymanski
+Emma Schymanski, Todor Kondić
 }
-- 
GitLab