Commit 5c0f5614 authored by Jessy Krier
Browse files

updated

parent 4f880cf2
No preview for this file type
This diff is collapsed.
55.054557800293 14314.017578125
55.421501159668 1594.11499023438
55.934642791748 1989.79821777344
56.0578956604004 8391.193359375
57.0611991882324 4140.24462890625
58.2792625427246 1574.638671875
59.070858001709 1647.42504882812
65.0387344360352 2938.79125976562
67.0543746948242 95398.171875
68.057746887207 60264.2734375
68.9948959350586 6967.8701171875
69.0610656738281 50580.453125
69.0633087158203 2483.72241210938
69.0700149536133 29038.25
82.0652694702148 1996.55346679688
83.0730056762695 5261.56103515625
84.0763549804688 4204.4443359375
85.6368103027344 1908.17797851562
86.0682373046875 4991.62255859375
87.0691833496094 8605.146484375
90.5813293457031 1764.54455566406
90.9477233886719 2393.32763671875
100.112274169922 2844.05493164062
100.324928283691 1840.66369628906
104.963439941406 1795.708984375
105.934967041016 3734.04541015625
108.088996887207 2644.48388671875
111.063453674316 4465.2392578125
112.063743591309 4231.171875
112.067008972168 8237.796875
114.01634979248 6801.482421875
114.091552734375 5603.6123046875
114.158195495605 1487.27111816406
117.128952026367 1726.23388671875
123.945701599121 3056.9296875
126.016159057617 14277.69921875
127.014137268066 2415.611328125
127.019668579102 21932.98828125
128.020294189453 33987.97265625
132.062240600586 4996.99853515625
134.068725585938 2849.08178710938
135.041961669922 3269.79223632812
140.061935424805 1636.56213378906
149.961563110352 4293.00830078125
150.026748657227 10776.16796875
152.05989074707 2327.41040039062
153.035263061523 8152.33935546875
154.036468505859 14229.0634765625
160.057312011719 2425.34033203125
162.052764892578 4334.99462890625
162.063385009766 5118.87744140625
164.042449951172 5814.64990234375
164.059646606445 5386.94580078125
164.070770263672 3628.26391601562
167.971588134766 2058.6484375
169.09098815918 1922.24096679688
182.009826660156 6990.3681640625
182.068939208984 1322706
182.118209838867 11180.181640625
182.154190063477 30946.845703125
182.190567016602 34722.609375
This diff is collapsed.
## Select the features that have a usable MS2 spectrum and attach the path of
## the spectrum file to each of them.
##
## ftable : data frame with (at least) the columns ID, mode, tag, set, MS1, MS2,
##          Alignment, AboveNoise and MS2rt.
## dirMS2 : directory containing the MS2 spectrum files.
##
## Returns the rows of `ftable` whose MS1, MS2, Alignment and AboveNoise flags
## are all TRUE and whose MS2rt is not NA, with an added character column
## `MS2File` holding `<dirMS2>/<ID>_<mode>_<tag>_<set>.csv`. A numeric ID is
## zero-padded to six digits; any other ID is used as it is.
sel_ms2_files <- function(ftable, dirMS2) {
  ## Vectorised construction of the spectrum file names.
  gen_ms2_spec_fn <- function(id, tag, mode, set, width = 6) {
    ## paste0() would turn zero-length input into a single ".csv" entry.
    if (length(id) == 0) {
      return(character(0))
    }
    id_chr <- trimws(as.character(id))
    id_num <- suppressWarnings(as.numeric(id_chr))
    is_num <- !is.na(id_num)
    num <- id_chr
    if (any(is_num)) {
      num[is_num] <- formatC(id_num[is_num], width = width, format = "d", flag = "0")
    }
    ss <- trimws(paste(num, mode, tag, set, sep = "_"), which = "both")
    paste0(ss, ".csv")
  }

  keep <- ftable$MS1 & ftable$MS2 & ftable$Alignment & ftable$AboveNoise &
    !is.na(ftable$MS2rt)
  ## which() drops rows whose flags evaluate to NA; plain logical indexing would
  ## turn each of them into a row consisting only of NA values.
  filt_state <- ftable[which(keep), , drop = FALSE]

  ## Work on the columns directly. apply() over a data frame converts it with
  ## as.matrix(), which pads numeric columns with blanks, and those blanks ended
  ## up inside the generated file names.
  filt_state$MS2File <- file.path(dirMS2,
                                  gen_ms2_spec_fn(filt_state$ID,
                                                  filt_state$tag,
                                                  filt_state$mode,
                                                  filt_state$set))
  filt_state
}
## Read a CSV file into a data frame, keeping text columns as character
## vectors. All arguments are forwarded unchanged to read.csv().
## FALSE is spelled out because `F` is an ordinary variable that can be
## reassigned.
file2tab <- function(...) read.csv(..., stringsAsFactors = FALSE)
# Processing of Jessy's MS/MS Data from shinyscreen with MetFrag
# Emma Schymanski & Jessy Krier 09/03/2019
# Modified from C:\DATA\LCSB\KU_Denmark\KU_MetFrag_Phine.R
library(ReSOLUTION)
library(RChemMass)
library(parallel)
## **** DEPENDENCIES ****
path_to_ft2mf <- "/Users/jessykrier/Dokumente/Uni.lu/2019_SWPEST/MetFrag/"
source(file.path(path_to_ft2mf,'ft2mf.R'))

## **** INPUTS ****
## This is the summary file you've made (e.g. ftable.csv).
summary_file <- "/Users/jessykrier/Dokumente/Uni.lu/2019_SWPEST/POS/201904_SWPEST_ftable_state.csv"
## Replace only the literal ".csv" extension at the end of the name. The
## unanchored pattern ".csv" would match any character followed by "csv"
## anywhere in the path.
summary_results_file <- sub("\\.csv$", "_MetFrag_CompTox_Results.csv", summary_file)
if (identical(summary_results_file, summary_file)) {
  ## No ".csv" extension found: append the suffix so that the results can never
  ## overwrite the input table.
  summary_results_file <- paste0(summary_file, "_MetFrag_CompTox_Results.csv")
}

## Define the directories.
dirMS2 <- "/Users/jessykrier/Dokumente/Uni.lu/2019_SWPEST/POS/MS2/"
RMB_dir <- "/Users/jessykrier/Dokumente/Uni.lu/2019_SWPEST/MetFrag"
## NOTE(review): the working directory is changed for the rest of the session;
## kept because later steps may rely on it -- confirm before removing.
setwd(RMB_dir)
MetFrag_results_dir <- file.path(RMB_dir,"201904_SWPEST_MetFrag_POS")
## Re-running the script must not warn about an already existing directory.
dir.create(MetFrag_results_dir, showWarnings = FALSE)

## Local database file of CompTox
localDB <- "/Users/jessykrier/Dokumente/Uni.lu/2019_SWPEST/MetFrag/CompTox_17March19_KUMetaData.csv"
## Location where MetFrag command line version is saved (the jar file)
MetFrag_dir <- "/Users/jessykrier/Dokumente/Uni.lu/2019_SWPEST/MetFrag/"

## Filter the ftable so that it contains only the good spectra.
ftable <- file2tab(summary_file)
cmpd_info <- sel_ms2_files(ftable, dirMS2 = dirMS2)
## Prefix of every MetFrag result file name: the last component of RMB_dir.
run_name <- basename(RMB_dir)
## **** END INPUTS ****
## isPos starts out as TRUE; it is re-evaluated for each compound later on.
isPos <- TRUE

## Reporting columns, created up front in the order in which they appear in
## the exported table.
## msms_avail is the only logical column; all others start as empty strings.
cmpd_info$msms_avail <- FALSE
cmpd_info[c(
  "msms_peaks",
  ## Basic MetFrag summary
  "num_poss_IDs",
  "max_Score",
  "n_Score_GE7",
  "n_Score_GE5",
  "n_Score_GE4",
  ## Top candidate
  "SMILES_maxScore",
  "Name_maxScore",
  "ExplPeaks_maxScore",
  ## Maxima of the individual categories: MetFrag
  "max_NoExplPeaks",
  "NumberPeaksUsed",
  "max_FragmenterScore",
  ## MoNA (a max_MetFusion column is deliberately not created)
  "max_MoNAIndiv",
  ## CompTox data
  "max_PUBMED_ARTICLES",
  "max_DATA_SOURCES",
  "max_PUBCHEM_SOURCES",
  "max_CPDAT_COUNT",
  "max_TOXCAST_PERCENT_ACTIVE",
  "max_EXPOSURE",
  ## Jessy's extras: European Exposure and Hazard scores (above are US ones)
  "max_KEMI_EXPO",
  "max_KEMI_HAZ",
  ## Per-candidate value lists (a poss_CAS column is deliberately not created)
  "cand_DTXCIDs",
  "cand_DTXSIDs",
  "cand_Scores",
  "cand_NoExplPeaks",
  "cand_FragmenterScore",
  ## MoNA (a cand_MetFusion column is deliberately not created)
  "cand_MoNAIndiv",
  ## CompTox data
  "cand_PUBMED_ARTICLES",
  "cand_DATA_SOURCES",
  "cand_PUBCHEM_SOURCES",
  "cand_CPDAT_COUNT",
  "cand_TOXCAST_PERCENT_ACTIVE",
  "cand_EXPOSURE",
  ## Jessy's extras
  "cand_KEMI_EXPO",
  "cand_KEMI_HAZ"
)] <- ""
#### The actual MetFrag analysis ----------
## For every row of cmpd_info: write its MS/MS peaks to a temporary peak list,
## run MetFrag against the local CompTox database, read the result sheet and
## summarise the candidates into the reporting columns of cmpd_info. The
## completed table is written to summary_results_file at the end.

## Largest non-missing value of `x` after coercion to numeric; NA when no value
## can be read as a number. (pmax() on a single vector returns that vector, so
## it cannot be used to obtain a maximum.)
max_or_na <- function(x) {
  values <- suppressWarnings(as.numeric(x))
  values <- values[!is.na(values)]
  if (length(values) == 0) NA_real_ else max(values)
}

## Collapse the first two columns of a peak table (as returned by read.table,
## i.e. with a V2 column) into one string of "mz:intensity;" entries. The
## entries run from the last peak to the first and each one is followed by ";",
## which is exactly the layout the former row-by-row concatenation produced.
peaks_to_line <- function(peaks) {
  n_peaks <- length(peaks$V2)
  if (n_peaks == 0) {
    return("")
  }
  entries <- vapply(seq_len(n_peaks),
                    function(j) paste(peaks[j, c(1, 2)], collapse = ":"),
                    character(1))
  paste0(paste(rev(entries), collapse = ";"), ";")
}

## NOTE(review): count1 is never changed inside the loop, so 0 is printed at
## the end; kept for compatibility with the original output.
count1 <- 0

## Settings that are the same for every compound.
LocalCSVScoreTerms <- "DATA_SOURCES,NUMBER_OF_PUBMED_ARTICLES,PUBCHEM_DATA_SOURCES,EXPOCAST_MEDIAN_EXPOSURE_PREDICTION_MG/KG-BW/DAY,TOXCAST_PERCENT_ACTIVE,CPDAT_COUNT,KEMIMARKET_EXPO,KEMIMARKET_HAZ"
LocalCSVScoreWeights <- ",1,1,1,1,1,1,1,1"
## Temporary peak list handed to MetFrag; it is rewritten for each compound.
## file.path() is used in both branches below: RMB_dir has no trailing slash,
## so pasting the file name directly onto it gave a wrong path.
MetFrag_msms <- file.path(RMB_dir, "MetFrag_temp_msms.txt")

## seq_len() makes the loop a no-op when cmpd_info has no rows.
for (i in seq_len(nrow(cmpd_info))) {
  ## Retrieve data we need
  cmpdID <- paste0(cmpd_info$ID[i], "_", cmpd_info$tag[i], "_", cmpd_info$set[i])
  ExactMass <- cmpd_info$mz[i] # note this is the CHARGED mass [important for later!]

  ## Mode "mH" means negative mode; everything else (including a missing mode)
  ## is treated as positive.
  isPos <- !isTRUE(cmpd_info$mode[i] == "mH")

  ## Test if we have MSMS: the path comes from cmpd_info (column MS2File).
  msms_file <- cmpd_info$MS2File[i]
  no_msms <- is.na(msms_file) || nchar(msms_file) < 1
  if (!no_msms && !file.exists(msms_file)) {
    warning("MS/MS file not found for ", cmpdID, ": ", msms_file, call. = FALSE)
    no_msms <- TRUE
  }

  if (no_msms) {
    ## No spectrum: run MetFrag on the dummy peak list instead.
    cmpd_info$msms_peaks[i] <- "N/A"
    msms_file <- paste0(MetFrag_dir, "dummy_msms.txt")
    msms_peaks <- read.csv(msms_file)
    ## write peaks to MetFrag temp file
    write.table(msms_peaks, MetFrag_msms, quote = FALSE, row.names = FALSE, col.names = FALSE)
  } else {
    cmpd_info$msms_avail[i] <- TRUE
    msms_peaks <- file2tab(msms_file)
    write.table(msms_peaks, MetFrag_msms, quote = FALSE, row.names = FALSE, col.names = FALSE)
    ## The header-less file is read back so that the peak string is built from
    ## the values MetFrag will actually see. read.table() fails on an empty
    ## file, hence the guard.
    cmpd_info$msms_peaks[i] <- if (nrow(msms_peaks) > 0) {
      peaks_to_line(read.table(MetFrag_msms, stringsAsFactors = FALSE))
    } else {
      ""
    }
  }

  ## Run MetFrag and extract results for reporting into cmpd_info.
  ## The positive and negative runs differ only in the adduct and the mode flag.
  results_filename <- paste0(run_name, "_", cmpdID)
  config_file <- MetFragConfig(mass = ExactMass,
                               adduct_type = if (isPos) "[M+H]+" else "[M-H]-",
                               neutralPrecursorMass = FALSE,
                               results_filename = results_filename,
                               ppm = 10, mzabs = 0.001, frag_ppm = 5,
                               peaklist_path = MetFrag_msms,
                               base_dir = MetFrag_results_dir,
                               DB = "LocalCSV", localDB_path = localDB,
                               useMonaIndiv = TRUE, useMoNAMetFusion = FALSE,
                               IsPosMode = isPos, filter_by_InChIKey = TRUE,
                               UDS_Category = LocalCSVScoreTerms,
                               UDS_Weights = LocalCSVScoreWeights)
  runMetFrag(config_file, MetFrag_dir, CL_name = "MetFrag2.4.5-CL.jar")

  results_file <- file.path(MetFrag_results_dir, "results", paste0(results_filename, ".xls"))
  MetFrag_res <- if (file.exists(results_file)) {
    suppressWarnings(read_excel(results_file))
  } else {
    warning("No MetFrag result file for ", cmpdID, ": ", results_file, call. = FALSE)
    NULL
  }

  ## Test if there are results. The row count is what matters: length() of a
  ## data frame is its number of columns and is non-zero for an empty sheet.
  if (is.null(MetFrag_res) || nrow(MetFrag_res) == 0) {
    cmpd_info$num_poss_IDs[i] <- 0
  } else {
    ## The first row is reported as the top candidate.
    ## NOTE(review): presumably MetFrag writes candidates sorted by score -- confirm.
    index_maxScore <- 1
    scores <- as.numeric(MetFrag_res$Score)

    ## Number of candidates and score summaries
    cmpd_info$num_poss_IDs[i] <- length(MetFrag_res$Score)
    cmpd_info$max_Score[i] <- max(scores)
    cmpd_info$n_Score_GE7[i] <- length(which(scores >= 7))
    cmpd_info$n_Score_GE5[i] <- length(which(scores >= 5))
    cmpd_info$n_Score_GE4[i] <- length(which(scores >= 4))

    ## Summary of top candidate
    cmpd_info$SMILES_maxScore[i] <- MetFrag_res$SMILES[index_maxScore]
    cmpd_info$Name_maxScore[i] <- MetFrag_res$CompoundName[index_maxScore]
    cmpd_info$ExplPeaks_maxScore[i] <- MetFrag_res$ExplPeaks[index_maxScore]

    ## Max of individual categories
    ## MetFrag
    cmpd_info$max_NoExplPeaks[i] <- max(as.numeric(MetFrag_res$NoExplPeaks))
    cmpd_info$NumberPeaksUsed[i] <- max(as.numeric(MetFrag_res$NumberPeaksUsed))
    cmpd_info$max_FragmenterScore[i] <- max(as.numeric(MetFrag_res$FragmenterScore))
    ## MoNA
    #cmpd_info$max_MetFusion[i] <- max(as.numeric(MetFrag_res$OfflineMetFusionScore))
    cmpd_info$max_MoNAIndiv[i] <- max(as.numeric(MetFrag_res$OfflineIndividualMoNAScore))
    ## CompTox data: maximum over all candidates, ignoring missing entries.
    cmpd_info$max_PUBMED_ARTICLES[i] <- max_or_na(MetFrag_res$NUMBER_OF_PUBMED_ARTICLES)
    cmpd_info$max_DATA_SOURCES[i] <- max_or_na(MetFrag_res$DATA_SOURCES)
    cmpd_info$max_PUBCHEM_SOURCES[i] <- max_or_na(MetFrag_res$PUBCHEM_DATA_SOURCES)
    cmpd_info$max_CPDAT_COUNT[i] <- max_or_na(MetFrag_res$CPDAT_COUNT)
    cmpd_info$max_TOXCAST_PERCENT_ACTIVE[i] <- max_or_na(MetFrag_res$TOXCAST_PERCENT_ACTIVE)
    cmpd_info$max_EXPOSURE[i] <- max_or_na(MetFrag_res$`EXPOCAST_MEDIAN_EXPOSURE_PREDICTION_MG/KG-BW/DAY`)
    ## Jessy's extras
    cmpd_info$max_KEMI_EXPO[i] <- max_or_na(MetFrag_res$KEMIMARKET_EXPO)
    cmpd_info$max_KEMI_HAZ[i] <- max_or_na(MetFrag_res$KEMIMARKET_HAZ)

    ## Summary of scores over all candidates (";"-separated, in result order)
    cmpd_info$cand_DTXCIDs[i] <- paste(MetFrag_res$DTXCID_INDIVIDUAL_COMPONENT, collapse = ";")
    cmpd_info$cand_DTXSIDs[i] <- paste(MetFrag_res$MAPPED_DTXSID, collapse = ";")
    #cmpd_info$poss_CAS[i] <- paste(MetFrag_res$CASRN_DTXSID,collapse=";")
    cmpd_info$cand_Scores[i] <- paste(MetFrag_res$Score, collapse = ";")
    cmpd_info$cand_NoExplPeaks[i] <- paste(MetFrag_res$NoExplPeaks, collapse = ";")
    cmpd_info$cand_FragmenterScore[i] <- paste(MetFrag_res$FragmenterScore, collapse = ";")
    ## MoNA
    #cmpd_info$cand_MetFusion[i] <- paste(MetFrag_res$OfflineMetFusionScore,collapse=";")
    cmpd_info$cand_MoNAIndiv[i] <- paste(MetFrag_res$OfflineIndividualMoNAScore, collapse = ";")
    ## CompTox data
    cmpd_info$cand_PUBMED_ARTICLES[i] <- paste(MetFrag_res$NUMBER_OF_PUBMED_ARTICLES, collapse = ";")
    cmpd_info$cand_DATA_SOURCES[i] <- paste(MetFrag_res$DATA_SOURCES, collapse = ";")
    cmpd_info$cand_PUBCHEM_SOURCES[i] <- paste(MetFrag_res$PUBCHEM_DATA_SOURCES, collapse = ";")
    cmpd_info$cand_CPDAT_COUNT[i] <- paste(MetFrag_res$CPDAT_COUNT, collapse = ";")
    cmpd_info$cand_TOXCAST_PERCENT_ACTIVE[i] <- paste(MetFrag_res$TOXCAST_PERCENT_ACTIVE, collapse = ";")
    cmpd_info$cand_EXPOSURE[i] <- paste(MetFrag_res$`EXPOCAST_MEDIAN_EXPOSURE_PREDICTION_MG/KG-BW/DAY`, collapse = ";")
    ## Jessy's Extras
    cmpd_info$cand_KEMI_EXPO[i] <- paste(MetFrag_res$KEMIMARKET_EXPO, collapse = ";")
    cmpd_info$cand_KEMI_HAZ[i] <- paste(MetFrag_res$KEMIMARKET_HAZ, collapse = ";")
  }
}

print(count1)
## Export the annotated compound table.
write.csv(cmpd_info, summary_results_file, row.names = FALSE)
gc()
closeAllConnections()
## Reinstall shinyscreen from the local checkout and start the application.
options(browser = "firefox")

## detach() raises an error when the package is not attached (e.g. in a fresh
## session), so only detach or unload what is actually there.
if ("package:shinyscreen" %in% search()) {
  detach("package:shinyscreen", unload = TRUE)
} else if (isNamespaceLoaded("shinyscreen")) {
  unloadNamespace("shinyscreen")
}

## system() returns the exit status of the command; stop instead of silently
## loading a stale installation when the install fails.
install_status <- system("R CMD INSTALL /home/ecijk/Documents/repos/shinyscreen")
if (install_status != 0) {
  stop("R CMD INSTALL of shinyscreen failed with exit status ", install_status,
       call. = FALSE)
}

library(shinyscreen)
launch()
shinyscreen_package @ 82209daf
Subproject commit 82209daf32f281c12f61f000021ed944262779e0
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment