--- title: colasso output: rmarkdown::html_vignette vignette: > %\VignetteIndexEntry{vignette} %\VignetteEngine{knitr::rmarkdown} %\VignetteEncoding{UTF-8} --- ```{r,eval=FALSE} for(rep in 1:4){ set.seed(rep) #----- OBTAIN DATA ----- ### simulated data ### #set.seed(rep) #n <- 100; p <- 1000 #list <- colasso::colasso_simulate(p=p,n=n,cor="constant") #y <- list$y; X <- list$X ### mice data ### #data(mice,package="BGLR") #nsel <- sort(sample(seq_len(1814),size=200,replace=FALSE)) #psel <- sort(sample(seq_len(10346),size=10346,replace=FALSE)) #y <- mice.pheno$Obesity.BMI[nsel] # try different phenotypes #X <- mice.X[nsel,psel] ### wheat data ### data(wheat,package="BGLR") nsel <- seq_len(599) # sort(sample(seq_len(599),size=200,replace=FALSE)) psel <- seq_len(1279) # sort(sample(seq_len(1279),size=200,replace=FALSE)) y <- as.numeric(wheat.Y[nsel,rep]) # try different phenotypes X <- wheat.X[nsel,psel] #----- CROSS-VALIDATE ----- loss <- colasso_compare(y=y,X=X) # fold <- sample(x=rep(x=seq_len(5),length.out=length(y))) # pred <- matrix(data=NA,nrow=length(y),ncol=8) # for(i in sort(unique(fold))){ # cat("i =",i,"\n") # fit <- colasso(y=y[fold!=i],X=X[fold!=i,],alpha=1) # increase nfold? us # # MEM[[i]] <- fit # trial # # for(j in seq_along(fit)){ # pred[fold==i,j] <- glmnet::predict.glmnet(object=fit[[j]], # newx=X[fold==i,], # s=fit[[j]]$lambda.min, # type="response") # } # pred[fold==i,8] <- mean(y[fold!=i]) # intercept-only model # } # loss <- apply(X=pred,MARGIN=2,FUN=function(x) sum((y-x)^2)) #graphics::par(mar=c(3,3,1,1)) #col <- rep(x=0,times=length(loss)-1) #col[1] <- col[length(col)] <- 1 #plot(y=loss[-length(loss)],x=seq_len(length(loss)-1), # col=col+1,pch=col) # ,ylim=range(loss)) #abline(v=c(1.5,length(loss)-1.5),lty=2) #graphics::grid() #abline(h=loss[length(loss)],lty=2,col="red") } ``` # BBMRI DATA (important!) Repeat this for all normally distributed responses, omit samples with missing response, save results to file. ```{r,eval=FALSE} lib <- "/virdir/Scratch/arauschenberger/library" .libPaths(lib) devtools::install_github("rauschenberger/colasso",lib=lib) utils::data(metabolomics_RP3RP4_overlap,package="BBMRIomics") utils::data(rnaSeqData_ReadCounts_BIOS_cleaned,package="BBMRIomics") samples <- intersect(colnames(counts),colnames(metabolomicData)) Y <- t(SummarizedExperiment::assays(metabolomicData[,samples])$measurements) X <- t(SummarizedExperiment::assay(counts[,samples])) loss <- NULL for(j in seq_len(ncol(Y))){ cat("j =",j,"\n") y <- Y[,j] if(sd(y,na.rm=TRUE)==0){next} if(TRUE){ psel <- sample(seq_len(56515),size=2000) nsel <- sample(seq_len(2003),size=500) y <- y[nsel] x <- X[nsel,psel] } cond <- !is.na(y) y <- scale(y[cond]) x <- x[cond,] loss <- rbind(loss,colasso::colasso_compare(y=y,X=x)) } min <- apply(traits,2,function(x) min(x,na.rm=TRUE)) max <- apply(traits,2,function(x) max(x,na.rm=TRUE)) var <- apply(traits,2,function(x) var(x,na.rm=TRUE)) psel <- sample(seq_len(56515),size=2000) nsel <- sample(seq_len(2003),size=500) y <- YY[nsel,"totfa"] X <- XX[nsel,psel] net <- glmnet::cv.glmnet(x=X,y=y) plot(x=net$lambda,y=net$cvm) # then apply colasso function ! ``` ```{r,eval=FALSE} n <- 100; p <- 10 x <- matrix(rnorm(n*p),nrow=n,ncol=p) y <- rbinom(n=n,size=1,prob=0.2) a <- stats::glm(y~x,family="binomial") y <- log(y/(1-y)) y[y==-Inf] <- -99e99 y[y==Inf] <- 99e99 b <- stats::glm(y~x,family="gaussian") ```