---
title: colasso
output: rmarkdown::html_vignette
vignette: >
%\VignetteIndexEntry{vignette}
%\VignetteEngine{knitr::rmarkdown}
%\VignetteEncoding{UTF-8}
---
```{r,eval=FALSE}
for(rep in 1:4){
set.seed(rep)
#----- OBTAIN DATA -----
### simulated data ###
#set.seed(rep)
#n <- 100; p <- 1000
#list <- colasso::colasso_simulate(p=p,n=n,cor="constant")
#y <- list$y; X <- list$X
### mice data ###
#data(mice,package="BGLR")
#nsel <- sort(sample(seq_len(1814),size=200,replace=FALSE))
#psel <- sort(sample(seq_len(10346),size=10346,replace=FALSE))
#y <- mice.pheno$Obesity.BMI[nsel] # try different phenotypes
#X <- mice.X[nsel,psel]
### wheat data ###
data(wheat,package="BGLR")
nsel <- seq_len(599) # sort(sample(seq_len(599),size=200,replace=FALSE))
psel <- seq_len(1279) # sort(sample(seq_len(1279),size=200,replace=FALSE))
y <- as.numeric(wheat.Y[nsel,rep]) # try different phenotypes
X <- wheat.X[nsel,psel]
#----- CROSS-VALIDATE -----
loss <- colasso_compare(y=y,X=X)
# fold <- sample(x=rep(x=seq_len(5),length.out=length(y)))
# pred <- matrix(data=NA,nrow=length(y),ncol=8)
# for(i in sort(unique(fold))){
# cat("i =",i,"\n")
# fit <- colasso(y=y[fold!=i],X=X[fold!=i,],alpha=1) # increase nfold? us
# # MEM[[i]] <- fit # trial
#
# for(j in seq_along(fit)){
# pred[fold==i,j] <- glmnet::predict.glmnet(object=fit[[j]],
# newx=X[fold==i,],
# s=fit[[j]]$lambda.min,
# type="response")
# }
# pred[fold==i,8] <- mean(y[fold!=i]) # intercept-only model
# }
# loss <- apply(X=pred,MARGIN=2,FUN=function(x) sum((y-x)^2))
#graphics::par(mar=c(3,3,1,1))
#col <- rep(x=0,times=length(loss)-1)
#col[1] <- col[length(col)] <- 1
#plot(y=loss[-length(loss)],x=seq_len(length(loss)-1),
# col=col+1,pch=col) # ,ylim=range(loss))
#abline(v=c(1.5,length(loss)-1.5),lty=2)
#graphics::grid()
#abline(h=loss[length(loss)],lty=2,col="red")
}
```
# BBMRI DATA (important!)
Repeat this for all normally distributed responses, omit samples with missing response, save results to file.
```{r,eval=FALSE}
lib <- "/virdir/Scratch/arauschenberger/library"
.libPaths(lib)
devtools::install_github("rauschenberger/colasso",lib=lib)
utils::data(metabolomics_RP3RP4_overlap,package="BBMRIomics")
utils::data(rnaSeqData_ReadCounts_BIOS_cleaned,package="BBMRIomics")
samples <- intersect(colnames(counts),colnames(metabolomicData))
Y <- t(SummarizedExperiment::assays(metabolomicData[,samples])$measurements)
X <- t(SummarizedExperiment::assay(counts[,samples]))
loss <- NULL
for(j in seq_len(ncol(Y))){
cat("j =",j,"\n")
y <- Y[,j]
if(sd(y,na.rm=TRUE)==0){next}
if(TRUE){
psel <- sample(seq_len(56515),size=2000)
nsel <- sample(seq_len(2003),size=500)
y <- y[nsel]
x <- X[nsel,psel]
}
cond <- !is.na(y)
y <- scale(y[cond])
x <- x[cond,]
loss <- rbind(loss,colasso::colasso_compare(y=y,X=x))
}
min <- apply(traits,2,function(x) min(x,na.rm=TRUE))
max <- apply(traits,2,function(x) max(x,na.rm=TRUE))
var <- apply(traits,2,function(x) var(x,na.rm=TRUE))
psel <- sample(seq_len(56515),size=2000)
nsel <- sample(seq_len(2003),size=500)
y <- YY[nsel,"totfa"]
X <- XX[nsel,psel]
net <- glmnet::cv.glmnet(x=X,y=y)
plot(x=net$lambda,y=net$cvm)
# then apply colasso function !
```
```{r,eval=FALSE}
n <- 100; p <- 10
x <- matrix(rnorm(n*p),nrow=n,ncol=p)
y <- rbinom(n=n,size=1,prob=0.2)
a <- stats::glm(y~x,family="binomial")
y <- log(y/(1-y))
y[y==-Inf] <- -99e99
y[y==Inf] <- 99e99
b <- stats::glm(y~x,family="gaussian")
```