Commit 3ff2cba9 authored by Armin Rauschenberger

automation

parent 3bde6039
set.seed(1)
grid <- expand.grid(prob=seq(from=0.01,to=0.2,length.out=11),
fac=seq(from=0.5,to=2,length.out=11))
plot(grid$prob,grid$fac)
plot(grid$prob[1:50],grid$fac[1:50])
dim(grid)
set.seed(1)
grid <- expand.grid(prob=seq(from=0.01,to=0.2,length.out=11),
fac=seq(from=0.5,to=2,length.out=11))
list <- list()
for(i in seq_len(nrow(grid))){
cat(i," ")
temp <- colasso:::.simulate(n=100,p=200,prob=grid$prob[i],fac=grid$fac[i])
y <- temp$y; X <- temp$X
#fit <- colasso::bilasso(y=y,cutoff=0,X=X)
#plot(fit)
## plot(x=fit$sigma,y=fit$sigma.cvm)
## abline(v=fit$sigma.min,col="red",lty=2)
## abline(v=sd(y),col="grey",lty=2)
list[[i]] <- colasso::bilasso_compare(y=y,cutoff=0,X=X)
}
i
loss <- list()
name <- names(list[[1]])
for(i in seq_along(name)){
loss[[name[i]]] <- t(sapply(X=list,FUN=function(x) x[[name[i]]]))
}
t(sapply(loss,colMeans))
t(sapply(X=loss,FUN=function(x) apply(x,2,median)))
# change in deviance against AUC
diff <- loss$deviance[,"grid"]-loss$deviance[,"gaussian"]
median(diff); mean(diff)
wilcox.test(diff)$p.value; t.test(diff)$p.value
auc <- loss$auc[,"gaussian"]
plot(x=auc,y=diff); abline(h=0,lty=2,col="red")
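# Sketch (not in the original script): relate the deviance difference to the
# simulation settings; negative values favour the grid approach (lower deviance).
# Uses 'grid' and 'diff' from above; the i-th row of grid matches the i-th fit.
index <- seq_along(diff)
plot(x=grid$prob[index],y=grid$fac[index],col=ifelse(diff<0,"blue","red"),pch=16,xlab="prob",ylab="fac")
legend("topright",legend=c("grid better","gaussian better"),col=c("blue","red"),pch=16)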
bilasso <- function(y,cutoff,X,nsigma=99,sigma=NULL,nfolds=10,foldid=NULL,type.measure="deviance",logistic=TRUE,...){
#--- temporary ---
# cutoff <- 0; nsigma <- 99; sigma <- NULL; nfolds <- 10; foldid <- NULL; type.measure <- "deviance"; logistic <- TRUE
test <- list()
test$sigma <- test$pi <- test$max <- TRUE
test$grid <- TRUE
#--- checks ---
.check(x=y,type="vector")
if(all(y %in% c(0,1))){stop("Binary response.",call.=FALSE)}
.check(x=cutoff,type="scalar",min=min(y),max=max(y))
.check(x=X,type="matrix")
if(length(y)!=nrow(X)){stop("Contradictory sample size.",call.=FALSE)}
.check(x=nsigma,type="scalar",min=10)
.check(x=sigma,type="vector",min=.Machine$double.eps,null=TRUE)
.check(x=nfolds,type="scalar",min=3)
.check(x=foldid,type="vector",values=seq_len(nfolds),null=TRUE)
.check(x=type.measure,type="string",values=c("deviance","class","mse","mae","auc"))
if(!is.null(list(...)$family)){stop("Reserved argument \"family\".",call.=FALSE)}
n <- length(y)
# binarisation
z <- 1*(y > cutoff)
# fold identifiers
if(is.null(foldid)){
foldid <- palasso:::.folds(y=z,nfolds=nfolds)
}
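# (assumption: palasso:::.folds returns fold identifiers stratified by the binary outcome z)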
#--- model fitting ---
fit <- list()
fit$gaussian <- glmnet::glmnet(y=y,x=X,family="gaussian",...)
if(logistic){
fit$binomial <- glmnet::glmnet(y=z,x=X,family="binomial",...)
}
#--- tuning parameters ---
fit$lambda <- fit$gaussian$lambda
nlambda <- length(fit$gaussian$lambda)
lab.sigma <- paste0("si",seq_len(nsigma))
lab.lambda <- paste0("la",seq_len(nlambda))
if(is.null(sigma)){
fit$sigma <- exp(seq(from=log(0.05*stats::sd(y)),
to=log(10*stats::sd(y)),length.out=nsigma))
} else {
fit$sigma <- sigma
}
names(fit$sigma) <- lab.sigma
names(fit$lambda) <- lab.lambda
if(test$pi){
fit$pi <- seq(from=0,to=1,length.out=100)
}
if(test$max){
fit$max <- exp(seq(from=log(0.05*max(abs(y-cutoff))),
to=log(max(abs(y-cutoff))),length.out=100))
}
#--- cross-validation ---
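# out-of-fold predictions are collected for every candidate tuning parameter
# (lambda, sigma, pi, max, and the full sigma x lambda grid) and scored after the loop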
pred <- list()
pred$y <- matrix(data=NA,nrow=n,ncol=nlambda)
if(logistic){
pred$z <- matrix(data=NA,nrow=n,ncol=length(fit$binomial$lambda))
}
if(test$sigma){
pred$sigma <- matrix(data=NA,nrow=n,ncol=nsigma)
}
if(test$pi){
pred$pi <- matrix(data=NA,nrow=n,ncol=length(fit$pi))
}
if(test$max){
pred$max <- matrix(data=NA,nrow=n,ncol=length(fit$max))
}
if(test$grid){
dimnames <- list(NULL,lab.sigma,lab.lambda)
pred$grid <- array(data=NA,dim=c(n,nsigma,nlambda),dimnames=dimnames)
}
for(k in seq_len(nfolds)){
y0 <- y[foldid!=k]
y1 <- y[foldid==k]
z0 <- z[foldid!=k]
z1 <- z[foldid==k]
X0 <- X[foldid!=k,,drop=FALSE]
X1 <- X[foldid==k,,drop=FALSE]
# linear regression
net <- glmnet::glmnet(y=y0,x=X0,family="gaussian",...)
temp_y <- stats::predict(object=net,newx=X1,type="response",s=fit$gaussian$lambda)
pred$y[foldid==k,seq_len(ncol(temp_y))] <- temp_y
# logistic regression
if(logistic){
net <- glmnet::glmnet(y=z0,x=X0,family="binomial",...)
temp_z <- stats::predict(object=net,newx=X1,type="response",s=fit$binomial$lambda)
pred$z[foldid==k,seq_len(ncol(temp_z))] <- temp_z
}
# fusion (sigma)
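# convert the best cross-validated Gaussian prediction into a probability of
# exceeding the cutoff via a Gaussian CDF, one candidate sd (sigma) at a time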
if(test$sigma){
cvm <- .loss(y=y1,fit=temp_y,family="gaussian",type.measure="deviance")[[1]]
y_hat <- temp_y[,which.min(cvm)]
for(i in seq_along(fit$sigma)){
pred$sigma[foldid==k,i] <- stats::pnorm(q=y_hat,mean=cutoff,sd=fit$sigma[i])
}
}
# fusion (grid)
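# same conversion, but kept for every lambda, so that sigma and lambda can be
# tuned jointly on the binomial loss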
if(test$grid){
for(i in seq_along(fit$sigma)){
pred$grid[foldid==k,i,] <- stats::pnorm(q=temp_y,mean=cutoff,sd=fit$sigma[i])
}
}
# fusion (pi)
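# convex combination of the converted Gaussian prediction (weight pi) and the
# best logistic prediction (weight 1-pi); y_hat comes from the sigma block above
# and the conversion uses the marginal sd of y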
if(test$pi){
cvm <- .loss(y=z1,fit=temp_z,family="binomial",type.measure=type.measure)[[1]]
z_hat <- temp_z[,which.min(cvm)]
cont <- stats::pnorm(q=y_hat,mean=cutoff,sd=stats::sd(y))
for(i in seq_along(fit$pi)){
pred$pi[foldid==k,i] <- fit$pi[i]*cont + (1-fit$pi[i])*z_hat
}
}
# fusion (max)
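# rescale the distance from the cutoff linearly to [0,1] (truncated at the
# boundaries), with the scaling constant as tuning parameter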
if(test$max){
for(i in seq_along(fit$max)){
temp <- ((y_hat-cutoff)/fit$max[i] + 1)/2
pred$max[foldid==k,i] <- pmax(0,pmin(temp,1))
}
}
}
#--- evaluation ---
# deviance (not comparable between Gaussian and binomial families)
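# (the Gaussian model is tuned on its own deviance, while all fused candidates
# are scored on the binomial scale, so the latter remain comparable)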
fit$gaussian$cvm <- .loss(y=y,fit=pred$y,family="gaussian",type.measure="deviance")[[1]]
fit$gaussian$lambda.min <- fit$gaussian$lambda[which.min(fit$gaussian$cvm)]
if(logistic){
fit$binomial$cvm <- .loss(y=z,fit=pred$z,family="binomial",type.measure=type.measure)[[1]]
fit$binomial$lambda.min <- fit$binomial$lambda[which.min(fit$binomial$cvm)]
}
if(test$sigma){
fit$sigma.cvm <- .loss(y=z,fit=pred$sigma,family="binomial",type.measure=type.measure)[[1]]
fit$sigma.min <- fit$sigma[which.min(fit$sigma.cvm)]
}
#graphics::plot(x=fit$sigma,y=fit$sigma.cvm)
#graphics::abline(v=fit$sigma.min,col="red",lty=2)
#graphics::abline(v=stats::sd(y),col="grey",lty=2)
if(test$pi){
fit$pi.cvm <- .loss(y=z,fit=pred$pi,family="binomial",type.measure=type.measure)[[1]] # trial
fit$pi.min <- fit$pi[which.min(fit$pi.cvm)] # trial
}
if(test$max){
fit$max.cvm <- .loss(y=z,fit=pred$max,family="binomial",type.measure=type.measure)[[1]] # trial
fit$max.min <- fit$max[which.min(fit$max.cvm)] # trial
}
if(test$grid){
dimnames <- list(lab.sigma,lab.lambda)
fit$cvm <- matrix(data=NA,nrow=nsigma,ncol=nlambda,dimnames=dimnames)
for(i in seq_len(nsigma)){
for(j in seq_len(nlambda)){
fit$cvm[i,j] <- .loss(y=z,fit=pred$grid[,i,j],family="binomial",type.measure=type.measure)[[1]]
}
}
temp <- which(fit$cvm==min(fit$cvm),arr.ind=TRUE)
if(nrow(temp)>1){warning("Multiple minima in the cross-validated loss; using the first.",call.=FALSE);temp <- temp[1,,drop=FALSE]}
fit$grid.min <- data.frame(sigma=fit$sigma[temp[,"row"]],lambda=fit$gaussian$lambda[temp[,"col"]])
}
#--- return ---
fit$cutoff <- cutoff
fit$info <- list(type.measure=type.measure,
sd.y=stats::sd(y),
table=table(z))
class(fit) <- "bilasso"
return(fit)
}
n <- 100; p <- 200
y <- rnorm(n)
X <- matrix(rnorm(n*p),nrow=n,ncol=p)
net <- bilasso(y=y,cutoff=0,X=X,alpha=1,nlambda=50)
library(colasso)
.loss <- colasso:::.loss
n <- 100; p <- 200
y <- rnorm(n)
X <- matrix(rnorm(n*p),nrow=n,ncol=p)
net <- bilasso(y=y,cutoff=0,X=X,alpha=1,nlambda=50)
names(net)
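# A minimal plotting sketch (not from the original script): visualise the
# cross-validated binomial loss over the sigma x lambda grid returned above
# and mark the selected pair. Assumes 'net' from the call above.
ord <- order(net$lambda) # glmnet lambdas are decreasing; image() needs increasing values
graphics::image(x=log(net$sigma),y=log(net$lambda)[ord],z=net$cvm[,ord],xlab="log(sigma)",ylab="log(lambda)",main="cross-validated loss")
graphics::points(x=log(net$grid.min$sigma),y=log(net$grid.min$lambda),pch=4,cex=2)
net$grid.min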
set.seed(1)
#toydata <- NULL
#save(toydata,file=file.path("C:/Users/armin.rauschenberger/Desktop/package/colasso/data/toydata.R"))
#--- compile package -----------------------------------------------------------
rm(list=ls())
name <- "colasso"
#load("D:/colasso/package/toydata.RData")
pkg <- "C:/Users/armin.rauschenberger/Desktop/colasso/colasso"
setwd(dir=pkg)
devtools::as.package(x=pkg,create=FALSE)
devtools::load_all(path=pkg)
#usethis::use_data(toydata,overwrite=TRUE)
devtools::document(pkg=pkg)
unlink(file.path(pkg,"vignettes","figure"),recursive=TRUE)
all <- dir(file.path(pkg,"vignettes"))
#delete <- "..."
#sapply(delete,function(x) file.remove(file.path(pkg,"vignettes",x)))
setwd(dir=pkg)
unlink(file.path(pkg,"docs"),recursive=TRUE)
pkgdown::build_site(pkg=pkg)
file.remove(file.path(pkg,".Rbuildignore"))
usethis::use_build_ignore(files=c("Readme.Rmd",".travis.yml","_pkgdown.yml","docs","cran-comments.md","appveyor.yml"))
devtools::check(pkg=pkg,quiet=FALSE,manual=TRUE)
#devtools::build(pkg=pkg)
devtools::build(pkg=pkg,binary=TRUE) # only for zip file
.rs.restartR()
pkg <- "C:/Users/armin.rauschenberger/Desktop/colasso/colasso"
version <- substring(text=readLines(file.path(pkg,"DESCRIPTION"))[[2]],first=10)
archive <- paste0("C:/Users/armin.rauschenberger/Desktop/colasso/colasso_",version,".zip")
utils::install.packages(archive,repos=NULL)
.rs.restartR()
pkg <- "C:/Users/armin.rauschenberger/Desktop/colasso/colasso"
version <- substring(text=readLines(file.path(pkg,"DESCRIPTION"))[[2]],first=10)
archive <- paste0("C:/Users/armin.rauschenberger/Desktop/colasso/colasso_",version,".zip")
utils::install.packages(archive,repos=NULL)
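# Sketch (assumption, not in the original workflow): confirm that the installed
# version matches the one read from DESCRIPTION.
stopifnot(utils::packageVersion("colasso")==version)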
set.seed(1)
grid <- expand.grid(prob=seq(from=0.01,to=0.2,length.out=11),
fac=seq(from=0.5,to=2,length.out=11))
dim(grid)
grid <- grid[sample(seq_len(nrow(grid))),]
head(grid)
set.seed(1)
grid <- expand.grid(prob=seq(from=0.01,to=0.2,length.out=11),
fac=seq(from=0.5,to=2,length.out=11))
grid <- grid[sample(seq_len(nrow(grid))),]
list <- list()
for(i in seq_len(nrow(grid))){
cat(i," ")
temp <- colasso:::.simulate(n=100,p=200,prob=grid$prob[i],fac=grid$fac[i])
y <- temp$y; X <- temp$X
#fit <- colasso::bilasso(y=y,cutoff=0,X=X)
#plot(fit)
## plot(x=fit$sigma,y=fit$sigma.cvm)
## abline(v=fit$sigma.min,col="red",lty=2)
## abline(v=sd(y),col="grey",lty=2)
list[[i]] <- colasso::bilasso_compare(y=y,cutoff=0,X=X)
}
loss <- list()
name <- names(list[[1]])
for(i in seq_along(name)){
loss[[name[i]]] <- t(sapply(X=list,FUN=function(x) x[[name[i]]]))
}
@@ -3,7 +3,7 @@ Version: 0.0.0
Title: Stable Sparse Regression
Description: Implements colasso.
Depends: R (>= 3.0.0)
Imports: glmnet, MASS, weights, palasso
Imports: glmnet, MASS, weights, palasso, colorspace
Suggests: knitr, testthat
Authors@R: person("Armin","Rauschenberger",email="a.rauschenberger@vumc.nl",role=c("aut","cre"))
VignetteBuilder: knitr
......
# Generated by roxygen2: do not edit by hand
export()
export(.check)
export(bilasso)
export(bilasso_compare)
export(colasso)
......
@@ -19,11 +19,6 @@
#' numeric matrix with \eqn{n} rows (samples)
#' and \eqn{p} columns (variables)
#'
#' @param alpha
#' elastic net parameter\strong{:}
#' numeric between \eqn{0} (ridge)
#' and \eqn{1} (lasso)
#'
#' @param foldid
#' fold identifiers\strong{:}
#' vector with entries between \eqn{1} and \code{nfolds};
@@ -49,32 +44,43 @@
#' logical
#' (currently all methods require \code{TRUE})
#'
#' @param ...
#' further arguments passed to \code{\link[glmnet]{glmnet}}
#'
#' @details
#' INCLUDE note on deviance (not comparable between lin and log models)
#' - INCLUDE note on deviance (not comparable between lin and log models)
#' - alpha: elastic net parameter\strong{:}
#' numeric between \eqn{0} (ridge) and \eqn{1} (lasso)
#' - do not use "family"
#'
#' @examples
#' n <- 100; p <- 200
#' y <- rnorm(n)
#' X <- matrix(rnorm(n*p),nrow=n,ncol=p)
#' bilasso(y=y,cutoff=0,X=X)
#' net <- bilasso(y=y,cutoff=0,X=X,alpha=1,nlambda=50)
#'
bilasso <- function(y,cutoff,X,alpha=1,nfolds=10,foldid=NULL,type.measure="deviance",nsigma=100,sigma=NULL,logistic=TRUE){
#cutoff <- 0; alpha <- 1; nfolds <- 10; foldid <- NULL;
# type.measure <- "deviance"; nsigma <- 100; sigma <- NULL; logistic <- TRUE
bilasso <- function(y,cutoff,X,nsigma=99,sigma=NULL,nfolds=10,foldid=NULL,type.measure="deviance",logistic=TRUE,...){
#--- temporary ---
# cutoff <- 0; nsigma <- 99; sigma <- NULL; nfolds <- 10; foldid <- NULL; type.measure <- "deviance"; logistic <- TRUE
test <- list()
test$sigma <- test$pi <- test$max <- TRUE
test$grid <- TRUE
test$grid2 <- FALSE
# checks
.check(x=y,type="vector")
#--- checks ---
colasso:::.check(x=y,type="vector")
if(all(y %in% c(0,1))){stop("Binary response.",call.=FALSE)}
.check(x=cutoff,type="scalar",min=min(y),max=max(y))
.check(x=X,type="matrix")
.check(x=alpha,type="scalar",min=0,max=1)
.check(x=nfolds,type="scalar",min=3)
.check(x=foldid,type="vector",values=seq_len(nfolds),null=TRUE)
.check(x=type.measure,type="string",values=c("deviance","class","mse","mae","auc"))
.check(x=nsigma,type="scalar",min=10)
.check(x=sigma,type="vector",min=.Machine$double.eps,null=TRUE)
colasso:::.check(x=cutoff,type="scalar",min=min(y),max=max(y))
colasso:::.check(x=X,type="matrix")
if(length(y)!=nrow(X)){stop("Contradictory sample size.",call.=FALSE)}
colasso:::.check(x=nsigma,type="scalar",min=10)
colasso:::.check(x=sigma,type="vector",min=.Machine$double.eps,null=TRUE)
colasso:::.check(x=nfolds,type="scalar",min=3)
colasso:::.check(x=foldid,type="vector",values=seq_len(nfolds),null=TRUE)
colasso:::.check(x=type.measure,type="string",values=c("deviance","class","mse","mae","auc"))
if(!is.null(list(...)$family)){stop("Reserved argument \"family\".",call.=FALSE)}
n <- length(y)
# binarisation
z <- 1*(y > cutoff)
@@ -84,36 +90,62 @@ bilasso <- function(y,cutoff,X,alpha=1,nfolds=10,foldid=NULL,type.measure="devia
foldid <- palasso:::.folds(y=z,nfolds=nfolds)
}
# model fitting
#--- model fitting ---
fit <- list()
fit$gaussian <- glmnet::glmnet(y=y,x=X,family="gaussian",alpha=alpha)
fit$gaussian <- glmnet::glmnet(y=y,x=X,family="gaussian",...)
if(logistic){
fit$binomial <- glmnet::glmnet(y=z,x=X,family="binomial",alpha=alpha)
fit$binomial <- glmnet::glmnet(y=z,x=X,family="binomial",...)
}
# weights
#--- tuning parameters ---
fit$lambda <- fit$gaussian$lambda
nlambda <- length(fit$gaussian$lambda)
lab.sigma <- paste0("si",seq_len(nsigma))
lab.lambda <- paste0("la",seq_len(nlambda))
if(is.null(sigma)){
fit$sigma <- exp(seq(from=log(0.05*stats::sd(y)),
to=log(10*stats::sd(y)),length.out=nsigma)) # was 2*
to=log(10*stats::sd(y)),length.out=nsigma))
} else {
fit$sigma <- sigma
}
fit$pi <- seq(from=0,to=1,length.out=100) # trial
fit$max <- exp(seq(from=log(0.05*max(abs(y-cutoff))),