Gitlab is now using https://gitlab.lcsb.uni.lu as it's primary address. Please update your bookmarks. FAQ.

Commit 92ac219c authored by Armin Rauschenberger's avatar Armin Rauschenberger
Browse files

automation

parent 55267e0c
Package: cornet
Version: 0.0.1
Title: Elastic Net for Dichotomised Outcomes
Description: Implements lasso and ridge regression for dichotomised outcomes.
Description: Implements lasso and ridge regression for dichotomised outcomes. (Such outcomes are not naturally but artificially binary.)
Depends: R (>= 3.0.0)
Imports: glmnet
Imports: glmnet, palasso
Suggests: knitr, testthat
Enhances: RColorBrewer
Authors@R: person("Armin","Rauschenberger",email="a.rauschenberger@vumc.nl",role=c("aut","cre"))
......@@ -11,5 +11,5 @@ VignetteBuilder: knitr
License: GPL-3
LazyData: true
RoxygenNote: 6.1.0
URL: https://github.com/rauschenberger/colasso
BugReports: https://github.com/rauschenberger/colasso/issues
URL: https://github.com/rauschenberger/cornet
BugReports: https://github.com/rauschenberger/cornet/issues
......@@ -3,6 +3,5 @@
S3method(coef,cornet)
S3method(plot,cornet)
S3method(predict,cornet)
export(.compare)
export(.test)
S3method(print,cornet)
export(cornet)
......@@ -7,7 +7,8 @@
#' Logistic regression with a continuous response
#'
#' @description
#' Implements logistic regression with a continuous response.
#' Implements logistic regression with a continuous response
#' in high-dimensional settings.
#'
#' @param y
#' continuous response\strong{:}
......@@ -15,7 +16,7 @@
#'
#' @param cutoff
#' cutoff point for dichotomising response into classes\strong{:}
#' value between \code{min(y)} and \code{max(y)}
#' \emph{meaningful} value between \code{min(y)} and \code{max(y)}
#'
#' @param X
#' covariates\strong{:}
......@@ -44,7 +45,7 @@
#' or \code{NULL} (default sequence)
#'
#' @param npi
#' number of \code{pi} values
#' number of \code{pi} values (weighting)
#'
#' @param sigma
#' sigma sequence\strong{:}
......@@ -52,26 +53,26 @@
#' or \code{NULL} (default sequence)
#'
#' @param nsigma
#' number of \code{sigma} values
#' number of \code{sigma} values (scaling)
#'
#' @param ...
#' further arguments passed to \code{\link[glmnet]{glmnet}}
#'
#' @details
#' This function fits a \code{"gaussian"} model for the numeric response,
#' and a \code{"binomial"} model for the binary response,
#' This function fits a \emph{gaussian} model for the numeric response,
#' and a \emph{binomial} model for the binary response,
#' meaning that the \code{glmnet} argument \code{family} is unavailable.
#' Also if \code{type.measure} equals \code{"deviance"},
#' Even if \code{type.measure} equals \code{"deviance"},
#' the loss is uncomparable between linear and logistic regression.
#'
#' @return
#' Returns an object of class \code{cornet}, a list with multiple slots:
#' \itemize{
#' \item \code{"gaussian"}: fitted linear model, class \code{glmnet}
#' \item \code{"binomial"}: fitted logistic model, class \code{glmnet}
#' \item \code{"sigma"}: scaling parameters \code{sigma},
#' \item \code{gaussian}: fitted linear model, class \code{glmnet}
#' \item \code{binomial}: fitted logistic model, class \code{glmnet}
#' \item \code{sigma}: scaling parameters \code{sigma},
#' vector of length \code{nsigma}
#' \item \code{"pi"}: weighting parameters \code{pi},
#' \item \code{pi}: weighting parameters \code{pi},
#' vector of length \code{npi}
#' \item \code{cvm}: evaluation loss,
#' matrix with \code{nsigma} rows and \code{npi} columns
......@@ -82,6 +83,11 @@
#' \item \code{cutoff}: threshold for dichotomisation
#' }
#'
#' @seealso
#' Methods for objects of class \code{cornet} include
#' \code{\link[=coef.cornet]{coef}} and
#' \code{\link[=predict.cornet]{predict}}.
#'
#' @examples
#' n <- 100; p <- 200
#' y <- rnorm(n)
......@@ -96,19 +102,19 @@ cornet <- function(y,cutoff,X,alpha=1,npi=101,pi=NULL,nsigma=99,sigma=NULL,nfold
test$combined <- TRUE
#--- checks ---
cornet:::.check(x=y,type="vector")
.check(x=y,type="vector")
if(all(y %in% c(0,1))){warning("Binary response.",call.=FALSE)}
cornet:::.check(x=cutoff,type="scalar",min=min(y),max=max(y))
cornet:::.check(x=X,type="matrix")
cornet:::.check(x=alpha,type="scalar",min=0,max=1)
.check(x=cutoff,type="scalar",min=min(y),max=max(y))
.check(x=X,type="matrix")
.check(x=alpha,type="scalar",min=0,max=1)
if(length(y)!=nrow(X)){stop("Contradictory sample size.",call.=FALSE)}
cornet:::.check(x=npi,type="scalar",min=1)
cornet:::.check(x=pi,type="vector",min=0,max=1,null=TRUE)
cornet:::.check(x=nsigma,type="scalar",min=1)
cornet:::.check(x=sigma,type="vector",min=.Machine$double.eps,null=TRUE)
cornet:::.check(x=nfolds,type="scalar",min=3)
cornet:::.check(x=foldid,type="vector",values=seq_len(nfolds),null=TRUE)
cornet:::.check(x=type.measure,type="string",values=c("deviance","class","mse","mae")) # not auc (min/max confusion)
.check(x=npi,type="scalar",min=1)
.check(x=pi,type="vector",min=0,max=1,null=TRUE)
.check(x=nsigma,type="scalar",min=1)
.check(x=sigma,type="vector",min=.Machine$double.eps,null=TRUE)
.check(x=nfolds,type="scalar",min=3)
.check(x=foldid,type="vector",values=seq_len(nfolds),null=TRUE)
.check(x=type.measure,type="string",values=c("deviance","class","mse","mae")) # not auc (min/max confusion)
if(!is.null(list(...)$family)){stop("Reserved argument \"family\".",call.=FALSE)}
n <- length(y)
......@@ -117,7 +123,7 @@ cornet <- function(y,cutoff,X,alpha=1,npi=101,pi=NULL,nsigma=99,sigma=NULL,nfold
# fold identifiers
if(is.null(foldid)){
foldid <- cornet:::.folds(y=z,nfolds=nfolds)
foldid <- palasso:::.folds(y=z,nfolds=nfolds)
}
#--- model fitting ---
......@@ -171,14 +177,14 @@ cornet <- function(y,cutoff,X,alpha=1,npi=101,pi=NULL,nsigma=99,sigma=NULL,nfold
net <- glmnet::glmnet(y=y0,x=X0,family="gaussian",alpha=alpha,...)
temp_y <- stats::predict(object=net,newx=X1,type="response",s=fit$gaussian$lambda)
pred$y[foldid==k,seq_len(ncol(temp_y))] <- temp_y
cvm <- cornet:::.loss(y=y1,fit=temp_y,family="gaussian",type.measure="deviance")[[1]]
cvm <- palasso:::.loss(y=y1,fit=temp_y,family="gaussian",type.measure="deviance")[[1]]
y_hat <- temp_y[,which.min(cvm)]
# logistic regression
net <- glmnet::glmnet(y=z0,x=X0,family="binomial",alpha=alpha,...)
temp_z <- stats::predict(object=net,newx=X1,type="response",s=fit$binomial$lambda)
pred$z[foldid==k,seq_len(ncol(temp_z))] <- temp_z
cvm <- cornet:::.loss(y=z1,fit=temp_z,family="binomial",type.measure=type.measure)[[1]]
cvm <- palasso:::.loss(y=z1,fit=temp_z,family="binomial",type.measure=type.measure)[[1]]
z_hat <- temp_z[,which.min(cvm)]
# combined regression
......@@ -196,11 +202,11 @@ cornet <- function(y,cutoff,X,alpha=1,npi=101,pi=NULL,nsigma=99,sigma=NULL,nfold
#--- evaluation ---
# linear loss
fit$gaussian$cvm <- cornet:::.loss(y=y,fit=pred$y,family="gaussian",type.measure="deviance")[[1]]
fit$gaussian$cvm <- palasso:::.loss(y=y,fit=pred$y,family="gaussian",type.measure="deviance")[[1]]
fit$gaussian$lambda.min <- fit$gaussian$lambda[which.min(fit$gaussian$cvm)]
# logistic loss
fit$binomial$cvm <- cornet:::.loss(y=z,fit=pred$z,family="binomial",type.measure=type.measure)[[1]]
fit$binomial$cvm <- palasso:::.loss(y=z,fit=pred$z,family="binomial",type.measure=type.measure)[[1]]
fit$binomial$lambda.min <- fit$binomial$lambda[which.min(fit$binomial$cvm)]
# combined loss
......@@ -209,7 +215,7 @@ cornet <- function(y,cutoff,X,alpha=1,npi=101,pi=NULL,nsigma=99,sigma=NULL,nfold
fit$cvm <- matrix(data=NA,nrow=nsigma,ncol=npi,dimnames=dimnames)
for(i in seq_len(nsigma)){
for(j in seq_len(npi)){
fit$cvm[i,j] <- cornet:::.loss(y=z,fit=pred$combined[,i,j],family="binomial",type.measure=type.measure)[[1]]
fit$cvm[i,j] <- palasso:::.loss(y=z,fit=pred$combined[,i,j],family="binomial",type.measure=type.measure)[[1]]
}
}
temp <- which(fit$cvm==min(fit$cvm),arr.ind=TRUE,useNames=TRUE)
......@@ -223,7 +229,10 @@ cornet <- function(y,cutoff,X,alpha=1,npi=101,pi=NULL,nsigma=99,sigma=NULL,nfold
fit$cutoff <- cutoff
fit$info <- list(type.measure=type.measure,
sd.y=stats::sd(y),
"+"=sum(z==1),
"-"=sum(z==0),
table=table(z),
n=n,p=ncol(X),
test=as.data.frame(test))
class(fit) <- "cornet"
......@@ -232,6 +241,43 @@ cornet <- function(y,cutoff,X,alpha=1,npi=101,pi=NULL,nsigma=99,sigma=NULL,nfold
#--- Methods -------------------------------------------------------------------
#' @export
#' @title
#' Combined regression
#'
#' @description
#' Prints summary of cornet object.
#'
#' @param x
#' \link[cornet]{cornet} object
#'
#' @param ...
#' further arguments (not applicable)
#'
#' @return
#' Returns sample size \eqn{n},
#' number of covariates \eqn{p},
#' information on dichotomisation,
#' tuned scaling parameter (sigma),
#' tuned weighting parameter (pi),
#' and corresponding loss.
#'
#' @examples
#' NA
#'
print.cornet <- function(x,...){
cat("cornet object:\n")
cat(paste0("n = ",x$info$n,","," p = ",x$info$p),"\n")
cat(paste0("z = I(y > ",signif(x$cutoff,digits=2),"): "))
cat(paste0(x$info$"+","+"," vs ",x$info$"-","-"),"\n")
cat(paste0("sigma.min = ",signif(x$sigma.min,digits=1)),"\n")
cat(paste0("pi.min = ",round(x$pi.min,digits=2)),"\n")
type <- x$info$type.measure
value <- signif(x$cvm[names(x$sigma.min),names(x$pi.min)],digits=2)
cat(paste0(type," = ",value))
return(invisible(NULL))
}
#' @export
#' @title
#' Extract estimated coefficients
......@@ -270,14 +316,13 @@ coef.cornet <- function(object,...){
return(coef)
}
#' @export
#' @title
#' Plot loss matrix
#'
#' @description
#' Plots the loss for difference combinations of
#' the weight (pi) and scale (sigma) paramters.
#' Plots the loss for different combinations of
#' scaling (sigma) and weighting (pi) parameters.
#'
#' @param x
#' \link[cornet]{cornet} object
......@@ -411,9 +456,114 @@ predict.cornet <- function(object,newx,type="probability",...){
return(as.data.frame(frame))
}
#--- Internal functions --------------------------------------------------------
#' @title
#' Arguments
#'
#' @description
#' Verifies whether two or more arguments are identical.
#'
#' @param ...
#' scalars, vectors, or matrices of equal dimensions
#'
#' @param na.rm
#' remove missing values\strong{:}
#' logical
#'
#' @examples
#' cornet:::.equal(1,1,1)
#'
.equal <- function(...,na.rm=FALSE){
list <- list(...)
cond <- vapply(X=list,
FUN=function(x) all(x==list[[1]],na.rm=na.rm),
FUN.VALUE=logical(1))
if(any(!cond)){
stop("Unequal elements.",call.=FALSE)
}
return(invisible(NULL))
}
#' @title
#' Arguments
#'
#' @description
#' Verifies whether an argument matches formal requirements.
#'
#' @param x
#' argument
#'
#' @param type
#' character \code{"string"}, \code{"scalar"}, \code{"vector"}, \code{"matrix"}
#'
#' @param miss
#' accept missing values\strong{:}
#' logical
#'
#' @param min
#' lower limit\strong{:}
#' numeric
#'
#' @param max
#' upper limit\strong{:}
#' numeric
#'
#' @param values
#' only accept specific values\strong{:}
#' vector
#'
#' @param inf
#' accept infinite (\code{Inf} or \code{-Inf}) values\strong{:}
#' logical
#'
#' @param null
#' accept \code{NULL}\strong{:}
#' logical
#'
#' @examples
#' cornet:::.check(0.5,type="scalar",min=0,max=1)
#'
.check <- function(x,type,miss=FALSE,min=NULL,max=NULL,values=NULL,inf=FALSE,null=FALSE){
name <- deparse(substitute(x))
if(null && is.null(x)){
return(invisible(NULL))
}
if(type=="string"){
cond <- is.vector(x) & is.character(x) & length(x)==1
} else if(type=="scalar"){
cond <- is.vector(x) & is.numeric(x) & length(x)==1
} else if(type=="vector"){
cond <- is.vector(x) & is.numeric(x)
} else if(type=="matrix"){
cond <- is.matrix(x) & is.numeric(x)
} else {
warning("Unknown type.")
}
if(!cond){
stop(paste0("Argument \"",name,"\" does not match formal requirements."),call.=FALSE)
}
if(!miss && any(is.na(x))){
stop(paste0("Argument \"",name,"\" contains missing values."),call.=FALSE)
}
if(!is.null(min) && any(x<min)){
stop(paste0("expecting ",name," >= ",min),call.=FALSE)
}
if(!is.null(max) && any(x>max)){
stop(paste0("expecting ",name," <= ",max),call.=FALSE)
}
if(!is.null(values) && !(x %in% values)){
stop(paste0("Argument \"",name,"\" contains invalid values."),call.=FALSE)
}
if(!inf && any(is.infinite(values))){
stop(paste0("Argument \"",name,"\ contains infinite values."),call.=FALSE)
}
return(invisible(NULL))
}
#--- Application ---------------------------------------------------------------
#' @export
#' @title
#' Performance measurement by cross-validation
#'
......@@ -435,7 +585,7 @@ predict.cornet <- function(object,newx,type="probability",...){
z <- 1*(y > cutoff)
if(is.null(foldid)){
fold <- cornet:::.folds(y=z,nfolds=nfolds)
fold <- palasso:::.folds(y=z,nfolds=nfolds)
} else {
fold <- foldid
}
......@@ -445,11 +595,11 @@ predict.cornet <- function(object,newx,type="probability",...){
cols <- c("gaussian","binomial","combined")
pred <- matrix(data=NA,nrow=length(y),ncol=length(cols),
dimnames=list(NULL,cols))
for(i in seq_len(nfolds)){
fit <- cornet::cornet(y=y[fold!=i],cutoff=cutoff,X=X[fold!=i,],alpha=alpha,type.measure=type.measure)
tryCatch(expr=cornet:::plot.cornet(fit),error=function(x) NULL)
temp <- cornet:::predict.cornet(fit,newx=X[fold==i,])
tryCatch(expr=plot.cornet(fit),error=function(x) NULL)
temp <- predict.cornet(fit,newx=X[fold==i,])
if(any(temp<0|temp>1)){stop("Outside unit interval.",call.=FALSE)}
model <- colnames(pred)
for(j in seq_along(model)){
......@@ -458,7 +608,7 @@ predict.cornet <- function(object,newx,type="probability",...){
}
type <- c("deviance","class","mse","mae","auc")
loss <- lapply(X=type,FUN=function(x) cornet:::.loss(y=z,fit=pred,family="binomial",type.measure=x,foldid=fold)[[1]])
loss <- lapply(X=type,FUN=function(x) palasso:::.loss(y=z,fit=pred,family="binomial",type.measure=x,foldid=fold)[[1]])
names(loss) <- type
#--- deviance residuals ---
......@@ -479,14 +629,13 @@ predict.cornet <- function(object,newx,type="probability",...){
for(i in seq_len(nfolds)){
cond <- fold==i
loss$resid.pvalue[i] <- stats::wilcox.test(x=rxs[cond],y=rys[cond],
paired=TRUE,alternative="greater")$p.value
paired=TRUE,alternative="greater")$p.value
}
return(loss)
}
#' @export
#' @title
#' Single-split test
#'
......@@ -507,12 +656,12 @@ predict.cornet <- function(object,newx,type="probability",...){
.test <- function(y,cutoff,X,alpha=1,type.measure="deviance"){
z <- 1*(y > cutoff)
fold <- cornet:::.folds(y=z,nfolds=5)
fold <- palasso:::.folds(y=z,nfolds=5)
fold <- ifelse(fold==1,1,0)
fit <- cornet::cornet(y=y[fold==0],cutoff=cutoff,X=X[fold==0,],alpha=alpha)
tryCatch(expr=cornet:::plot.cornet(fit),error=function(x) NULL)
pred <- cornet:::predict.cornet(fit,newx=X[fold==1,])
tryCatch(expr=plot.cornet(fit),error=function(x) NULL)
pred <- predict.cornet(fit,newx=X[fold==1,])
if(any(pred<0|pred>1)){stop("Outside unit interval.",call.=FALSE)}
#res <- (pred-z[fold==1])^2 # MSE
......@@ -528,243 +677,168 @@ predict.cornet <- function(object,newx,type="probability",...){
return(pvalue)
}
# Simulates y and X.
.simulate <- function(n,p,prob=0.2,fac=1){
beta <- stats::rnorm(n=p)
cond <- stats::rbinom(n=p,size=1,prob=prob)
beta[cond==0] <- 0
X <- matrix(stats::rnorm(n=n*p),nrow=n,ncol=p)
mean <- X %*% beta
y <- stats::rnorm(n=n,mean=mean,sd=fac*stats::sd(mean))
return(list(y=y,X=X))
}
#--- Internal functions --------------------------------------------------------
#' @title
#' Arguments
#'
#' @description
#' Verifies whether two or more arguments are identical.
#'
#' @param ...
#' scalars, vectors, or matrices of equal dimensions
#'
#' @param na.rm
#' remove missing values\strong{:}
#' logical
#'
#' @examples
#' NA
#'
.equal <- function(...,na.rm=FALSE){
list <- list(...)
cond <- vapply(X=list,
FUN=function(x) all(x==list[[1]],na.rm=na.rm),
FUN.VALUE=logical(1))
if(any(!cond)){
stop("Unequal elements.",call.=FALSE)
}
return(invisible(NULL))
}
#' @title
#' Arguments
#' Data simulation
#'
#' @description
#' Verifies whether an argument matches formal requirements.
#'
#' @param x
#' argument
#' Simulates data for unit tests
#'
#' @param type
#' character \code{"string"}, \code{"scalar"}, \code{"vector"}, \code{"matrix"}
#' @param n
#' sample size\strong{:}
#' positive integer
#'
#' @param miss
#' accept missing values\strong{:}
#' logical
#' @param p
#' covariate space\strong{:}
#' positive integer
#'
#' @param min
#' lower limit\strong{:}
#' numeric
#'
#' @param max
#' upper limit\strong{:}
#' numeric
#' @param prob
#' (approximate) proportion of causal covariates\strong{:}
#' numeric between \eqn{0} and \eqn{1}
#'
#' @param values
#' only accept specific values\strong{:}
#' vector
#' @param fac
#' noise factor\strong{:}
#' positive real number
#'
#' @param inf
#' accept infinite (\code{Inf} or \code{-Inf}) values\strong{:}
#' logical
#' @return
#' Returns invisible list with elements \code{y} and \code{X}.
#'
#' @param null
#' accept \code{NULL}\strong{:}
#' logical
#'
#' @examples
#' NA
#' data <- cornet:::.simulate(n=10,p=20,prob=0.2,fac=2)
#' names(data)
#'
.check <- function(x,type,miss=FALSE,min=NULL,max=NULL,values=NULL,inf=FALSE,null=FALSE){
name <- deparse(substitute(x))
if(null && is.null(x)){
return(invisible(NULL))
}
if(type=="string"){
cond <- is.vector(x) & is.character(x) & length(x)==1
} else if(type=="scalar"){
cond <- is.vector(x) & is.numeric(x) & length(x)==1
} else if(type=="vector"){
cond <- is.vector(x) & is.numeric(x)
} else if(type=="matrix"){
cond <- is.matrix(x) & is.numeric(x)
} else {
warning("Unknown type.")
}
if(!cond){
stop(paste0("Argument \"",name,"\" does not match formal requirements."),call.=FALSE)
}
if(!miss && any(is.na(x))){
stop(paste0("Argument \"",name,"\" contains missing values."),call.=FALSE)
}
if(!is.null(min) && any(x<min)){
stop(paste0("expecting ",name," >= ",min),call.=FALSE)
}
if(!is.null(max) && any(x>max)){
stop(paste0("expecting ",name," <= ",max),call.=FALSE)
}
if(!is.null(values) && !(x %in% values)){
stop(paste0("Argument \"",name,"\" contains invalid values."),call.=FALSE)
}
if(!inf && any(is.infinite(values))){
stop(paste0("Argument \"",name,"\ contains infinite values."),call.=FALSE)
}
return(invisible(NULL))
.simulate <- function(n,p,prob=0.2,fac=1){
beta <- stats::rnorm(n=p)
cond <- stats::rbinom(n=p,size=1,prob=prob)
beta[cond==0] <- 0
X <- matrix(stats::rnorm(n=n*p),nrow=n,ncol=p)
mean <- X %*% beta
y <- stats::rnorm(n=n,mean=mean,sd=fac*stats::sd(mean))
return(invisible(list(y=y,X=X)))
}
# Import this function from the palasso package.
.loss <- function (y,fit,family,type.measure,foldid=NULL){
if (!is.list(fit)) {
fit <- list(fit)
}
loss <- list()
for (i in seq_along(fit)) {
if (is.vector(fit[[i]])) {
fit[[i]] <- as.matrix(fit[[i]])
}
if (is.null(foldid) & (family == "cox" | type.measure ==
"auc")) {
stop("Missing foldid.", call. = FALSE)
}
if (family == "gaussian") {
if (type.measure %in% c("deviance", "mse")) {
loss[[i]] <- apply(X = fit[[i]], MARGIN = 2,
FUN = function(x) mean((x - y)^2))
}
else if (type.measure == "mae") {
loss[[i]] <- apply(X = fit[[i]], MARGIN = 2,
FUN = function(x) mean(abs(x - y)))
}
else {
stop("Invalid type measure.", call. = FALSE)