... ... @@ -7,7 +7,8 @@ #' Logistic regression with a continuous response #' #' @description #' Implements logistic regression with a continuous response. #' Implements logistic regression with a continuous response #' in high-dimensional settings. #' #' @param y #' continuous response\strong{:} ... ... @@ -15,7 +16,7 @@ #' #' @param cutoff #' cutoff point for dichotomising response into classes\strong{:} #' value between \code{min(y)} and \code{max(y)} #' \emph{meaningful} value between \code{min(y)} and \code{max(y)} #' #' @param X #' covariates\strong{:} ... ... @@ -44,7 +45,7 @@ #' or \code{NULL} (default sequence) #' #' @param npi #' number of \code{pi} values #' number of \code{pi} values (weighting) #' #' @param sigma #' sigma sequence\strong{:} ... ... @@ -52,26 +53,26 @@ #' or \code{NULL} (default sequence) #' #' @param nsigma #' number of \code{sigma} values #' number of \code{sigma} values (scaling) #' #' @param ... #' further arguments passed to \code{\link[glmnet]{glmnet}} #' #' @details #' This function fits a \code{"gaussian"} model for the numeric response, #' and a \code{"binomial"} model for the binary response, #' This function fits a \emph{gaussian} model for the numeric response, #' and a \emph{binomial} model for the binary response, #' meaning that the \code{glmnet} argument \code{family} is unavailable. #' Also if \code{type.measure} equals \code{"deviance"}, #' Even if \code{type.measure} equals \code{"deviance"}, #' the loss is not comparable between linear and logistic regression. 
#' #' @return #' Returns an object of class \code{cornet}, a list with multiple slots: #' \itemize{ #' \item \code{"gaussian"}: fitted linear model, class \code{glmnet} #' \item \code{"binomial"}: fitted logistic model, class \code{glmnet} #' \item \code{"sigma"}: scaling parameters \code{sigma}, #' \item \code{gaussian}: fitted linear model, class \code{glmnet} #' \item \code{binomial}: fitted logistic model, class \code{glmnet} #' \item \code{sigma}: scaling parameters \code{sigma}, #' vector of length \code{nsigma} #' \item \code{"pi"}: weighting parameters \code{pi}, #' \item \code{pi}: weighting parameters \code{pi}, #' vector of length \code{npi} #' \item \code{cvm}: evaluation loss, #' matrix with \code{nsigma} rows and \code{npi} columns ... ... @@ -82,6 +83,11 @@ #' \item \code{cutoff}: threshold for dichotomisation #' } #' #' @seealso #' Methods for objects of class \code{cornet} include #' \code{\link[=coef.cornet]{coef}} and #' \code{\link[=predict.cornet]{predict}}. #' #' @examples #' n <- 100; p <- 200 #' y <- rnorm(n) ... ... 
@@ -96,19 +102,19 @@ cornet <- function(y,cutoff,X,alpha=1,npi=101,pi=NULL,nsigma=99,sigma=NULL,nfold test$combined <- TRUE #--- checks --- cornet:::.check(x=y,type="vector") .check(x=y,type="vector") if(all(y %in% c(0,1))){warning("Binary response.",call.=FALSE)} cornet:::.check(x=cutoff,type="scalar",min=min(y),max=max(y)) cornet:::.check(x=X,type="matrix") cornet:::.check(x=alpha,type="scalar",min=0,max=1) .check(x=cutoff,type="scalar",min=min(y),max=max(y)) .check(x=X,type="matrix") .check(x=alpha,type="scalar",min=0,max=1) if(length(y)!=nrow(X)){stop("Contradictory sample size.",call.=FALSE)} cornet:::.check(x=npi,type="scalar",min=1) cornet:::.check(x=pi,type="vector",min=0,max=1,null=TRUE) cornet:::.check(x=nsigma,type="scalar",min=1) cornet:::.check(x=sigma,type="vector",min=.Machine$double.eps,null=TRUE) cornet:::.check(x=nfolds,type="scalar",min=3) cornet:::.check(x=foldid,type="vector",values=seq_len(nfolds),null=TRUE) cornet:::.check(x=type.measure,type="string",values=c("deviance","class","mse","mae")) # not auc (min/max confusion) .check(x=npi,type="scalar",min=1) .check(x=pi,type="vector",min=0,max=1,null=TRUE) .check(x=nsigma,type="scalar",min=1) .check(x=sigma,type="vector",min=.Machine$double.eps,null=TRUE) .check(x=nfolds,type="scalar",min=3) .check(x=foldid,type="vector",values=seq_len(nfolds),null=TRUE) .check(x=type.measure,type="string",values=c("deviance","class","mse","mae")) # not auc (min/max confusion) if(!is.null(list(...)$family)){stop("Reserved argument \"family\".",call.=FALSE)} n <- length(y) ... ... @@ -117,7 +123,7 @@ cornet <- function(y,cutoff,X,alpha=1,npi=101,pi=NULL,nsigma=99,sigma=NULL,nfold # fold identifiers if(is.null(foldid)){ foldid <- cornet:::.folds(y=z,nfolds=nfolds) foldid <- palasso:::.folds(y=z,nfolds=nfolds) } #--- model fitting --- ... ... @@ -171,14 +177,14 @@ cornet <- function(y,cutoff,X,alpha=1,npi=101,pi=NULL,nsigma=99,sigma=NULL,nfold net <- glmnet::glmnet(y=y0,x=X0,family="gaussian",alpha=alpha,...) 
temp_y <- stats::predict(object=net,newx=X1,type="response",s=fit$gaussian$lambda) pred$y[foldid==k,seq_len(ncol(temp_y))] <- temp_y cvm <- cornet:::.loss(y=y1,fit=temp_y,family="gaussian",type.measure="deviance")[[1]] cvm <- palasso:::.loss(y=y1,fit=temp_y,family="gaussian",type.measure="deviance")[[1]] y_hat <- temp_y[,which.min(cvm)] # logistic regression net <- glmnet::glmnet(y=z0,x=X0,family="binomial",alpha=alpha,...) temp_z <- stats::predict(object=net,newx=X1,type="response",s=fit$binomial$lambda) pred$z[foldid==k,seq_len(ncol(temp_z))] <- temp_z cvm <- cornet:::.loss(y=z1,fit=temp_z,family="binomial",type.measure=type.measure)[[1]] cvm <- palasso:::.loss(y=z1,fit=temp_z,family="binomial",type.measure=type.measure)[[1]] z_hat <- temp_z[,which.min(cvm)] # combined regression ... ... @@ -196,11 +202,11 @@ cornet <- function(y,cutoff,X,alpha=1,npi=101,pi=NULL,nsigma=99,sigma=NULL,nfold #--- evaluation --- # linear loss fit$gaussian$cvm <- cornet:::.loss(y=y,fit=pred$y,family="gaussian",type.measure="deviance")[[1]] fit$gaussian$cvm <- palasso:::.loss(y=y,fit=pred$y,family="gaussian",type.measure="deviance")[[1]] fit$gaussian$lambda.min <- fit$gaussian$lambda[which.min(fit$gaussian$cvm)] # logistic loss fit$binomial$cvm <- cornet:::.loss(y=z,fit=pred$z,family="binomial",type.measure=type.measure)[[1]] fit$binomial$cvm <- palasso:::.loss(y=z,fit=pred$z,family="binomial",type.measure=type.measure)[[1]] fit$binomial$lambda.min <- fit$binomial$lambda[which.min(fit$binomial$cvm)] # combined loss ... ... 
@@ -209,7 +215,7 @@ cornet <- function(y,cutoff,X,alpha=1,npi=101,pi=NULL,nsigma=99,sigma=NULL,nfold fit$cvm <- matrix(data=NA,nrow=nsigma,ncol=npi,dimnames=dimnames) for(i in seq_len(nsigma)){ for(j in seq_len(npi)){ fit$cvm[i,j] <- cornet:::.loss(y=z,fit=pred$combined[,i,j],family="binomial",type.measure=type.measure)[[1]] fit$cvm[i,j] <- palasso:::.loss(y=z,fit=pred$combined[,i,j],family="binomial",type.measure=type.measure)[[1]] } } temp <- which(fit$cvm==min(fit$cvm),arr.ind=TRUE,useNames=TRUE) ... ... @@ -223,7 +229,10 @@ cornet <- function(y,cutoff,X,alpha=1,npi=101,pi=NULL,nsigma=99,sigma=NULL,nfold fit$cutoff <- cutoff fit$info <- list(type.measure=type.measure, sd.y=stats::sd(y), "+"=sum(z==1), "-"=sum(z==0), table=table(z), n=n,p=ncol(X), test=as.data.frame(test)) class(fit) <- "cornet" ... ... @@ -232,6 +241,43 @@ cornet <- function(y,cutoff,X,alpha=1,npi=101,pi=NULL,nsigma=99,sigma=NULL,nfold #--- Methods ------------------------------------------------------------------- #' @export #' @title #' Combined regression #' #' @description #' Prints summary of cornet object. #' #' @param x #' \link[cornet]{cornet} object #' #' @param ... #' further arguments (not applicable) #' #' @return #' Returns sample size \eqn{n}, #' number of covariates \eqn{p}, #' information on dichotomisation, #' tuned scaling parameter (sigma), #' tuned weighting parameter (pi), #' and corresponding loss. #' #' @examples #' NA #' print.cornet <- function(x,...){ cat("cornet object:\n") cat(paste0("n = ",x$info$n,","," p = ",x$info$p),"\n") cat(paste0("z = I(y > ",signif(x$cutoff,digits=2),"): ")) cat(paste0(x$info$"+","+"," vs ",x$info$"-","-"),"\n") cat(paste0("sigma.min = ",signif(x$sigma.min,digits=1)),"\n") cat(paste0("pi.min = ",round(x$pi.min,digits=2)),"\n") type <- x$info$type.measure value <- signif(x$cvm[names(x$sigma.min),names(x$pi.min)],digits=2) cat(paste0(type," = ",value)) return(invisible(NULL)) } #' @export #' @title #' Extract estimated coefficients ... ... 
@@ -270,14 +316,13 @@ coef.cornet <- function(object,...){ return(coef) } #' @export #' @title #' Plot loss matrix #' #' @description #' Plots the loss for difference combinations of #' the weight (pi) and scale (sigma) paramters. #' Plots the loss for different combinations of #' scaling (sigma) and weighting (pi) parameters. #' #' @param x #' \link[cornet]{cornet} object ... ... @@ -411,9 +456,114 @@ predict.cornet <- function(object,newx,type="probability",...){ return(as.data.frame(frame)) } #--- Internal functions -------------------------------------------------------- #' @title #' Arguments #' #' @description #' Verifies whether two or more arguments are identical. #' #' @param ... #' scalars, vectors, or matrices of equal dimensions #' #' @param na.rm #' remove missing values\strong{:} #' logical #' #' @examples #' cornet:::.equal(1,1,1) #' .equal <- function(...,na.rm=FALSE){ list <- list(...) cond <- vapply(X=list, FUN=function(x) all(x==list[[1]],na.rm=na.rm), FUN.VALUE=logical(1)) if(any(!cond)){ stop("Unequal elements.",call.=FALSE) } return(invisible(NULL)) } #' @title #' Arguments #' #' @description #' Verifies whether an argument matches formal requirements. 
#' #' @param x #' argument #' #' @param type #' character \code{"string"}, \code{"scalar"}, \code{"vector"}, \code{"matrix"} #' #' @param miss #' accept missing values\strong{:} #' logical #' #' @param min #' lower limit\strong{:} #' numeric #' #' @param max #' upper limit\strong{:} #' numeric #' #' @param values #' only accept specific values\strong{:} #' vector #' #' @param inf #' accept infinite (\code{Inf} or \code{-Inf}) values\strong{:} #' logical #' #' @param null #' accept \code{NULL}\strong{:} #' logical #' #' @examples #' cornet:::.check(0.5,type="scalar",min=0,max=1) #' .check <- function(x,type,miss=FALSE,min=NULL,max=NULL,values=NULL,inf=FALSE,null=FALSE){ name <- deparse(substitute(x)) if(null && is.null(x)){ return(invisible(NULL)) } if(type=="string"){ cond <- is.vector(x) & is.character(x) & length(x)==1 } else if(type=="scalar"){ cond <- is.vector(x) & is.numeric(x) & length(x)==1 } else if(type=="vector"){ cond <- is.vector(x) & is.numeric(x) } else if(type=="matrix"){ cond <- is.matrix(x) & is.numeric(x) } else { warning("Unknown type.") } if(!cond){ stop(paste0("Argument \"",name,"\" does not match formal requirements."),call.=FALSE) } if(!miss && any(is.na(x))){ stop(paste0("Argument \"",name,"\" contains missing values."),call.=FALSE) } if(!is.null(min) && any(x<min)){ stop(paste0("expecting ",name," >= ",min),call.=FALSE) } if(!is.null(max) && any(x>max)){ stop(paste0("expecting ",name," <= ",max),call.=FALSE) } if(!is.null(values) && !(x %in% values)){ stop(paste0("Argument \"",name,"\" contains invalid values."),call.=FALSE) } if(!inf && any(is.infinite(values))){ stop(paste0("Argument \"",name,"\" contains infinite values."),call.=FALSE) } return(invisible(NULL)) } #--- Application --------------------------------------------------------------- #' @export #' @title #' Performance measurement by cross-validation #' 
@@ -435,7 +585,7 @@ predict.cornet <- function(object,newx,type="probability",...){ z <- 1*(y > cutoff) if(is.null(foldid)){ fold <- cornet:::.folds(y=z,nfolds=nfolds) fold <- palasso:::.folds(y=z,nfolds=nfolds) } else { fold <- foldid } ... ... @@ -445,11 +595,11 @@ predict.cornet <- function(object,newx,type="probability",...){ cols <- c("gaussian","binomial","combined") pred <- matrix(data=NA,nrow=length(y),ncol=length(cols), dimnames=list(NULL,cols)) for(i in seq_len(nfolds)){ fit <- cornet::cornet(y=y[fold!=i],cutoff=cutoff,X=X[fold!=i,],alpha=alpha,type.measure=type.measure) tryCatch(expr=cornet:::plot.cornet(fit),error=function(x) NULL) temp <- cornet:::predict.cornet(fit,newx=X[fold==i,]) tryCatch(expr=plot.cornet(fit),error=function(x) NULL) temp <- predict.cornet(fit,newx=X[fold==i,]) if(any(temp<0|temp>1)){stop("Outside unit interval.",call.=FALSE)} model <- colnames(pred) for(j in seq_along(model)){ ... ... @@ -458,7 +608,7 @@ predict.cornet <- function(object,newx,type="probability",...){ } type <- c("deviance","class","mse","mae","auc") loss <- lapply(X=type,FUN=function(x) cornet:::.loss(y=z,fit=pred,family="binomial",type.measure=x,foldid=fold)[[1]]) loss <- lapply(X=type,FUN=function(x) palasso:::.loss(y=z,fit=pred,family="binomial",type.measure=x,foldid=fold)[[1]]) names(loss) <- type #--- deviance residuals --- ... ... @@ -479,14 +629,13 @@ predict.cornet <- function(object,newx,type="probability",...){ for(i in seq_len(nfolds)){ cond <- fold==i loss$resid.pvalue[i] <- stats::wilcox.test(x=rxs[cond],y=rys[cond], paired=TRUE,alternative="greater")$p.value paired=TRUE,alternative="greater")$p.value } return(loss) } #' @export #' @title #' Single-split test #' ... ... 
@@ -507,12 +656,12 @@ predict.cornet <- function(object,newx,type="probability",...){ .test <- function(y,cutoff,X,alpha=1,type.measure="deviance"){ z <- 1*(y > cutoff) fold <- cornet:::.folds(y=z,nfolds=5) fold <- palasso:::.folds(y=z,nfolds=5) fold <- ifelse(fold==1,1,0) fit <- cornet::cornet(y=y[fold==0],cutoff=cutoff,X=X[fold==0,],alpha=alpha) tryCatch(expr=cornet:::plot.cornet(fit),error=function(x) NULL) pred <- cornet:::predict.cornet(fit,newx=X[fold==1,]) tryCatch(expr=plot.cornet(fit),error=function(x) NULL) pred <- predict.cornet(fit,newx=X[fold==1,]) if(any(pred<0|pred>1)){stop("Outside unit interval.",call.=FALSE)} #res <- (pred-z[fold==1])^2 # MSE ... ... @@ -528,243 +677,168 @@ predict.cornet <- function(object,newx,type="probability",...){ return(pvalue) } # Simulates y and X. .simulate <- function(n,p,prob=0.2,fac=1){ beta <- stats::rnorm(n=p) cond <- stats::rbinom(n=p,size=1,prob=prob) beta[cond==0] <- 0 X <- matrix(stats::rnorm(n=n*p),nrow=n,ncol=p) mean <- X %*% beta y <- stats::rnorm(n=n,mean=mean,sd=fac*stats::sd(mean)) return(list(y=y,X=X)) } #--- Internal functions -------------------------------------------------------- #' @title #' Arguments #' #' @description #' Verifies whether two or more arguments are identical. #' #' @param ... #' scalars, vectors, or matrices of equal dimensions #' #' @param na.rm #' remove missing values\strong{:} #' logical #' #' @examples #' NA #' .equal <- function(...,na.rm=FALSE){ list <- list(...) cond <- vapply(X=list, FUN=function(x) all(x==list[[1]],na.rm=na.rm), FUN.VALUE=logical(1)) if(any(!cond)){ stop("Unequal elements.",call.=FALSE) } return(invisible(NULL)) } #' @title #' Arguments #' Data simulation #' #' @description #' Verifies whether an argument matches formal requirements. 
#' #' @param x #' argument #' Simulates data for unit tests #' #' @param type #' character \code{"string"}, \code{"scalar"}, \code{"vector"}, \code{"matrix"} #' @param n #' sample size\strong{:} #' positive integer #' #' @param miss #' accept missing values\strong{:} #' logical #' @param p #' covariate space\strong{:} #' positive integer #' #' @param min #' lower limit\strong{:} #' numeric #' #' @param max #' upper limit\strong{:} #' numeric #' @param prob #' (approximate) proportion of causal covariates\strong{:} #' numeric between \eqn{0} and \eqn{1} #' #' @param values #' only accept specific values\strong{:} #' vector #' @param fac #' noise factor\strong{:} #' positive real number #' #' @param inf #' accept infinite (\code{Inf} or \code{-Inf}) values\strong{:} #' logical #' @return #' Returns invisible list with elements \code{y} and \code{X}. #' #' @param null #' accept \code{NULL}\strong{:} #' logical #' #' @examples #' NA #' data <- cornet:::.simulate(n=10,p=20,prob=0.2,fac=2) #' names(data) #' .check <- function(x,type,miss=FALSE,min=NULL,max=NULL,values=NULL,inf=FALSE,null=FALSE){ name <- deparse(substitute(x)) if(null && is.null(x)){ return(invisible(NULL)) } if(type=="string"){ cond <- is.vector(x) & is.character(x) & length(x)==1 } else if(type=="scalar"){ cond <- is.vector(x) & is.numeric(x) & length(x)==1 } else if(type=="vector"){ cond <- is.vector(x) & is.numeric(x) } else if(type=="matrix"){ cond <- is.matrix(x) & is.numeric(x) } else { warning("Unknown type.") } if(!cond){ stop(paste0("Argument \"",name,"\" does not match formal requirements."),call.=FALSE) } if(!miss && any(is.na(x))){ stop(paste0("Argument \"",name,"\" contains missing values."),call.=FALSE) } if(!is.null(min) && any(x= ",min),call.=FALSE) } if(!is.null(max) && any(x>max)){ stop(paste0("expecting ",name," <= ",max),call.=FALSE) } if(!is.null(values) && !(x %in% values)){ stop(paste0("Argument \"",name,"\" contains invalid values."),call.=FALSE) } if(!inf && 
any(is.infinite(values))){ stop(paste0("Argument \"",name,"\ contains infinite values."),call.=FALSE) } return(invisible(NULL)) .simulate <- function(n,p,prob=0.2,fac=1){ beta <- stats::rnorm(n=p) cond <- stats::rbinom(n=p,size=1,prob=prob) beta[cond==0] <- 0 X <- matrix(stats::rnorm(n=n*p),nrow=n,ncol=p) mean <- X %*% beta y <- stats::rnorm(n=n,mean=mean,sd=fac*stats::sd(mean)) return(invisible(list(y=y,X=X))) } # Import this function from the palasso package. .loss <- function (y,fit,family,type.measure,foldid=NULL){ if (!is.list(fit)) { fit <- list(fit) } loss <- list() for (i in seq_along(fit)) { if (is.vector(fit[[i]])) { fit[[i]] <- as.matrix(fit[[i]]) } if (is.null(foldid) & (family == "cox" | type.measure == "auc")) { stop("Missing foldid.", call. = FALSE) } if (family == "gaussian") { if (type.measure %in% c("deviance", "mse")) { loss[[i]] <- apply(X = fit[[i]], MARGIN = 2, FUN = function(x) mean((x - y)^2)) } else if (type.measure == "mae") { loss[[i]] <- apply(X = fit[[i]], MARGIN = 2, FUN = function(x) mean(abs(x - y))) } else { stop("Invalid type measure.", call. = FALSE)