functions.R 28.6 KB
Newer Older
Armin Rauschenberger's avatar
Armin Rauschenberger committed
1

Armin Rauschenberger's avatar
Armin Rauschenberger committed
2
#--- Workhorse function --------------------------------------------------------
Armin Rauschenberger's avatar
Armin Rauschenberger committed
3

Armin Rauschenberger's avatar
Armin Rauschenberger committed
4
#' @export
Armin Rauschenberger's avatar
Armin Rauschenberger committed
5
#' @aliases cornet-package
Armin Rauschenberger's avatar
Armin Rauschenberger committed
6
#' @title
Armin Rauschenberger's avatar
Armin Rauschenberger committed
7
#' Combined regression
Armin Rauschenberger's avatar
Armin Rauschenberger committed
8
9
#' 
#' @description
Armin Rauschenberger's avatar
Armin Rauschenberger committed
10
11
12
13
#' Implements lasso and ridge regression for dichotomised outcomes.
#' Such outcomes are not naturally but artificially binary.
#' They indicate whether an underlying measurement is greater than a threshold.
#'
Armin Rauschenberger's avatar
Armin Rauschenberger committed
14
#' @param y
Armin Rauschenberger's avatar
Armin Rauschenberger committed
15
#' continuous outcome\strong{:}
Armin Rauschenberger's avatar
Armin Rauschenberger committed
16
17
18
#' vector of length \eqn{n}
#' 
#' @param cutoff
Armin Rauschenberger's avatar
Armin Rauschenberger committed
19
#' cut-off point for dichotomising outcome into classes\strong{:}
Armin Rauschenberger's avatar
Armin Rauschenberger committed
20
#' \emph{meaningful} value between \code{min(y)} and \code{max(y)}
Armin Rauschenberger's avatar
Armin Rauschenberger committed
21
22
#' 
#' @param X
Armin Rauschenberger's avatar
Armin Rauschenberger committed
23
#' features\strong{:}
Armin Rauschenberger's avatar
Armin Rauschenberger committed
24
#' numeric matrix with \eqn{n} rows (samples)
Armin Rauschenberger's avatar
Armin Rauschenberger committed
25
#' and \eqn{p} columns (variables)
Armin Rauschenberger's avatar
Armin Rauschenberger committed
26
#' 
Armin Rauschenberger's avatar
Armin Rauschenberger committed
27
28
29
30
#' @param alpha
#' elastic net mixing parameter\strong{:}
#' numeric between \eqn{0} (ridge) and \eqn{1} (lasso)
#' 
Armin Rauschenberger's avatar
Armin Rauschenberger committed
31
32
33
34
#' @param foldid
#' fold identifiers\strong{:}
#' vector with entries between \eqn{1} and \code{nfolds};
#' or \code{NULL} (balance)
Armin Rauschenberger's avatar
Armin Rauschenberger committed
35
36
#' 
#' @param nfolds
Armin Rauschenberger's avatar
Armin Rauschenberger committed
37
38
#' number of folds\strong{:}
#' integer between \eqn{3} and \eqn{n}
Armin Rauschenberger's avatar
Armin Rauschenberger committed
39
#' 
Armin Rauschenberger's avatar
Armin Rauschenberger committed
40
#' @param type.measure
Armin Rauschenberger's avatar
Armin Rauschenberger committed
41
42
43
#' loss function for binary classification\strong{:}
#' character \code{"deviance"}, \code{"mse"}, \code{"mae"},
#' or \code{"class"} (see \code{\link[glmnet]{cv.glmnet}})
Armin Rauschenberger's avatar
Armin Rauschenberger committed
44
#' 
Armin Rauschenberger's avatar
Armin Rauschenberger committed
45
46
#' @param pi
#' pi sequence\strong{:}
Armin Rauschenberger's avatar
Armin Rauschenberger committed
47
#' vector of increasing values in the unit interval;
Armin Rauschenberger's avatar
Armin Rauschenberger committed
48
49
50
#' or \code{NULL} (default sequence)
#' 
#' @param npi
Armin Rauschenberger's avatar
Armin Rauschenberger committed
51
#' number of \code{pi} values (weighting)
Armin Rauschenberger's avatar
Armin Rauschenberger committed
52
#' 
Armin Rauschenberger's avatar
Armin Rauschenberger committed
53
54
55
56
57
58
#' @param sigma
#' sigma sequence\strong{:}
#' vector of increasing positive values;
#' or \code{NULL} (default sequence)
#' 
#' @param nsigma
Armin Rauschenberger's avatar
Armin Rauschenberger committed
59
#' number of \code{sigma} values (scaling)
Armin Rauschenberger's avatar
Armin Rauschenberger committed
60
#' 
Armin Rauschenberger's avatar
Armin Rauschenberger committed
61
62
63
#' @param ...
#' further arguments passed to \code{\link[glmnet]{glmnet}}
#' 
Armin Rauschenberger's avatar
Armin Rauschenberger committed
64
#' @details
Armin Rauschenberger's avatar
Armin Rauschenberger committed
65
66
67
68
69
70
71
72
73
#' The argument \code{family} is unavailable, because
#' this function fits a \emph{gaussian} model for the numeric response,
#' and a \emph{binomial} model for the binary response.
#' 
#' Linear regression uses the loss function \code{"deviance"} (or \code{"mse"}),
#' but the loss is incomparable between linear and logistic regression.
#' 
#' The loss function \code{"auc"} is unavailable for internal cross-validation.
#' If at all, use \code{"auc"} for external cross-validation only.
Armin Rauschenberger's avatar
Armin Rauschenberger committed
74
75
76
77
#' 
#' @return
#' Returns an object of class \code{cornet}, a list with multiple slots:
#' \itemize{
Armin Rauschenberger's avatar
Armin Rauschenberger committed
78
79
80
#'    \item \code{gaussian}: fitted linear model, class \code{glmnet}
#'    \item \code{binomial}: fitted logistic model, class \code{glmnet}
#'    \item \code{sigma}: scaling parameters \code{sigma},
Armin Rauschenberger's avatar
Armin Rauschenberger committed
81
#'           vector of length \code{nsigma}
Armin Rauschenberger's avatar
Armin Rauschenberger committed
82
#'    \item \code{pi}: weighting parameters \code{pi},
Armin Rauschenberger's avatar
Armin Rauschenberger committed
83
84
85
86
87
88
89
90
91
#'           vector of length \code{npi}
#'    \item \code{cvm}: evaluation loss,
#'           matrix with \code{nsigma} rows and \code{npi} columns
#'    \item \code{sigma.min}: optimal scaling parameter,
#'           positive scalar
#'    \item \code{pi.min}: optimal weighting parameter,
#'           scalar in unit interval
#'    \item \code{cutoff}: threshold for dichotomisation
#' }
Armin Rauschenberger's avatar
Armin Rauschenberger committed
92
#' 
Armin Rauschenberger's avatar
Armin Rauschenberger committed
93
94
95
96
97
#' @seealso
#' Methods for objects of class \code{cornet} include
#' \code{\link[=coef.cornet]{coef}} and
#' \code{\link[=predict.cornet]{predict}}.
#' 
Armin Rauschenberger's avatar
Armin Rauschenberger committed
98
#' @references 
Armin Rauschenberger's avatar
Armin Rauschenberger committed
99
100
#' Armin Rauschenberger and Enrico Glaab (2019).
#' "Lasso and ridge regression for dichotomised outcomes".
Armin Rauschenberger's avatar
Armin Rauschenberger committed
101
102
#' \emph{Manuscript in preparation}.
#' 
Armin Rauschenberger's avatar
Armin Rauschenberger committed
103
#' @examples
Armin Rauschenberger's avatar
Armin Rauschenberger committed
104
105
106
#' n <- 100; p <- 200
#' y <- rnorm(n)
#' X <- matrix(rnorm(n*p),nrow=n,ncol=p)
Armin Rauschenberger's avatar
Armin Rauschenberger committed
107
#' net <- cornet(y=y,cutoff=0,X=X)
Armin Rauschenberger's avatar
Armin Rauschenberger committed
108
#' net
Armin Rauschenberger's avatar
Armin Rauschenberger committed
109
#' 
Armin Rauschenberger's avatar
Armin Rauschenberger committed
110
cornet <- function(y,cutoff,X,alpha=1,npi=101,pi=NULL,nsigma=99,sigma=NULL,nfolds=10,foldid=NULL,type.measure="deviance",...){
Armin Rauschenberger's avatar
Armin Rauschenberger committed
111
  
Armin Rauschenberger's avatar
Armin Rauschenberger committed
112
  #--- temporary ---
Armin Rauschenberger's avatar
Armin Rauschenberger committed
113
  # cutoff <- 0; npi <- 101; pi <- NULL; nsigma <- 99; sigma <- NULL; nfolds <- 10;  foldid <- NULL; type.measure <- "deviance"; logistic <- TRUE
Armin Rauschenberger's avatar
Armin Rauschenberger committed
114
  test <- list()
Armin Rauschenberger's avatar
Armin Rauschenberger committed
115
  test$combined <- TRUE
Armin Rauschenberger's avatar
Armin Rauschenberger committed
116
  
Armin Rauschenberger's avatar
Armin Rauschenberger committed
117
  #--- checks ---
Armin Rauschenberger's avatar
Armin Rauschenberger committed
118
  n <- length(y)
Armin Rauschenberger's avatar
Armin Rauschenberger committed
119
  .check(x=y,type="vector")
Armin Rauschenberger's avatar
Armin Rauschenberger committed
120
  if(all(y %in% c(0,1))){warning("Binary response.",call.=FALSE)}
Armin Rauschenberger's avatar
Armin Rauschenberger committed
121
  .check(x=cutoff,type="scalar",min=min(y),max=max(y))
Armin Rauschenberger's avatar
Armin Rauschenberger committed
122
  if(length(y)!=nrow(X)){stop("Contradictory sample size.",call.=FALSE)}
Armin Rauschenberger's avatar
Armin Rauschenberger committed
123
124
  .check(x=X,type="matrix",dim=c(n,NA))
  .check(x=alpha,type="scalar",min=0,max=1)
Armin Rauschenberger's avatar
Armin Rauschenberger committed
125
126
127
128
  .check(x=npi,type="scalar",min=1)
  .check(x=pi,type="vector",min=0,max=1,null=TRUE)
  .check(x=nsigma,type="scalar",min=1)
  .check(x=sigma,type="vector",min=.Machine$double.eps,null=TRUE)
Armin Rauschenberger's avatar
Armin Rauschenberger committed
129
130
131
132
  .check(x=nfolds,type="scalar",min=3,max=n)
  .check(x=foldid,type="vector",dim=n,values=seq_len(nfolds),null=TRUE)
  .check(x=type.measure,type="string",values=c("deviance","mse","mae","class"))
  # never auc (min/max confusion)!
Armin Rauschenberger's avatar
Armin Rauschenberger committed
133
  if(!is.null(list(...)$family)){stop("Reserved argument \"family\".",call.=FALSE)}
Armin Rauschenberger's avatar
Armin Rauschenberger committed
134
  
Armin Rauschenberger's avatar
Armin Rauschenberger committed
135
  # binarisation
Armin Rauschenberger's avatar
Armin Rauschenberger committed
136
137
  z <- 1*(y > cutoff)
  
Armin Rauschenberger's avatar
Armin Rauschenberger committed
138
  # fold identifiers
Armin Rauschenberger's avatar
Armin Rauschenberger committed
139
  if(is.null(foldid)){
Armin Rauschenberger's avatar
Armin Rauschenberger committed
140
    foldid <- palasso:::.folds(y=z,nfolds=nfolds)
Armin Rauschenberger's avatar
Armin Rauschenberger committed
141
142
  }
  
Armin Rauschenberger's avatar
Armin Rauschenberger committed
143
  #--- model fitting ---
Armin Rauschenberger's avatar
Armin Rauschenberger committed
144
  fit <- list()
Armin Rauschenberger's avatar
Armin Rauschenberger committed
145
146
  fit$gaussian <- glmnet::glmnet(y=y,x=X,family="gaussian",alpha=alpha,...)
  fit$binomial <- glmnet::glmnet(y=z,x=X,family="binomial",alpha=alpha,...)
Armin Rauschenberger's avatar
Armin Rauschenberger committed
147
148
   
  #--- sigma sequence ---
Armin Rauschenberger's avatar
Armin Rauschenberger committed
149
150
  if(is.null(sigma)){
    fit$sigma <- exp(seq(from=log(0.05*stats::sd(y)),
Armin Rauschenberger's avatar
Armin Rauschenberger committed
151
                  to=log(10*stats::sd(y)),length.out=nsigma))
Armin Rauschenberger's avatar
Armin Rauschenberger committed
152
153
  } else {
    fit$sigma <- sigma
Armin Rauschenberger's avatar
Armin Rauschenberger committed
154
    nsigma <- length(sigma)
Armin Rauschenberger's avatar
Armin Rauschenberger committed
155
  }
Armin Rauschenberger's avatar
Armin Rauschenberger committed
156
  
Armin Rauschenberger's avatar
Armin Rauschenberger committed
157
158
  #--- pi sequence ---
  if(is.null(pi)){
Armin Rauschenberger's avatar
Armin Rauschenberger committed
159
    fit$pi <- seq(from=0,to=1,length.out=npi)
Armin Rauschenberger's avatar
Armin Rauschenberger committed
160
161
162
  } else {
    fit$pi <- pi
    npi <- length(pi)
Armin Rauschenberger's avatar
Armin Rauschenberger committed
163
164
  }
  
Armin Rauschenberger's avatar
Armin Rauschenberger committed
165
166
167
168
169
  #--- tuning parameters ---
  lab.pi <- paste0("pi",seq_len(npi))
  lab.sigma <- paste0("si",seq_len(nsigma))
  names(fit$sigma) <- lab.sigma
  names(fit$pi) <- lab.pi
Armin Rauschenberger's avatar
Armin Rauschenberger committed
170
  
Armin Rauschenberger's avatar
Armin Rauschenberger committed
171
  #--- cross-validation ---
Armin Rauschenberger's avatar
Armin Rauschenberger committed
172
  pred <- list()
Armin Rauschenberger's avatar
Armin Rauschenberger committed
173
174
175
  pred$y <- matrix(data=NA,nrow=n,ncol=length(fit$gaussian$lambda))
  pred$z <- matrix(data=NA,nrow=n,ncol=length(fit$binomial$lambda))

Armin Rauschenberger's avatar
Armin Rauschenberger committed
176
  if(test$combined){
Armin Rauschenberger's avatar
Armin Rauschenberger committed
177
    dimnames <- list(NULL,lab.sigma,lab.pi)
Armin Rauschenberger's avatar
Armin Rauschenberger committed
178
    pred$combined <- array(data=NA,dim=c(n,nsigma,npi),dimnames=dimnames)
Armin Rauschenberger's avatar
Armin Rauschenberger committed
179
  }
Armin Rauschenberger's avatar
Armin Rauschenberger committed
180

Armin Rauschenberger's avatar
Armin Rauschenberger committed
181
  for(k in seq_len(nfolds)){
Armin Rauschenberger's avatar
Armin Rauschenberger committed
182

Armin Rauschenberger's avatar
Armin Rauschenberger committed
183
184
185
186
187
188
189
    y0 <- y[foldid!=k]
    y1 <- y[foldid==k]
    z0 <- z[foldid!=k]
    z1 <- z[foldid==k]
    X0 <- X[foldid!=k,,drop=FALSE]
    X1 <- X[foldid==k,,drop=FALSE]
    
Armin Rauschenberger's avatar
Armin Rauschenberger committed
190
    # linear regression
Armin Rauschenberger's avatar
Armin Rauschenberger committed
191
    net <- glmnet::glmnet(y=y0,x=X0,family="gaussian",alpha=alpha,...)
Armin Rauschenberger's avatar
Armin Rauschenberger committed
192
193
    temp_y <- stats::predict(object=net,newx=X1,type="response",s=fit$gaussian$lambda)
    pred$y[foldid==k,seq_len(ncol(temp_y))] <- temp_y
Armin Rauschenberger's avatar
Armin Rauschenberger committed
194
    cvm <- palasso:::.loss(y=y1,fit=temp_y,family="gaussian",type.measure="deviance")[[1]]
Armin Rauschenberger's avatar
Armin Rauschenberger committed
195
    y_hat <- temp_y[,which.min(cvm)]
Armin Rauschenberger's avatar
Armin Rauschenberger committed
196
    
Armin Rauschenberger's avatar
Armin Rauschenberger committed
197
    # logistic regression
Armin Rauschenberger's avatar
Armin Rauschenberger committed
198
    net <- glmnet::glmnet(y=z0,x=X0,family="binomial",alpha=alpha,...)
Armin Rauschenberger's avatar
Armin Rauschenberger committed
199
200
    temp_z <- stats::predict(object=net,newx=X1,type="response",s=fit$binomial$lambda)
    pred$z[foldid==k,seq_len(ncol(temp_z))] <- temp_z
Armin Rauschenberger's avatar
Armin Rauschenberger committed
201
    cvm <- palasso:::.loss(y=z1,fit=temp_z,family="binomial",type.measure=type.measure)[[1]]
Armin Rauschenberger's avatar
Armin Rauschenberger committed
202
203
    z_hat <- temp_z[,which.min(cvm)]
    
Armin Rauschenberger's avatar
Armin Rauschenberger committed
204
    # combined regression
Armin Rauschenberger's avatar
Armin Rauschenberger committed
205
    if(test$combined){
Armin Rauschenberger's avatar
Armin Rauschenberger committed
206
207
208
      for(i in seq_along(fit$sigma)){
        for(j in seq_along(fit$pi)){
          cont <- stats::pnorm(q=y_hat,mean=cutoff,sd=fit$sigma[i])
Armin Rauschenberger's avatar
Armin Rauschenberger committed
209
          pred$combined[foldid==k,i,j] <- fit$pi[j]*cont + (1-fit$pi[j])*z_hat
Armin Rauschenberger's avatar
Armin Rauschenberger committed
210
211
212
        }
      }
    }
Armin Rauschenberger's avatar
Armin Rauschenberger committed
213
    
Armin Rauschenberger's avatar
Armin Rauschenberger committed
214
215
  }
  
Armin Rauschenberger's avatar
Armin Rauschenberger committed
216
217
  #--- evaluation ---
  
Armin Rauschenberger's avatar
Armin Rauschenberger committed
218
  # linear loss
Armin Rauschenberger's avatar
Armin Rauschenberger committed
219
  fit$gaussian$cvm <- palasso:::.loss(y=y,fit=pred$y,family="gaussian",type.measure="deviance")[[1]]
Armin Rauschenberger's avatar
Armin Rauschenberger committed
220
  fit$gaussian$lambda.min <- fit$gaussian$lambda[which.min(fit$gaussian$cvm)]
Armin Rauschenberger's avatar
Armin Rauschenberger committed
221
  
Armin Rauschenberger's avatar
Armin Rauschenberger committed
222
  # logistic loss
Armin Rauschenberger's avatar
Armin Rauschenberger committed
223
  fit$binomial$cvm <- palasso:::.loss(y=z,fit=pred$z,family="binomial",type.measure=type.measure)[[1]]
Armin Rauschenberger's avatar
Armin Rauschenberger committed
224
  fit$binomial$lambda.min <- fit$binomial$lambda[which.min(fit$binomial$cvm)]
Armin Rauschenberger's avatar
Armin Rauschenberger committed
225

Armin Rauschenberger's avatar
Armin Rauschenberger committed
226
  # combined loss
Armin Rauschenberger's avatar
Armin Rauschenberger committed
227
  if(test$combined){
Armin Rauschenberger's avatar
Armin Rauschenberger committed
228
    dimnames <- list(lab.sigma,lab.pi)
Armin Rauschenberger's avatar
Armin Rauschenberger committed
229
    fit$cvm <- matrix(data=NA,nrow=nsigma,ncol=npi,dimnames=dimnames)
Armin Rauschenberger's avatar
Armin Rauschenberger committed
230
    for(i in seq_len(nsigma)){
Armin Rauschenberger's avatar
Armin Rauschenberger committed
231
      for(j in seq_len(npi)){
Armin Rauschenberger's avatar
Armin Rauschenberger committed
232
        fit$cvm[i,j] <- palasso:::.loss(y=z,fit=pred$combined[,i,j],family="binomial",type.measure=type.measure)[[1]]
Armin Rauschenberger's avatar
Armin Rauschenberger committed
233
234
      }
    }
Armin Rauschenberger's avatar
Armin Rauschenberger committed
235
    temp <- which(fit$cvm==min(fit$cvm),arr.ind=TRUE,useNames=TRUE)
Armin Rauschenberger's avatar
Armin Rauschenberger committed
236
    if(nrow(temp)>1){warning("Multiple!",call.=FALSE);temp <- temp[1,,drop=FALSE]}
Armin Rauschenberger's avatar
Armin Rauschenberger committed
237
238
239
    fit$sigma.min <- fit$sigma[temp[1]]
    fit$pi.min <- fit$pi[temp[2]]
    if(fit$cvm[names(fit$sigma.min),names(fit$pi.min)]!=min(fit$cvm)){stop("Internal error.")}
Armin Rauschenberger's avatar
Armin Rauschenberger committed
240
241
  }
  
Armin Rauschenberger's avatar
Armin Rauschenberger committed
242
  #--- return ---
Armin Rauschenberger's avatar
Armin Rauschenberger committed
243
  fit$cutoff <- cutoff
Armin Rauschenberger's avatar
Armin Rauschenberger committed
244
245
  fit$info <- list(type.measure=type.measure,
                   sd.y=stats::sd(y),
Armin Rauschenberger's avatar
Armin Rauschenberger committed
246
247
                   "+"=sum(z==1),
                   "-"=sum(z==0),
Armin Rauschenberger's avatar
Armin Rauschenberger committed
248
                   table=table(z),
Armin Rauschenberger's avatar
Armin Rauschenberger committed
249
                   n=n,p=ncol(X),
Armin Rauschenberger's avatar
Armin Rauschenberger committed
250
                   test=as.data.frame(test))
Armin Rauschenberger's avatar
Armin Rauschenberger committed
251

Armin Rauschenberger's avatar
Armin Rauschenberger committed
252
  class(fit) <- "cornet"
Armin Rauschenberger's avatar
Armin Rauschenberger committed
253
254
255
  return(fit)
}

Armin Rauschenberger's avatar
Armin Rauschenberger committed
256
#--- Methods -------------------------------------------------------------------
Armin Rauschenberger's avatar
Armin Rauschenberger committed
257

Armin Rauschenberger's avatar
Armin Rauschenberger committed
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
#' @export
#' @title
#' Combined regression
#'
#' @description
#' Prints summary of cornet object.
#'
#' @param x
#' \link[cornet]{cornet} object
#' 
#' @param ...
#' further arguments (not applicable)
#' 
#' @return
#' Returns sample size \eqn{n},
#' number of covariates \eqn{p},
#' information on dichotomisation,
#' tuned scaling parameter (sigma),
#' tuned weighting parameter (pi),
#' and corresponding loss.
#'
#' @examples
Armin Rauschenberger's avatar
Armin Rauschenberger committed
280
#' n <- 100; p <- 200
Armin Rauschenberger's avatar
Armin Rauschenberger committed
281
282
283
#' y <- rnorm(n)
#' X <- matrix(rnorm(n*p),nrow=n,ncol=p)
#' net <- cornet(y=y,cutoff=0,X=X)
Armin Rauschenberger's avatar
Armin Rauschenberger committed
284
#' print(net)
Armin Rauschenberger's avatar
Armin Rauschenberger committed
285
286
287
288
289
290
291
292
293
294
295
296
297
298
#' 
print.cornet <- function(x,...){
  cat("cornet object:\n")
  cat(paste0("n = ",x$info$n,","," p = ",x$info$p),"\n")
  cat(paste0("z = I(y > ",signif(x$cutoff,digits=2),"): "))
  cat(paste0(x$info$"+","+"," vs ",x$info$"-","-"),"\n")
  cat(paste0("sigma.min = ",signif(x$sigma.min,digits=1)),"\n")
  cat(paste0("pi.min = ",round(x$pi.min,digits=2)),"\n")
  type <- x$info$type.measure
  value <- signif(x$cvm[names(x$sigma.min),names(x$pi.min)],digits=2)
  cat(paste0(type," = ",value))
  return(invisible(NULL))
}

Armin Rauschenberger's avatar
Armin Rauschenberger committed
299
300
#' @export
#' @title
Armin Rauschenberger's avatar
Armin Rauschenberger committed
301
#' Extract estimated coefficients
Armin Rauschenberger's avatar
Armin Rauschenberger committed
302
303
#'
#' @description
Armin Rauschenberger's avatar
Armin Rauschenberger committed
304
305
#' Extracts estimated coefficients from linear and logistic regression,
#' under the penalty parameter that minimises the cross-validated loss.
Armin Rauschenberger's avatar
Armin Rauschenberger committed
306
307
#'
#' @param object
Armin Rauschenberger's avatar
Armin Rauschenberger committed
308
#' \link[cornet]{cornet} object
Armin Rauschenberger's avatar
Armin Rauschenberger committed
309
310
#' 
#' @param ...
Armin Rauschenberger's avatar
Armin Rauschenberger committed
311
312
313
314
315
#' further arguments (not applicable)
#' 
#' @return
#' This function returns a matrix with \eqn{n} rows and two columns,
#' where \eqn{n} is the sample size. It includes the estimated coefficients
Armin Rauschenberger's avatar
Armin Rauschenberger committed
316
317
#' from linear regression (1st column: \code{"beta"})
#' and logistic regression (2nd column: \code{"gamma"}).
Armin Rauschenberger's avatar
Armin Rauschenberger committed
318
319
#'
#' @examples
Armin Rauschenberger's avatar
Armin Rauschenberger committed
320
#' n <- 100; p <- 200
Armin Rauschenberger's avatar
Armin Rauschenberger committed
321
322
323
#' y <- rnorm(n)
#' X <- matrix(rnorm(n*p),nrow=n,ncol=p)
#' net <- cornet(y=y,cutoff=0,X=X)
Armin Rauschenberger's avatar
Armin Rauschenberger committed
324
#' coef(net)
Armin Rauschenberger's avatar
Armin Rauschenberger committed
325
#' 
Armin Rauschenberger's avatar
Armin Rauschenberger committed
326
coef.cornet <- function(object,...){
Armin Rauschenberger's avatar
Armin Rauschenberger committed
327
  
Armin Rauschenberger's avatar
Armin Rauschenberger committed
328
  if(length(list(...))!=0){warning("Ignoring arguments.")}
Armin Rauschenberger's avatar
Armin Rauschenberger committed
329
  
Armin Rauschenberger's avatar
Armin Rauschenberger committed
330
331
332
333
334
  s <- object$gaussian$lambda.min
  beta <- glmnet::coef.glmnet(object=object$gaussian,s=s)
  
  s <- object$binomial$lambda.min
  gamma <- glmnet::coef.glmnet(object=object$binomial,s=s)
Armin Rauschenberger's avatar
Armin Rauschenberger committed
335
336
337
338
339
340
  
  coef <- cbind(beta,gamma)
  colnames(coef) <- c("beta","gamma")
  return(coef)
}

Armin Rauschenberger's avatar
Armin Rauschenberger committed
341
342
#' @export
#' @title
Armin Rauschenberger's avatar
Armin Rauschenberger committed
343
#' Plot loss matrix
Armin Rauschenberger's avatar
Armin Rauschenberger committed
344
345
#'
#' @description
Armin Rauschenberger's avatar
Armin Rauschenberger committed
346
347
#' Plots the loss for different combinations of
#' scaling (sigma) and weighting (pi) parameters.
Armin Rauschenberger's avatar
Armin Rauschenberger committed
348
349
#'
#' @param x
Armin Rauschenberger's avatar
Armin Rauschenberger committed
350
#' \link[cornet]{cornet} object
Armin Rauschenberger's avatar
Armin Rauschenberger committed
351
352
#' 
#' @param ...
Armin Rauschenberger's avatar
Armin Rauschenberger committed
353
#' further arguments (not applicable)
Armin Rauschenberger's avatar
Armin Rauschenberger committed
354
355
356
357
358
359
360
361
362
363
#' 
#' @return
#' This function plots the evaluation loss (\code{cvm}).
#' Whereas the matrix has sigma in the rows, and pi in the columns,
#' the plot has sigma on the \eqn{x}-axis, and pi on the \eqn{y}-axis.
#' For all combinations of sigma and pi, the colour indicates the loss.
#' If the R package \code{RColorBrewer} is installed,
#' blue represents low. Otherwise, red represents low.
#' White always represents high.
#' 
Armin Rauschenberger's avatar
Armin Rauschenberger committed
364
#' @examples
Armin Rauschenberger's avatar
Armin Rauschenberger committed
365
#' n <- 100; p <- 200
Armin Rauschenberger's avatar
Armin Rauschenberger committed
366
367
368
#' y <- rnorm(n)
#' X <- matrix(rnorm(n*p),nrow=n,ncol=p)
#' net <- cornet(y=y,cutoff=0,X=X)
Armin Rauschenberger's avatar
Armin Rauschenberger committed
369
#' plot(net)
Armin Rauschenberger's avatar
Armin Rauschenberger committed
370
#' 
Armin Rauschenberger's avatar
Armin Rauschenberger committed
371
plot.cornet <- function(x,...){
Armin Rauschenberger's avatar
Armin Rauschenberger committed
372
  
Armin Rauschenberger's avatar
Armin Rauschenberger committed
373
  if(length(list(...))!=0){warning("Ignoring arguments.")}
Armin Rauschenberger's avatar
Armin Rauschenberger committed
374

Armin Rauschenberger's avatar
Armin Rauschenberger committed
375
  k <- 100
Armin Rauschenberger's avatar
Armin Rauschenberger committed
376
  levels <- stats::quantile(x$cvm,probs=seq(from=0,to=1,length.out=k+1))
Armin Rauschenberger's avatar
Armin Rauschenberger committed
377
  
Armin Rauschenberger's avatar
Armin Rauschenberger committed
378
  # colours
Armin Rauschenberger's avatar
Armin Rauschenberger committed
379
380
381
382
383
384
385
  if("RColorBrewer" %in% .packages(all.available=TRUE)){
    pal <- rev(c("white",RColorBrewer::brewer.pal(n=9,name="Blues")))
    col <- grDevices::colorRampPalette(colors=pal)(k)
  } else {
    col <- grDevices::heat.colors(n=k)
  }
  
Armin Rauschenberger's avatar
Armin Rauschenberger committed
386
  nsigma <- length(x$sigma)
Armin Rauschenberger's avatar
Armin Rauschenberger committed
387
  npi <- length(x$pi)
Armin Rauschenberger's avatar
Armin Rauschenberger committed
388
389
390
  
  graphics::plot.new()
  graphics::par(xaxs="i",yaxs="i")
Armin Rauschenberger's avatar
Armin Rauschenberger committed
391
  graphics::plot.window(xlim=c(1-0.5,nsigma+0.5),ylim=c(1-0.5,npi+0.5))
Armin Rauschenberger's avatar
Armin Rauschenberger committed
392
  
Armin Rauschenberger's avatar
Armin Rauschenberger committed
393
  graphics::title(xlab=expression(sigma),ylab=expression(pi),cex.lab=1)
Armin Rauschenberger's avatar
Armin Rauschenberger committed
394
  #graphics::.filled.contour(x=seq_along(x$sigma),y=seq_along(x$pi),z=x$cvm,levels=levels,col=col)
Armin Rauschenberger's avatar
Armin Rauschenberger committed
395
  graphics::image(x=seq_along(x$sigma),y=seq_along(x$pi),z=x$cvm,breaks=levels,col=col,add=TRUE)
Armin Rauschenberger's avatar
Armin Rauschenberger committed
396
  graphics::box()
Armin Rauschenberger's avatar
Armin Rauschenberger committed
397
  
Armin Rauschenberger's avatar
Armin Rauschenberger committed
398
399
400
401
  ssigma <- which(x$sigma %in% x$sigma.min)
  spi <- which(x$pi %in% x$pi.min)
  
  if(length(ssigma)==1 & length(spi)==1){
Armin Rauschenberger's avatar
Armin Rauschenberger committed
402
    # axes with labels for tuned parameters
Armin Rauschenberger's avatar
Armin Rauschenberger committed
403
404
    graphics::axis(side=1,at=c(1,ssigma,nsigma),labels=signif(x$sigma[c(1,ssigma,nsigma)],digits=2))
    graphics::axis(side=2,at=c(1,spi,npi),labels=signif(x$pi[c(1,spi,npi)],digits=2))
Armin Rauschenberger's avatar
Armin Rauschenberger committed
405
    # point for tuned parameters
Armin Rauschenberger's avatar
Armin Rauschenberger committed
406
    graphics::points(x=ssigma,y=spi,pch=4,col="white",cex=1)
Armin Rauschenberger's avatar
Armin Rauschenberger committed
407
  } else {
Armin Rauschenberger's avatar
Armin Rauschenberger committed
408
    # axes with standard labels
Armin Rauschenberger's avatar
Armin Rauschenberger committed
409
410
411
412
    at <- seq(from=1,to=nsigma,length.out=5)
    graphics::axis(side=1,at=at,labels=signif(x$sigma,digits=2)[at])
    at <- seq(from=1,to=nsigma,length.out=5)
    graphics::axis(side=2,at=at,labels=signif(x$pi,digits=2)[at])
Armin Rauschenberger's avatar
Armin Rauschenberger committed
413
414
415
    # points for selected parameters
    isigma <- sapply(x$sigma.min,function(y) which(x$sigma==y))
    ipi <- sapply(x$pi.min,function(y) which(x$pi==y))
Armin Rauschenberger's avatar
Armin Rauschenberger committed
416
    graphics::points(x=isigma,y=ipi,pch=4,col="white",cex=1)
Armin Rauschenberger's avatar
Armin Rauschenberger committed
417
418
  }
  
Armin Rauschenberger's avatar
Armin Rauschenberger committed
419
420
}

Armin Rauschenberger's avatar
Armin Rauschenberger committed
421
422
#' @export
#' @title
Armin Rauschenberger's avatar
Armin Rauschenberger committed
423
#' Predict binary outcome
Armin Rauschenberger's avatar
Armin Rauschenberger committed
424
425
#'
#' @description
Armin Rauschenberger's avatar
Armin Rauschenberger committed
426
427
428
429
430
431
432
433
#' Predicts the binary outcome with linear, logistic, and combined regression.
#' 
#' @details
#' For linear regression, this function tentatively transforms
#' the predicted values to predicted probabilities,
#' using a Gaussian distribution with a fixed mean (threshold)
#' and a fixed variance (estimated variance of the numeric outcome).
#' 
Armin Rauschenberger's avatar
Armin Rauschenberger committed
434
#' @param object
Armin Rauschenberger's avatar
Armin Rauschenberger committed
435
#' \link[cornet]{cornet} object
Armin Rauschenberger's avatar
Armin Rauschenberger committed
436
437
438
439
440
441
442
443
444
445
#' 
#' @param newx
#' covariates\strong{:}
#' numeric matrix with \eqn{n} rows (samples)
#' and \eqn{p} columns (variables)
#' 
#' @param type
#' \code{"probability"}, \code{"odds"}, \code{"log-odds"}
#' 
#' @param ...
Armin Rauschenberger's avatar
Armin Rauschenberger committed
446
#' further arguments (not applicable)
Armin Rauschenberger's avatar
Armin Rauschenberger committed
447
448
#' 
#' @examples
Armin Rauschenberger's avatar
Armin Rauschenberger committed
449
#' n <- 100; p <- 200
Armin Rauschenberger's avatar
Armin Rauschenberger committed
450
451
452
#' y <- rnorm(n)
#' X <- matrix(rnorm(n*p),nrow=n,ncol=p)
#' net <- cornet(y=y,cutoff=0,X=X)
Armin Rauschenberger's avatar
Armin Rauschenberger committed
453
#' predict(net,newx=X)
Armin Rauschenberger's avatar
Armin Rauschenberger committed
454
#' 
Armin Rauschenberger's avatar
Armin Rauschenberger committed
455
predict.cornet <- function(object,newx,type="probability",...){
Armin Rauschenberger's avatar
Armin Rauschenberger committed
456
  
Armin Rauschenberger's avatar
Armin Rauschenberger committed
457
  if(length(list(...))!=0){warning("Ignoring arguments.")}
Armin Rauschenberger's avatar
Armin Rauschenberger committed
458
459
  
  x <- object; rm(object)
Armin Rauschenberger's avatar
Armin Rauschenberger committed
460
  
Armin Rauschenberger's avatar
Armin Rauschenberger committed
461
462
  test <- x$info$test
  
Armin Rauschenberger's avatar
Armin Rauschenberger committed
463
464
  .check(x=newx,type="matrix")
  .check(x=type,type="string",values=c("probability","odds","log-odds"))
Armin Rauschenberger's avatar
Armin Rauschenberger committed
465
  
Armin Rauschenberger's avatar
Armin Rauschenberger committed
466
  # linear and logistic
Armin Rauschenberger's avatar
Armin Rauschenberger committed
467
468
469
470
471
472
  prob <- list()
  link <- as.numeric(stats::predict(object=x$gaussian,
                  newx=newx,s=x$gaussian$lambda.min,type="response"))
  prob$gaussian <- stats::pnorm(q=link,mean=x$cutoff,sd=x$info$sd.y)
  prob$binomial <- as.numeric(stats::predict(object=x$binomial,
                  newx=newx,s=x$binomial$lambda.min,type="response"))
Armin Rauschenberger's avatar
Armin Rauschenberger committed
473
  
Armin Rauschenberger's avatar
Armin Rauschenberger committed
474
  # combined
Armin Rauschenberger's avatar
Armin Rauschenberger committed
475
  if(test$combined){
Armin Rauschenberger's avatar
Armin Rauschenberger committed
476
    cont <- stats::pnorm(q=link,mean=x$cutoff,sd=x$sigma.min)
Armin Rauschenberger's avatar
Armin Rauschenberger committed
477
    prob$combined <- x$pi.min*cont + (1-x$pi.min)*prob$binomial
Armin Rauschenberger's avatar
Armin Rauschenberger committed
478
479
  }
  
Armin Rauschenberger's avatar
Armin Rauschenberger committed
480
481
  # consistency tests
  lapply(X=prob,FUN=function(p) .check(x=p,type="vector",min=0,max=1))
Armin Rauschenberger's avatar
Armin Rauschenberger committed
482
  .equal(link>x$cutoff,prob$gaussian>0.5)
Armin Rauschenberger's avatar
Armin Rauschenberger committed
483
484
485
486
487
488
489
490
491
492

  # transformation
  if(type=="probability"){
    frame <- prob
  } else if(type=="odds"){
    frame <- lapply(X=prob,FUN=function(x) x/(1-x))
  } else if(type=="log-odds"){
    frame <- lapply(X=prob,FUN=function(x) log(x/(1-x)))
  } else {
    stop("Invalid type.",call.=FALSE)
Armin Rauschenberger's avatar
Armin Rauschenberger committed
493
494
  }
  
Armin Rauschenberger's avatar
Armin Rauschenberger committed
495
  return(as.data.frame(frame))
Armin Rauschenberger's avatar
Armin Rauschenberger committed
496
497
}

Armin Rauschenberger's avatar
Armin Rauschenberger committed
498
499
500
#--- Internal functions --------------------------------------------------------

#' @title
Armin Rauschenberger's avatar
Armin Rauschenberger committed
501
#' Equality
Armin Rauschenberger's avatar
Armin Rauschenberger committed
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
#'
#' @description
#' Verifies whether two or more arguments are identical.
#'
#' @param ...
#' scalars, vectors, or matrices of equal dimensions
#' 
#' @param na.rm
#' remove missing values\strong{:}
#' logical
#' 
#' @examples 
#' cornet:::.equal(1,1,1)
#' 
.equal <- function(...,na.rm=FALSE){
  list <- list(...)
  cond <- vapply(X=list,
                 FUN=function(x) all(x==list[[1]],na.rm=na.rm),
                 FUN.VALUE=logical(1))
  if(any(!cond)){
    stop("Unequal elements.",call.=FALSE)
  }
  return(invisible(NULL))
}

#' @title
#' Arguments
#'
#' @description
#' Verifies whether an argument matches formal requirements.
#'
#' @param x
#' argument
#' 
#' @param type
#' character \code{"string"}, \code{"scalar"}, \code{"vector"}, \code{"matrix"}
#' 
Armin Rauschenberger's avatar
Armin Rauschenberger committed
539
540
541
542
#' @param dim
#' vector/matrix dimensionality\strong{:}
#' integer scalar/vector
#' 
Armin Rauschenberger's avatar
Armin Rauschenberger committed
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
#' @param miss
#' accept missing values\strong{:}
#' logical
#' 
#' @param min
#' lower limit\strong{:}
#' numeric
#' 
#' @param max
#' upper limit\strong{:}
#' numeric
#' 
#' @param values
#' only accept specific values\strong{:}
#' vector 
#' 
#' @param inf
#' accept infinite (\code{Inf} or \code{-Inf}) values\strong{:}
#' logical
#' 
#' @param null
#' accept \code{NULL}\strong{:}
#' logical
#'
#' @examples
#' cornet:::.check(0.5,type="scalar",min=0,max=1)
#' 
Armin Rauschenberger's avatar
Armin Rauschenberger committed
570
.check <- function(x,type,dim=NULL,miss=FALSE,min=NULL,max=NULL,values=NULL,inf=FALSE,null=FALSE){
Armin Rauschenberger's avatar
Armin Rauschenberger committed
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
  name <- deparse(substitute(x))
  if(null && is.null(x)){
    return(invisible(NULL)) 
  }
  if(type=="string"){
    cond <- is.vector(x) & is.character(x) & length(x)==1
  } else if(type=="scalar"){
    cond <- is.vector(x) & is.numeric(x) & length(x)==1
  } else if(type=="vector"){
    cond <- is.vector(x) & is.numeric(x)
  } else if(type=="matrix"){
    cond <- is.matrix(x) & is.numeric(x)
  } else {
    warning("Unknown type.")
  }
  if(!cond){
    stop(paste0("Argument \"",name,"\" does not match formal requirements."),call.=FALSE)
  }
Armin Rauschenberger's avatar
Armin Rauschenberger committed
589
  if(!is.null(dim) && length(dim)==1 && length(x)!=dim){
Armin Rauschenberger's avatar
Armin Rauschenberger committed
590
591
      stop(paste0("Argument \"",name,"\" has invalid length."),call.=FALSE)
  }
Armin Rauschenberger's avatar
Armin Rauschenberger committed
592
  if(!is.null(dim) && length(dim)>1 && any(dim(x)!=dim,na.rm=TRUE)){
Armin Rauschenberger's avatar
Armin Rauschenberger committed
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
      stop(paste0("Argument \"",name,"\" has invalid dimensions."),call.=FALSE)
  }
  
  #   } else if(length(dim)==2){
  #     if(!is.na(dim[1]) && nrow(x)!=dim[1]){
  #       stop(paste0("Argument \"",name,"\" has invalid row number."),call.=FALSE)
  #     }
  #     if(!is.na(dim[2]) && ncol(x)!=dim[2]){
  #       stop(paste0("Argument \"",name,"\" has invalid column number."),call.=FALSE)
  #     }
  #   } else {
  #     
  #   }
  # }
  
  # if(!is.null(length) && length(x)!=length){
  #   stop(paste0("Argument \"",name,"\" has invalid length."),call.=FALSE)
  # }
  # if(!is.null(nrow) && nrow(x)!=nrow){
  #   stop(paste0("Argument \"",name,"\" has invalid row number."),call.=FALSE)
  # }
  # if(!is.null(ncol) && ncol(x)!=ncol){
  #   stop(paste0("Argument \"",name,"\" has invalid column number."),call.=FALSE)
  # }
  
  
Armin Rauschenberger's avatar
Armin Rauschenberger committed
619
620
621
622
623
624
625
626
627
  if(!miss && any(is.na(x))){
    stop(paste0("Argument \"",name,"\" contains missing values."),call.=FALSE)
  }
  if(!is.null(min) && any(x<min)){
    stop(paste0("expecting ",name," >= ",min),call.=FALSE)
  }
  if(!is.null(max) && any(x>max)){
    stop(paste0("expecting ",name," <= ",max),call.=FALSE)
  }
Armin Rauschenberger's avatar
Armin Rauschenberger committed
628
  if(!is.null(values) && any(!x %in% values)){
Armin Rauschenberger's avatar
Armin Rauschenberger committed
629
630
631
632
633
634
635
636
    stop(paste0("Argument \"",name,"\" contains invalid values."),call.=FALSE)
  }
  if(!inf && any(is.infinite(values))){
    stop(paste0("Argument \"",name,"\ contains infinite values."),call.=FALSE)
  }
  return(invisible(NULL))
}

Armin Rauschenberger's avatar
Armin Rauschenberger committed
637
#--- Application ---------------------------------------------------------------
Armin Rauschenberger's avatar
Armin Rauschenberger committed
638

Armin Rauschenberger's avatar
Armin Rauschenberger committed
639
#' @title
Armin Rauschenberger's avatar
Armin Rauschenberger committed
640
#' Performance measurement
Armin Rauschenberger's avatar
Armin Rauschenberger committed
641
642
#'
#' @description
Armin Rauschenberger's avatar
Armin Rauschenberger committed
643
#' Compares models for a continuous response with a cut-off value.
Armin Rauschenberger's avatar
Armin Rauschenberger committed
644
645
646
647
648
649
650
#' 
#' @details
#' Uses k-fold cross-validation,
#' fits linear, logistic, and combined regression,
#' calculates different loss functions,
#' and examines squared deviance residuals.
#' 
Armin Rauschenberger's avatar
Armin Rauschenberger committed
651
#' @inheritParams  cornet
Armin Rauschenberger's avatar
Armin Rauschenberger committed
652
#' 
Armin Rauschenberger's avatar
Armin Rauschenberger committed
653
#' @examples
Armin Rauschenberger's avatar
Armin Rauschenberger committed
654
655
656
657
658
#' \dontshow{n <- 100; p <- 20
#' y <- rnorm(n)
#' X <- matrix(rnorm(n*p),nrow=n,ncol=p)
#' loss <- cornet:::.compare(y=y,cutoff=0,X=X,nfolds=2)
#' loss}
Armin Rauschenberger's avatar
Armin Rauschenberger committed
659
660
661
662
663
#' \donttest{n <- 100; p <- 200
#' y <- rnorm(n)
#' X <- matrix(rnorm(n*p),nrow=n,ncol=p)
#' loss <- cornet:::.compare(y=y,cutoff=0,X=X)
#' loss}
Armin Rauschenberger's avatar
Armin Rauschenberger committed
664
#' 
Armin Rauschenberger's avatar
Armin Rauschenberger committed
665
.compare <- function(y,cutoff,X,alpha=1,nfolds=5,foldid=NULL,type.measure="deviance"){
Armin Rauschenberger's avatar
Armin Rauschenberger committed
666
  
Armin Rauschenberger's avatar
Armin Rauschenberger committed
667
  z <- 1*(y > cutoff)
Armin Rauschenberger's avatar
Armin Rauschenberger committed
668
  if(is.null(foldid)){
Armin Rauschenberger's avatar
Armin Rauschenberger committed
669
    fold <- palasso:::.folds(y=z,nfolds=nfolds)
Armin Rauschenberger's avatar
Armin Rauschenberger committed
670
671
672
  } else {
    fold <- foldid
  }
Armin Rauschenberger's avatar
Armin Rauschenberger committed
673
  
Armin Rauschenberger's avatar
Armin Rauschenberger committed
674
675
676
  #--- cross-validated loss ---
  
  cols <- c("gaussian","binomial","combined")
Armin Rauschenberger's avatar
Armin Rauschenberger committed
677
678
  pred <- matrix(data=NA,nrow=length(y),ncol=length(cols),
                 dimnames=list(NULL,cols))
Armin Rauschenberger's avatar
Armin Rauschenberger committed
679
  
Armin Rauschenberger's avatar
Armin Rauschenberger committed
680
  for(i in seq_len(nfolds)){
Armin Rauschenberger's avatar
Armin Rauschenberger committed
681
    fit <- cornet::cornet(y=y[fold!=i],cutoff=cutoff,X=X[fold!=i,],alpha=alpha,type.measure=type.measure)
Armin Rauschenberger's avatar
Armin Rauschenberger committed
682
683
    tryCatch(expr=plot.cornet(fit),error=function(x) NULL)
    temp <- predict.cornet(fit,newx=X[fold==i,])
Armin Rauschenberger's avatar
Armin Rauschenberger committed
684
    if(any(temp<0|temp>1)){stop("Outside unit interval.",call.=FALSE)}
Armin Rauschenberger's avatar
Armin Rauschenberger committed
685
686
687
688
    model <- colnames(pred)
    for(j in seq_along(model)){
      pred[fold==i,model[j]] <- temp[[model[j]]]
    }
Armin Rauschenberger's avatar
Armin Rauschenberger committed
689
690
  }
  
Armin Rauschenberger's avatar
Armin Rauschenberger committed
691
  type <- c("deviance","class","mse","mae","auc")
Armin Rauschenberger's avatar
Armin Rauschenberger committed
692
  loss <- lapply(X=type,FUN=function(x) palasso:::.loss(y=z,fit=pred,family="binomial",type.measure=x,foldid=fold)[[1]])
Armin Rauschenberger's avatar
Armin Rauschenberger committed
693
  names(loss) <- type
Armin Rauschenberger's avatar
Armin Rauschenberger committed
694
  
Armin Rauschenberger's avatar
Armin Rauschenberger committed
695
  #--- deviance residuals ---
Armin Rauschenberger's avatar
Armin Rauschenberger committed
696
697
698
699
700
701
702
  
  # squared deviance residuals
  limit <- 1e-05
  pred[pred < limit] <- limit
  pred[pred > 1 - limit] <- 1 - limit
  res <- -2 * (z * log(pred) + (1 - z) * log(1 - pred))
  rxs <- res[,"binomial"]
Armin Rauschenberger's avatar
Armin Rauschenberger committed
703
704
705
706
  rys <- res[,"combined"]
  
  # residual increase/decrease
  loss$resid.factor <- stats::median((rys-rxs)/rxs)
Armin Rauschenberger's avatar
Armin Rauschenberger committed
707
  
Armin Rauschenberger's avatar
Armin Rauschenberger committed
708
709
  # paired test for each fold
  loss$resid.pvalue <- numeric()
Armin Rauschenberger's avatar
Armin Rauschenberger committed
710
711
  for(i in seq_len(nfolds)){
    cond <- fold==i
Armin Rauschenberger's avatar
Armin Rauschenberger committed
712
    loss$resid.pvalue[i] <- stats::wilcox.test(x=rxs[cond],y=rys[cond],
Armin Rauschenberger's avatar
Armin Rauschenberger committed
713
                                               paired=TRUE,alternative="greater")$p.value
Armin Rauschenberger's avatar
Armin Rauschenberger committed
714
715
  }
  
Armin Rauschenberger's avatar
Armin Rauschenberger committed
716
  return(loss)
Armin Rauschenberger's avatar
Armin Rauschenberger committed
717
  
Armin Rauschenberger's avatar
Armin Rauschenberger committed
718
719
720
}

#' @title
Armin Rauschenberger's avatar
Armin Rauschenberger committed
721
#' Single-split test
Armin Rauschenberger's avatar
Armin Rauschenberger committed
722
723
#'
#' @description
Armin Rauschenberger's avatar
Armin Rauschenberger committed
724
#' Compares models for a continuous response with a cut-off value.
Armin Rauschenberger's avatar
Armin Rauschenberger committed
725
726
727
728
729
730
731
#' 
#' @details
#' Splits samples into 80% for training and 20% for testing,
#' calculates squared deviance residuals of logistic and combined regression,
#' conducts the paired one-sided Wilcoxon signed rank test,
#' and returns the p-value.
#' 
Armin Rauschenberger's avatar
Armin Rauschenberger committed
732
733
734
#' @inheritParams cornet
#' 
#' @examples
Armin Rauschenberger's avatar
Armin Rauschenberger committed
735
#' n <- 100; p <- 200
Armin Rauschenberger's avatar
Armin Rauschenberger committed
736
737
#' y <- rnorm(n)
#' X <- matrix(rnorm(n*p),nrow=n,ncol=p)
Armin Rauschenberger's avatar
Armin Rauschenberger committed
738
#' cornet:::.test(y=y,cutoff=0,X=X)
Armin Rauschenberger's avatar
Armin Rauschenberger committed
739
740
741
742
#' 
.test <- function(y,cutoff,X,alpha=1,type.measure="deviance"){
  
  z <- 1*(y > cutoff)
Armin Rauschenberger's avatar
Armin Rauschenberger committed
743
  fold <- palasso:::.folds(y=z,nfolds=5)
Armin Rauschenberger's avatar
Armin Rauschenberger committed
744
745
746
  fold <- ifelse(fold==1,1,0)
  
  fit <- cornet::cornet(y=y[fold==0],cutoff=cutoff,X=X[fold==0,],alpha=alpha)
Armin Rauschenberger's avatar
Armin Rauschenberger committed
747
748
  tryCatch(expr=plot.cornet(fit),error=function(x) NULL)
  pred <- predict.cornet(fit,newx=X[fold==1,])
Armin Rauschenberger's avatar
Armin Rauschenberger committed
749
750
751
  if(any(pred<0|pred>1)){stop("Outside unit interval.",call.=FALSE)}
  
  #res <- (pred-z[fold==1])^2 # MSE
Armin Rauschenberger's avatar
Armin Rauschenberger committed
752
  #pvalue <- wilcox.test(x=res[,"binomial"],y=res[,"combined"],paired=TRUE,alternative="greater")$p.value
Armin Rauschenberger's avatar
Armin Rauschenberger committed
753
  #colMeans(abs(pred-0.5)) # distance from 0.5
Armin Rauschenberger's avatar
Armin Rauschenberger committed
754
  
Armin Rauschenberger's avatar
Armin Rauschenberger committed
755
756
757
  limit <- 1e-05
  pred[pred < limit] <- limit
  pred[pred > 1 - limit] <- 1 - limit
Armin Rauschenberger's avatar
Armin Rauschenberger committed
758
  res <- -2 * (z[fold==1] * log(pred) + (1 - z[fold==1]) * log(1 - pred))
Armin Rauschenberger's avatar
Armin Rauschenberger committed
759
  pvalue <- stats::wilcox.test(x=res[,"binomial"],y=res[,"combined"],paired=TRUE,alternative="greater")$p.value
Armin Rauschenberger's avatar
Armin Rauschenberger committed
760
761
762
763
  
  return(pvalue)
}

Armin Rauschenberger's avatar
Armin Rauschenberger committed
764
#' @title
Armin Rauschenberger's avatar
Armin Rauschenberger committed
765
#' Data simulation
Armin Rauschenberger's avatar
Armin Rauschenberger committed
766
767
#'
#' @description
Armin Rauschenberger's avatar
Armin Rauschenberger committed
768
#' Simulates data for unit tests
Armin Rauschenberger's avatar
Armin Rauschenberger committed
769
#' 
Armin Rauschenberger's avatar
Armin Rauschenberger committed
770
771
772
#' @param n
#' sample size\strong{:}
#' positive integer
Armin Rauschenberger's avatar
Armin Rauschenberger committed
773
#' 
Armin Rauschenberger's avatar
Armin Rauschenberger committed
774
775
776
#' @param p
#' covariate space\strong{:}
#' positive integer
Armin Rauschenberger's avatar
Armin Rauschenberger committed
777
#' 
Armin Rauschenberger's avatar
Armin Rauschenberger committed
778
779
780
#' @param prob
#' (approximate) proportion of causal covariates\strong{:}
#' numeric between \eqn{0} and \eqn{1}
Armin Rauschenberger's avatar
Armin Rauschenberger committed
781
#' 
Armin Rauschenberger's avatar
Armin Rauschenberger committed
782
783
784
#' @param fac
#' noise factor\strong{:}
#' positive real number
Armin Rauschenberger's avatar
Armin Rauschenberger committed
785
#' 
Armin Rauschenberger's avatar
Armin Rauschenberger committed
786
787
#' @return
#' Returns invisible list with elements \code{y} and \code{X}.
Armin Rauschenberger's avatar
Armin Rauschenberger committed
788
789
#' 
#' @examples
Armin Rauschenberger's avatar
Armin Rauschenberger committed
790
791
#' data <- cornet:::.simulate(n=10,p=20,prob=0.2,fac=2)
#' names(data)
Armin Rauschenberger's avatar
Armin Rauschenberger committed
792
#' 
Armin Rauschenberger's avatar
Armin Rauschenberger committed
793
794
795
796
797
798
799
800
.simulate <- function(n,p,prob=0.2,fac=1){
  beta <- stats::rnorm(n=p)
  cond <- stats::rbinom(n=p,size=1,prob=prob)
  beta[cond==0] <- 0
  X <- matrix(stats::rnorm(n=n*p),nrow=n,ncol=p)
  mean <- X %*% beta
  y <- stats::rnorm(n=n,mean=mean,sd=fac*stats::sd(mean))
  return(invisible(list(y=y,X=X)))
Armin Rauschenberger's avatar
Armin Rauschenberger committed
801
802
}

Armin Rauschenberger's avatar
Armin Rauschenberger committed
803

Armin Rauschenberger's avatar
Armin Rauschenberger committed
804
#--- Legacy --------------------------------------------------------------------
Armin Rauschenberger's avatar
Armin Rauschenberger committed
805

Armin Rauschenberger's avatar
Armin Rauschenberger committed
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
# # Import this function from the palasso package.
# .loss <- function (y,fit,family,type.measure,foldid=NULL){
#   if (!is.list(fit)) {
#     fit <- list(fit)
#   }
#   loss <- list()
#   for (i in seq_along(fit)) {
#     if (is.vector(fit[[i]])) {
#       fit[[i]] <- as.matrix(fit[[i]])
#     }
#     if (is.null(foldid) & (family == "cox" | type.measure == 
#                            "auc")) {
#       stop("Missing foldid.", call. = FALSE)
#     }
#     if (family == "gaussian") {
#       if (type.measure %in% c("deviance", "mse")) {
#         loss[[i]] <- apply(X = fit[[i]], MARGIN = 2, 
#                            FUN = function(x) mean((x - y)^2))
#       }
#       else if (type.measure == "mae") {
#         loss[[i]] <- apply(X = fit[[i]], MARGIN = 2, 
#                            FUN = function(x) mean(abs(x - y)))
#       }
#       else {
#         stop("Invalid type measure.", call. = FALSE)
#       }
#     }
#     else if (family == "binomial") {
#       if (type.measure == "deviance") {
#         limit <- 1e-05
#         fit[[i]][fit[[i]] < limit] <- limit
#         fit[[i]][fit[[i]] > 1 - limit] <- 1 - limit
#         loss[[i]] <- apply(X = fit[[i]], MARGIN = 2, 
#                            FUN = function(x) mean(-2 * (y * log(x) + (1 - 
#                                                                         y) * log(1 - x))))
#       }
#       else if (type.measure == "mse") {
#         loss[[i]] <- apply(X = fit[[i]], MARGIN = 2, 
#                            FUN = function(x) 2 * mean((x - y)^2))
#       }
#       else if (type.measure == "mae") {
#         loss[[i]] <- apply(X = fit[[i]], MARGIN = 2, 
#                            FUN = function(x) 2 * mean(abs(x - y)))
#       }
#       else if (type.measure == "class") {
#         loss[[i]] <- apply(X = fit[[i]], MARGIN = 2, 
#                            FUN = function(x) mean(abs(round(x) - y)))
#       }
#       else if (type.measure == "auc") {
#         weights <- table(foldid)
#         cvraw <- matrix(data = NA, nrow = length(weights), 
#                         ncol = ncol(fit[[i]])) # typo in palasso package !
#         for (k in seq_along(weights)) {
#           cvraw[k, ] <- apply(X = fit[[i]], MARGIN = 2, 
#                               FUN = function(x) glmnet::auc(y = y[foldid == 
#                                                                     k], prob = x[foldid == k]))
#         }
#         loss[[i]] <- apply(X = cvraw, MARGIN = 2, FUN = function(x) stats::weighted.mean(x = x, 
#                                                                                          w = weights, na.rm = TRUE))
#         names(loss[[i]]) <- colnames(fit[[i]]) # typo in palasso package!
#       }
#       else {
#         stop("Invalid type measure.", call. = FALSE)
#       }
#     }
#     else if (family == "poisson") {
#       if (type.measure == "deviance") {
#         loss[[i]] <- apply(X = fit[[i]], MARGIN = 2, 
#                            FUN = function(x) mean(2 * (ifelse(y == 0, 
#                                                               0, y * log(y/x)) - y + x), na.rm = TRUE))
#       }
#       else if (type.measure == "mse") {
#         loss[[i]] <- apply(X = fit[[i]], MARGIN = 2, 
#                            FUN = function(x) mean((x - y)^2))
#       }
#       else if (type.measure == "mae") {
#         loss[[i]] <- apply(X = fit[[i]], MARGIN = 2, 
#                            FUN = function(x) mean(abs(x - y)))
#       }
#       else {
#         stop("Invalid type measure.", call. = FALSE)
#       }
#     }
#     else if (family == "cox") {
#       if (type.measure == "deviance") {
#         weights <- tapply(X = y[, "status"], INDEX = foldid, 
#                           FUN = sum)
#         loss[[i]] <- apply(X = fit[[i]], MARGIN = 2, 
#                            FUN = function(x) stats::weighted.mean(x = x/weights, 
#                                                                   w = weights, na.rm = TRUE))
#       }
#       else {
#         stop("Invalid type measure.", call. = FALSE)
#       }
#     }
#     else {
#       stop("Invalid family.", call. = FALSE)
#     }
#     if (sum(diff(is.na(loss[[i]]))) == 1) {
#       loss[[i]] <- loss[[i]][!is.na(loss[[i]])]
#     }
#   }
#   return(loss)
# }
# 
# # Import this function from the palasso package.
# .folds <- function (y, nfolds, foldid = NULL){
#   if(!is.null(foldid)){
#     return(foldid)
#   }
#   #if (survival::is.Surv(y)){ # active in palasso
#   #  y <- y[, "status"] # active in palasso
#   #} # active in palasso
#   if(all(y %in% c(0, 1))){
#     foldid <- rep(x = NA, times = length(y))
#     foldid[y == 0] <- sample(x = rep(x = seq_len(nfolds), 
#                                      length.out = sum(y == 0)))
#     foldid[y == 1] <- sample(x = rep(x = seq_len(nfolds), 
#                                      length.out = sum(y == 1)))
#   } else {
#     foldid <- sample(x = rep(x = seq_len(nfolds), length.out = length(y)))
#   }
#   return(foldid)
# }