functions.R 45.8 KB
Newer Older
Armin Rauschenberger's avatar
Armin Rauschenberger committed
1

Armin Rauschenberger's avatar
Armin Rauschenberger committed
2
#--- import unexported functions:
Armin Rauschenberger's avatar
Armin Rauschenberger committed
3
# FUNCTION <- get("FUNCTION",envir=asNamespace("PACKAGE"))
Armin Rauschenberger's avatar
Armin Rauschenberger committed
4

Armin Rauschenberger's avatar
Armin Rauschenberger committed
5
6
7
#--- deactivate on Solaris:
# if(!grepl('SunOS',Sys.info()['sysname'])){}

Armin Rauschenberger's avatar
Armin Rauschenberger committed
8
#--- Main function -------------------------------------------------------------
Armin Rauschenberger's avatar
Armin Rauschenberger committed
9

Armin Rauschenberger's avatar
Armin Rauschenberger committed
10
11
#' @export
#' @title
Armin Rauschenberger's avatar
Armin Rauschenberger committed
12
#' Multivariate Elastic Net Regression
Armin Rauschenberger's avatar
Armin Rauschenberger committed
13
14
#' 
#' @description
Armin Rauschenberger's avatar
Armin Rauschenberger committed
15
16
17
18
19
#' Implements multivariate elastic net regression.
#'  
#' @param Y
#' outputs\strong{:}
#' numeric matrix with \eqn{n} rows (samples)
Armin Rauschenberger's avatar
readme    
Armin Rauschenberger committed
20
#' and \eqn{q} columns (outputs)
Armin Rauschenberger's avatar
Armin Rauschenberger committed
21
22
#' 
#' @param X
Armin Rauschenberger's avatar
Armin Rauschenberger committed
23
#' inputs\strong{:}
Armin Rauschenberger's avatar
Armin Rauschenberger committed
24
#' numeric matrix with \eqn{n} rows (samples)
Armin Rauschenberger's avatar
readme    
Armin Rauschenberger committed
25
#' and \eqn{p} columns (inputs)
Armin Rauschenberger's avatar
Armin Rauschenberger committed
26
27
28
29
30
31
32
33
34
#'
#' @param family
#' distribution\strong{:}
#' vector of length \eqn{1} or \eqn{q} with entries
#' \code{"gaussian"}, \code{"binomial"} or \code{"poisson"}
#'
#' @param nfolds
#' number of folds
#'
Armin Rauschenberger's avatar
Armin Rauschenberger committed
35
36
#' @param foldid
#' fold identifiers\strong{:}
Armin Rauschenberger's avatar
Armin Rauschenberger committed
37
#' vector of length \eqn{n} with entries between \eqn{1} and \code{nfolds};
Armin Rauschenberger's avatar
Armin Rauschenberger committed
38
#' or \code{NULL} (balance)
Armin Rauschenberger's avatar
Armin Rauschenberger committed
39
#' 
Armin Rauschenberger's avatar
Armin Rauschenberger committed
40
#' @param type.measure
Armin Rauschenberger's avatar
Armin Rauschenberger committed
41
42
43
44
45
46
47
48
#' loss function\strong{:}
#' vector of length \eqn{1} or \eqn{q} with entries
#' \code{"deviance"}, \code{"class"}, \code{"mse"} or \code{"mae"}
#' (see \code{\link[glmnet]{cv.glmnet}})
#'
#' @param alpha.base
#' elastic net mixing parameter for base learners\strong{:}
#' numeric between \eqn{0} (ridge) and \eqn{1} (lasso)
Armin Rauschenberger's avatar
Armin Rauschenberger committed
49
#' 
Armin Rauschenberger's avatar
Armin Rauschenberger committed
50
#' @param alpha.meta
Armin Rauschenberger's avatar
Armin Rauschenberger committed
51
#' elastic net mixing parameter for meta learners\strong{:}
Armin Rauschenberger's avatar
Armin Rauschenberger committed
52
#' numeric between \eqn{0} (ridge) and \eqn{1} (lasso)
Armin Rauschenberger's avatar
Armin Rauschenberger committed
53
#' 
Armin Rauschenberger's avatar
Armin Rauschenberger committed
54
#' @param weight
Armin Rauschenberger's avatar
readme    
Armin Rauschenberger committed
55
56
57
58
59
60
61
62
63
64
#' input-output effects\strong{:}
#' matrix with \eqn{p} rows (inputs) and \eqn{q} columns (outputs)
#' with entries \eqn{0} (exclude) and \eqn{1} (include),
#' or \code{NULL} (see details)
#' 
#' @param sign
#' output-output effects\strong{:}
#' matrix with \eqn{q} rows ("meta-inputs") and \eqn{q} columns (outputs), 
#' with entries \eqn{-1} (negative), \eqn{0} (none),
#' \eqn{1} (positive) and \eqn{NA} (any),
Armin Rauschenberger's avatar
Armin Rauschenberger committed
65
66
#' or \code{NULL} (see details)
#' 
Armin Rauschenberger's avatar
Armin Rauschenberger committed
67
68
69
#' @param ...
#' further arguments passed to \code{\link[glmnet]{glmnet}}
#' 
Armin Rauschenberger's avatar
Armin Rauschenberger committed
70
#' @references 
Armin Rauschenberger's avatar
Armin Rauschenberger committed
71
#' Armin Rauschenberger, Enrico Glaab (2021)
Armin Rauschenberger's avatar
Armin Rauschenberger committed
72
#' "Predicting correlated outcomes from molecular data"
Armin Rauschenberger's avatar
Armin Rauschenberger committed
73
#' \emph{Manuscript in preparation}.
Armin Rauschenberger's avatar
Armin Rauschenberger committed
74
75
#' 
#' @details
Armin Rauschenberger's avatar
readme    
Armin Rauschenberger committed
76
77
78
79
80
#' TO BE DELETED:
#' constraint
#' non-negativity constraints\strong{:}
#' logical (see details)
#' non-negativity constraints\strong{:}
Armin Rauschenberger's avatar
Armin Rauschenberger committed
81
82
83
84
85
86
#' If it is reasonable to assume that the outcomes
#' are \emph{positively} correlated
#' (potentially after changing the sign of some outcomes)
#' we recommend to set \code{constraint=TRUE}.
#' Then non-negativity constraints are imposed on the meta learner.
#' 
Armin Rauschenberger's avatar
readme    
Armin Rauschenberger committed
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
#' \strong{input-output relations:}
#' In this matrix with \eqn{p} rows and \eqn{q} columns,
#' the entry in the \eqn{j}th row and the \eqn{k}th column
#' indicates whether the \eqn{j}th input may be used for 
#' modelling the \eqn{k}th output
#' (where \eqn{0} means yes and
#' \eqn{1} means no).
#' By default, 
#' 
#' \strong{output-output relations:}
#' In this matrix with \eqn{q} rows and \eqn{q} columns,
#' the entry in the \eqn{k}th row and the \eqn{l}th column
#' indicates how the \eqn{k}th output may be used for
#' modelling the \eqn{l}th output
#' (where \eqn{-1} means negative effect,
#' \eqn{0} means no effect,
#' \eqn{1} means positive effect,
#' and \eqn{NA} means any effect).
#' All entries on the diagonal must equal \eqn{1}.
#' By default (\code{sign=NULL}),
#' Kendall correlation determines this matrix,
#' with \eqn{-1} for significant negative, \eqn{0} for insignificant,
#' \eqn{1} for significant positive correlations.
#' The short-cuts \code{sign=1} and \code{code=NA} set all off-diagonal entries
#' equal to \eqn{1} (non-negativity constraints)
#' or \code{NA} (no constraints), respectively.
#' The former short-cut is useful if all pairs of outcomes
#' are assumed to be \emph{positively} correlated
#' (potentially after changing the sign of some outcomes).
Armin Rauschenberger's avatar
Armin Rauschenberger committed
116
#' 
Armin Rauschenberger's avatar
Armin Rauschenberger committed
117
#' \strong{elastic net:}
Armin Rauschenberger's avatar
Armin Rauschenberger committed
118
119
#' \code{alpha.base} controls input-output effects,
#' \code{alpha.meta} controls output-output effects;
Armin Rauschenberger's avatar
Armin Rauschenberger committed
120
121
122
123
124
125
126
127
128
129
130
#' lasso renders sparse models (\code{alpha}\eqn{=1}),
#' ridge renders dense models (\code{alpha}\eqn{=0})
#' 
#' @return
#' This function returns an object of class \code{joinet}.
#' Available methods include
#' \code{\link[=predict.joinet]{predict}},
#' \code{\link[=coef.joinet]{coef}},
#' and \code{\link[=weights.joinet]{weights}}.
#' The slots \code{base} and \code{meta} each contain
#' \eqn{q} \code{\link[glmnet]{cv.glmnet}}-like objects.
Armin Rauschenberger's avatar
Armin Rauschenberger committed
131
#' 
Armin Rauschenberger's avatar
Armin Rauschenberger committed
132
#' @seealso
Armin Rauschenberger's avatar
Armin Rauschenberger committed
133
#' \code{\link{cv.joinet}}, vignette
Armin Rauschenberger's avatar
Armin Rauschenberger committed
134
#' 
Armin Rauschenberger's avatar
Armin Rauschenberger committed
135
#' @examples
Armin Rauschenberger's avatar
Armin Rauschenberger committed
136
#' \dontshow{
Armin Rauschenberger's avatar
Armin Rauschenberger committed
137
#' if(!grepl('SunOS',Sys.info()['sysname'])){
Armin Rauschenberger's avatar
Armin Rauschenberger committed
138
#' n <- 50; p <- 100; q <- 3
Armin Rauschenberger's avatar
Armin Rauschenberger committed
139
#' X <- matrix(rnorm(n*p),nrow=n,ncol=p)
Armin Rauschenberger's avatar
Armin Rauschenberger committed
140
#' Y <- replicate(n=q,expr=rnorm(n=n,mean=rowSums(X[,1:5])))
Armin Rauschenberger's avatar
Armin Rauschenberger committed
141
#' object <- joinet(Y=Y,X=X)}}
Armin Rauschenberger's avatar
Armin Rauschenberger committed
142
143
144
145
#' \dontrun{
#' n <- 50; p <- 100; q <- 3
#' X <- matrix(rnorm(n*p),nrow=n,ncol=p)
#' Y <- replicate(n=q,expr=rnorm(n=n,mean=rowSums(X[,1:5])))
Armin Rauschenberger's avatar
Armin Rauschenberger committed
146
#' object <- joinet(Y=Y,X=X)}
Armin Rauschenberger's avatar
Armin Rauschenberger committed
147
#' 
Armin Rauschenberger's avatar
Armin Rauschenberger committed
148
149
150
#' \dontrun{
#' browseVignettes("joinet") # further examples}
#' 
Armin Rauschenberger's avatar
readme    
Armin Rauschenberger committed
151
152
joinet <- function(Y,X,family="gaussian",nfolds=10,foldid=NULL,type.measure="deviance",alpha.base=1,alpha.meta=1,weight=NULL,sign=NULL,...){
  # VERIFY CODE FOR WEIGHT AND SIGN!
Armin Rauschenberger's avatar
Armin Rauschenberger committed
153
  
Armin Rauschenberger's avatar
Armin Rauschenberger committed
154
  #--- temporary ---
Armin Rauschenberger's avatar
readme    
Armin Rauschenberger committed
155
  # family <- "gaussian"; nfolds <- 10; foldid <- NULL; type.measure <- "deviance"; alpha.base <- alpha.meta <- 1; weight <- NULL; sign <- NULL
Armin Rauschenberger's avatar
Armin Rauschenberger committed
156
  
Armin Rauschenberger's avatar
Armin Rauschenberger committed
157
  #--- checks ---
Armin Rauschenberger's avatar
Armin Rauschenberger committed
158
159
160
  Y <- as.matrix(Y)
  X <- as.matrix(X)
  
Armin Rauschenberger's avatar
Armin Rauschenberger committed
161
  cornet:::.check(x=Y,type="matrix",miss=TRUE)
Armin Rauschenberger's avatar
readme    
Armin Rauschenberger committed
162
163
164
165
166
167
168
169
170
171
  #if(constraint){
  #  for(i in 1:(ncol(Y)-1)){
  #    for(j in i:ncol(Y)){
  #      cor <- stats::cor.test(Y[,i],Y[,j],use="pairwise.complete.obs")
  #      if(cor$estimate<0 & cor$p.value<0.05){
  #        warning(paste("Columns",i,"and",j,"are negatively correlated. Consider using constraint=FALSE."),call.=FALSE)
  #      }
  #    }
  #  }
  #}
Armin Rauschenberger's avatar
Armin Rauschenberger committed
172
  #if(any(stats::cor(Y,use="pairwise.complete.obs")<0,na.rm=TRUE)){warning("Negative correlation!",call.=FALSE)}
Armin Rauschenberger's avatar
Armin Rauschenberger committed
173
  cornet:::.check(x=X,type="matrix")
Armin Rauschenberger's avatar
Armin Rauschenberger committed
174
  #cornet:::.check(x=family,type="vector",values=c("gaussian","binomial","poisson"))
Armin Rauschenberger's avatar
Armin Rauschenberger committed
175
  if(nrow(Y)!=nrow(X)){stop("Contradictory sample size.",call.=FALSE)}
Armin Rauschenberger's avatar
readme    
Armin Rauschenberger committed
176
177
  n <- nrow(Y); q <- ncol(Y); p <- ncol(X)
  
Armin Rauschenberger's avatar
Armin Rauschenberger committed
178
179
180
181
182
  cornet:::.check(x=nfolds,type="scalar",min=3)
  cornet:::.check(x=foldid,type="vector",values=seq_len(nfolds),null=TRUE)
  cornet:::.check(x=type.measure,type="string",values=c("deviance","class","mse","mae")) # not auc (min/max confusion)
  cornet:::.check(x=alpha.base,type="scalar",min=0,max=1)
  cornet:::.check(x=alpha.meta,type="scalar",min=0,max=1)
Armin Rauschenberger's avatar
readme    
Armin Rauschenberger committed
183
  cornet:::.check(x=weight,type="matrix",min=0,max=1,null=TRUE,dim=c(p,q))
Armin Rauschenberger's avatar
Armin Rauschenberger committed
184
185
  if(!is.null(c(list(...)$lower.limits,list(...)$upper.limits))){
    stop("Reserved arguments \"lower.limits\" and \"upper.limits\".",call.=FALSE)
Armin Rauschenberger's avatar
Armin Rauschenberger committed
186
187
  }
  
Armin Rauschenberger's avatar
Armin Rauschenberger committed
188
  if(is.null(weight)){
Armin Rauschenberger's avatar
readme    
Armin Rauschenberger committed
189
    pf <- matrix(1,nrow=p,ncol=q)
Armin Rauschenberger's avatar
Armin Rauschenberger committed
190
191
192
193
  } else {
    pf <- 1/weight
  }
  
Armin Rauschenberger's avatar
readme    
Armin Rauschenberger committed
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
  #--- constraints --- 
  null <- is.null(sign)
  if(null){sign <- 0}
  if(!is.matrix(sign)){
    sign <- matrix(sign,nrow=q,ncol=q)
    diag(sign) <- 1
  }
  if(any(diag(sign)!=1)){
    warning("Matrix \"sign\" requires ones on the diagonal.",call.=FALSE)
  }
  temp <- sign[lower.tri(sign)|upper.tri(sign)]
  if(!null & all(!is.na(temp)&temp==0)){
    warning("Matrix \"sign\" only has zeros off the diagonal.",call.=FALSE)
  }
  for(i in seq(from=1,to=q,by=1)){
    for(j in seq(from=i,to=q,by=1)){
      cor <- stats::cor.test(Y[,i],Y[,j],use="pairwise.complete.obs",method="kendall")
      if(cor$p.value>0.05){next}
      if(null){
        sign[i,j] <- sign[j,i] <- sign(cor$estimate)
      } else if(!is.na(sign[i,j]) & sign[i,j]*sign(cor$estimate)==-1){
        warning(paste("Outputs",i,"and",j,"have a significant",ifelse(sign(cor$estimate)==1,"*positive*","*negative*"),"correlation. Consider changing argument \"sign\" (e.g. sign=NULL)."),call.=FALSE)
      }
    }
  }

  # # long version
  # 
  # if(is.null(sign)){
  #   sign <- matrix(0,nrow=q,ncol=q)
  #   for(i in seq(from=1,to=q,by=1)){
  #     for(j in seq(from=i,to=q,by=1)){
  #       cor <- stats::cor.test(Y[,i],Y[,j],use="pairwise.complete.obs",method="kendall")
  #       if(cor$p.value<0.05){
  #         sign[i,j] <- sign[j,i] <- sign(cor$estimate)
  #       }
  #     }
  #   }
  # } else {
  #   if(!is.matrix(sign)){
  #     sign <- matrix(sign,nrow=q,ncol=q) # accept 1 and NA
  #   }
  #   for(i in seq(from=1,to=q,by=1)){
  #     for(j in seq(from=i,to=q,by=1)){
  #       cor <- stats::cor.test(Y[,i],Y[,j],use="pairwise.complete.obs",method="kendall")
  #       if(!is.na(sign[i,j]) & cor$p.value<0.05 & sign[i,j]*sign(cor$estimate)==-1){
  #         warning(paste("Outputs",i,"and",j,"have an unexpected significant correlation. Consider using sign=NULL."),call.=FALSE)
  #       }
  #     }
  #   }
  # }
  
  
  ## old snippets
  
  #if(is.null(sign)){
  #  sign <- 0   # 0 or NA or 1
  #  check <- FALSE
  #} else {
  #  check <- TRUE
  #}
  #
  #if(is.matrix(sign)){
  #  # perform checks
  #} if else(!is.matrix(sign) & !is.null(sign)){
  #  sign <- matrix(sign,nrow=q,ncol=q)
  #  # perform checks
  #} else {  
  #  if(is.null(sign)){sign <- 0}  # 0 or NA or 1
  #  }
  
Armin Rauschenberger's avatar
Armin Rauschenberger committed
265
266
267
268
269
  #--- family ---
  if(length(family)==1){
    family <- rep(family,times=q)
  } else if(length(family)!=q){
    stop("Invalid argument family",call.=FALSE)
Armin Rauschenberger's avatar
Armin Rauschenberger committed
270
  }
Armin Rauschenberger's avatar
Armin Rauschenberger committed
271
  
Armin Rauschenberger's avatar
Armin Rauschenberger committed
272
273
274
275
  #--- fold identifiers ---
  # provide foldid as matrix?
  if(is.null(foldid)){
    foldid <- palasso:::.folds(y=Y[,1],nfolds=nfolds) # temporary Y[,1]
Armin Rauschenberger's avatar
Armin Rauschenberger committed
276
  } else {
Armin Rauschenberger's avatar
Armin Rauschenberger committed
277
    nfolds <- length(unique(foldid))
Armin Rauschenberger's avatar
Armin Rauschenberger committed
278
279
  }
  
Armin Rauschenberger's avatar
Armin Rauschenberger committed
280
281
282
283
284
285
  #--- full fit ---
  nlambda <- numeric()
  base <- lapply(seq_len(q),function(x) list())
  for(i in seq_len(q)){
    cond <- !is.na(Y[,i])
    #if(sum(cond)==0){nlambda[i] <- 0; next}
Armin Rauschenberger's avatar
readme    
Armin Rauschenberger committed
286
    base[[i]]$glmnet.fit <- glmnet::glmnet(y=Y[cond,i],x=X[cond,],family=family[i],alpha=alpha.base,penalty.factor=pf[,i],...) # ellipsis
Armin Rauschenberger's avatar
Armin Rauschenberger committed
287
288
289
    base[[i]]$lambda <- base[[i]]$glmnet.fit$lambda
    nlambda[i] <- length(base[[i]]$glmnet.fit$lambda)
  }
Armin Rauschenberger's avatar
Armin Rauschenberger committed
290
  
Armin Rauschenberger's avatar
Armin Rauschenberger committed
291
292
293
294
  #--- predictions ---
  link <- list()
  for(i in seq_len(q)){
    link[[i]] <- matrix(data=NA,nrow=n,ncol=nlambda[i])
Armin Rauschenberger's avatar
Armin Rauschenberger committed
295
  }
Armin Rauschenberger's avatar
Armin Rauschenberger committed
296
297
  
  #--- base cross-validation ---
Armin Rauschenberger's avatar
Armin Rauschenberger committed
298
  for(k in seq_len(nfolds)){
Armin Rauschenberger's avatar
Armin Rauschenberger committed
299
300
    Y0 <- Y[foldid!=k,,drop=FALSE]
    Y1 <- Y[foldid==k,,drop=FALSE]
Armin Rauschenberger's avatar
Armin Rauschenberger committed
301
302
    X0 <- X[foldid!=k,,drop=FALSE]
    X1 <- X[foldid==k,,drop=FALSE]
Armin Rauschenberger's avatar
Armin Rauschenberger committed
303
304
305
    for(i in seq_len(q)){
      cond <- !is.na(Y0[,i])
      #if(sum(cond)==0){next}
Armin Rauschenberger's avatar
readme    
Armin Rauschenberger committed
306
      object <- glmnet::glmnet(y=Y0[cond,i],x=X0[cond,],family=family[i],alpha=alpha.base,penalty.factor=pf[,i],...) # ellipsis
Armin Rauschenberger's avatar
Armin Rauschenberger committed
307
308
309
      temp <- stats::predict(object=object,newx=X1,type="link",
                             s=base[[i]]$glmnet.fit$lambda)
      link[[i]][foldid==k,seq_len(ncol(temp))] <- temp
Armin Rauschenberger's avatar
Armin Rauschenberger committed
310
    }
Armin Rauschenberger's avatar
Armin Rauschenberger committed
311
312
  }
  
Armin Rauschenberger's avatar
Armin Rauschenberger committed
313
314
315
316
317
318
319
  #--- tune base lambdas ---
  for(i in seq_len(q)){
    fit <- .mean.function(link[[i]],family=family[i])
    cond <- !is.na(Y[,i])
    base[[i]]$cvm <- palasso:::.loss(y=Y[cond,i],fit=fit[cond,],
                                     family=family[i],type.measure=type.measure)[[1]]
    base[[i]]$lambda.min <- base[[i]]$lambda[which.min(base[[i]]$cvm)]
Armin Rauschenberger's avatar
Armin Rauschenberger committed
320
    class(base[[i]]) <- "cv.glmnet" # trial 2020-01-10
Armin Rauschenberger's avatar
Armin Rauschenberger committed
321
  }
Armin Rauschenberger's avatar
Armin Rauschenberger committed
322
  
Armin Rauschenberger's avatar
Armin Rauschenberger committed
323
324
325
326
327
  #--- predictions ---
  hat <- matrix(NA,nrow=n,ncol=q)
  for(i in seq_len(q)){
    hat[,i] <- link[[i]][,base[[i]]$lambda==base[[i]]$lambda.min]
  }
Armin Rauschenberger's avatar
Armin Rauschenberger committed
328
  
Armin Rauschenberger's avatar
Armin Rauschenberger committed
329
330
331
  #--- meta cross-validation ---
  meta <- list()
  for(i in seq_len(q)){
Armin Rauschenberger's avatar
readme    
Armin Rauschenberger committed
332
333
334
335
336
337
    # trial start
    lower.limits <- rep(-Inf,times=q)
    upper.limits <- rep(Inf,times=q)
    lower.limits[sign[,i]>=0] <- 0
    upper.limits[sign[,i]<=0] <- 0
    # trial end
Armin Rauschenberger's avatar
Armin Rauschenberger committed
338
339
    cond <- !is.na(Y[,i])
    meta[[i]] <- glmnet::cv.glmnet(y=Y[cond,i],x=hat[cond,],
Armin Rauschenberger's avatar
readme    
Armin Rauschenberger committed
340
341
                                   lower.limits=lower.limits, # was first lower.limits=0 and later ifelse(constraint,0,-Inf) 
                                   upper.limits=upper.limits, # was first upper.limits=Inf
Armin Rauschenberger's avatar
Armin Rauschenberger committed
342
343
344
345
346
347
                                   foldid=foldid[cond],
                                   family=family[i],
                                   type.measure=type.measure,
                                   intercept=TRUE, # with intercept
                                   alpha=alpha.meta,...) # ellipsis
    # consider trying different alpha.meta and selecting best one
Armin Rauschenberger's avatar
Armin Rauschenberger committed
348
349
  }
  
Armin Rauschenberger's avatar
Armin Rauschenberger committed
350
  #--- return ---
Armin Rauschenberger's avatar
Armin Rauschenberger committed
351
352
353
  names(base) <- names(meta) <- paste0("y",seq_len(q))
  info <- data.frame(q=q,p=p,family=family,type.measure=type.measure)
  list <- list(base=base,meta=meta,info=info)
Armin Rauschenberger's avatar
Armin Rauschenberger committed
354
  class(list) <- "joinet"
Armin Rauschenberger's avatar
Armin Rauschenberger committed
355
  return(list)
Armin Rauschenberger's avatar
Armin Rauschenberger committed
356
357
}

Armin Rauschenberger's avatar
Armin Rauschenberger committed
358
.mean.function <- function(x,family){
Armin Rauschenberger's avatar
Armin Rauschenberger committed
359
  if(family %in% c("gaussian","cox")){
Armin Rauschenberger's avatar
Armin Rauschenberger committed
360
361
362
363
364
365
366
367
368
    return(x)
  } else if(family=="binomial"){
    return(1/(1+exp(-x)))
  } else if(family=="poisson"){
    return(exp(x))
  } else {
    stop("Family not implemented.",call.=FALSE)
  }
}
Armin Rauschenberger's avatar
Armin Rauschenberger committed
369

Armin Rauschenberger's avatar
Armin Rauschenberger committed
370
.link.function <- function(x,family){
Armin Rauschenberger's avatar
Armin Rauschenberger committed
371
  if(family %in% c("gaussian","cox")){
Armin Rauschenberger's avatar
Armin Rauschenberger committed
372
373
    return(x)
  } else if(family=="binomial"){
Armin Rauschenberger's avatar
Armin Rauschenberger committed
374
375
    if(any(x<0|x>1)){stop("Invalid!",call.=FALSE)}
    return(log(x/(1-x)))
Armin Rauschenberger's avatar
Armin Rauschenberger committed
376
  } else if(family=="poisson"){
Armin Rauschenberger's avatar
Armin Rauschenberger committed
377
    if(any(x<0)){stop("Invalid!",call.=FALSE)}
Armin Rauschenberger's avatar
Armin Rauschenberger committed
378
379
380
381
    return(log(x))
  } else {
    stop("Family not implemented.",call.=FALSE)
  }
Armin Rauschenberger's avatar
Armin Rauschenberger committed
382
383
}

Armin Rauschenberger's avatar
Armin Rauschenberger committed
384
#--- Methods for class "joinet" -----------------------------------------------
Armin Rauschenberger's avatar
Armin Rauschenberger committed
385

Armin Rauschenberger's avatar
Armin Rauschenberger committed
386
387
#' @export
#' @title
Armin Rauschenberger's avatar
Armin Rauschenberger committed
388
#' Make Predictions
Armin Rauschenberger's avatar
Armin Rauschenberger committed
389
390
#'
#' @description
Armin Rauschenberger's avatar
Armin Rauschenberger committed
391
#' Predicts outcome from features with stacked model.
Armin Rauschenberger's avatar
Armin Rauschenberger committed
392
#' 
Armin Rauschenberger's avatar
Armin Rauschenberger committed
393
#' @param object
Armin Rauschenberger's avatar
Armin Rauschenberger committed
394
#' \link[joinet]{joinet} object
Armin Rauschenberger's avatar
Armin Rauschenberger committed
395
#' 
Armin Rauschenberger's avatar
Armin Rauschenberger committed
396
397
398
399
#' @param newx
#' covariates\strong{:}
#' numeric matrix with \eqn{n} rows (samples)
#' and \eqn{p} columns (variables)
Armin Rauschenberger's avatar
Armin Rauschenberger committed
400
#' 
Armin Rauschenberger's avatar
Armin Rauschenberger committed
401
402
#' @param type
#' character "link" or "response"
Armin Rauschenberger's avatar
Armin Rauschenberger committed
403
404
#' 
#' @param ...
Armin Rauschenberger's avatar
Armin Rauschenberger committed
405
#' further arguments (not applicable)
Armin Rauschenberger's avatar
Armin Rauschenberger committed
406
#' 
Armin Rauschenberger's avatar
Armin Rauschenberger committed
407
408
409
410
411
#' @return 
#' This function returns predictions from base and meta learners.
#' The slots \code{base} and \code{meta} each contain a matrix
#' with \eqn{n} rows (samples) and \eqn{q} columns (variables).
#' 
Armin Rauschenberger's avatar
Armin Rauschenberger committed
412
#' @examples
Armin Rauschenberger's avatar
Armin Rauschenberger committed
413
#' \dontshow{
Armin Rauschenberger's avatar
Armin Rauschenberger committed
414
#' if(!grepl('SunOS',Sys.info()['sysname'])){
Armin Rauschenberger's avatar
Armin Rauschenberger committed
415
#' n <- 50; p <- 100; q <- 3
Armin Rauschenberger's avatar
Armin Rauschenberger committed
416
#' X <- matrix(rnorm(n*p),nrow=n,ncol=p)
Armin Rauschenberger's avatar
Armin Rauschenberger committed
417
#' Y <- replicate(n=q,expr=rnorm(n=n,mean=rowSums(X[,1:5])))
Armin Rauschenberger's avatar
Armin Rauschenberger committed
418
419
#' Y[,1] <- 1*(Y[,1]>median(Y[,1]))
#' object <- joinet(Y=Y,X=X,family=c("binomial","gaussian","gaussian"))
Armin Rauschenberger's avatar
Armin Rauschenberger committed
420
421
422
423
424
425
426
427
#' predict(object,newx=X)}}
#' \dontrun{
#' n <- 50; p <- 100; q <- 3
#' X <- matrix(rnorm(n*p),nrow=n,ncol=p)
#' Y <- replicate(n=q,expr=rnorm(n=n,mean=rowSums(X[,1:5])))
#' Y[,1] <- 1*(Y[,1]>median(Y[,1]))
#' object <- joinet(Y=Y,X=X,family=c("binomial","gaussian","gaussian"))
#' predict(object,newx=X)}
Armin Rauschenberger's avatar
Armin Rauschenberger committed
428
#' 
Armin Rauschenberger's avatar
Armin Rauschenberger committed
429
predict.joinet <- function(object,newx,type="response",...){
Armin Rauschenberger's avatar
Armin Rauschenberger committed
430
  if(length(list(...))!=0){warning("Ignoring argument.",call.=FALSE)}
Armin Rauschenberger's avatar
Armin Rauschenberger committed
431
  
Armin Rauschenberger's avatar
Armin Rauschenberger committed
432
  x <- object; rm(object)
Armin Rauschenberger's avatar
Armin Rauschenberger committed
433
  
Armin Rauschenberger's avatar
Armin Rauschenberger committed
434
  newx <- as.matrix(newx)
Armin Rauschenberger's avatar
Armin Rauschenberger committed
435
  cornet:::.check(x=newx,type="matrix")
Armin Rauschenberger's avatar
Armin Rauschenberger committed
436
  
Armin Rauschenberger's avatar
Armin Rauschenberger committed
437
438
439
  q <- length(x$base)
  n <- nrow(newx)
  base <- meta <- matrix(NA,nrow=n,ncol=q)
Armin Rauschenberger's avatar
Armin Rauschenberger committed
440
  
Armin Rauschenberger's avatar
Armin Rauschenberger committed
441
442
  # base learners
  for(i in seq_len(q)){
Armin Rauschenberger's avatar
Armin Rauschenberger committed
443
444
    base[,i] <- as.numeric(stats::predict(object=x$base[[i]]$glmnet.fit,newx=newx,s=x$base[[i]]$lambda.min,type="link"))
    #base[,i] <- as.numeric(glmnet:::predict.cv.glmnet(object=x$base[[i]],newx=newx,s="lambda.min",type="link"))
Armin Rauschenberger's avatar
Armin Rauschenberger committed
445
446
    # check whether fine for "binomial" family
  }
Armin Rauschenberger's avatar
Armin Rauschenberger committed
447
  
Armin Rauschenberger's avatar
Armin Rauschenberger committed
448
449
450
451
452
  # meta learners
  for(i in seq_len(q)){
    meta[,i] <- as.numeric(stats::predict(object=x$meta[[i]],
                                          newx=base,s="lambda.min",type="link"))
  }
Armin Rauschenberger's avatar
Armin Rauschenberger committed
453
  
Armin Rauschenberger's avatar
Armin Rauschenberger committed
454
  list <- list(base=base,meta=meta)
Armin Rauschenberger's avatar
Armin Rauschenberger committed
455
  
Armin Rauschenberger's avatar
Armin Rauschenberger committed
456
457
458
459
460
461
462
  if(type=="response"){
    for(i in seq_len(q)){
      base[,i] <- .mean.function(x=base[,i],family=x$info$family[i])
      meta[,i] <- .mean.function(x=meta[,i],family=x$info$family[i])
    }
  } else if(type!="link"){
    stop("Invalid type.",call.=FALSE)
Armin Rauschenberger's avatar
Armin Rauschenberger committed
463
464
  }
  
Armin Rauschenberger's avatar
Armin Rauschenberger committed
465
466
467
  list <- list(base=base,meta=meta)
  return(list)
  
Armin Rauschenberger's avatar
Armin Rauschenberger committed
468
469
}

Armin Rauschenberger's avatar
Armin Rauschenberger committed
470
471
#' @export
#' @title
Armin Rauschenberger's avatar
Armin Rauschenberger committed
472
#' Extract Coefficients
Armin Rauschenberger's avatar
Armin Rauschenberger committed
473
474
#'
#' @description
Armin Rauschenberger's avatar
Armin Rauschenberger committed
475
#' Extracts pooled coefficients.
Armin Rauschenberger's avatar
Armin Rauschenberger committed
476
477
#' (The meta learners linearly combines
#' the coefficients from the base learners.)
Armin Rauschenberger's avatar
Armin Rauschenberger committed
478
#' 
Armin Rauschenberger's avatar
Armin Rauschenberger committed
479
#' @param object
Armin Rauschenberger's avatar
Armin Rauschenberger committed
480
#' \link[joinet]{joinet} object
Armin Rauschenberger's avatar
Armin Rauschenberger committed
481
482
#' 
#' @param ...
Armin Rauschenberger's avatar
Armin Rauschenberger committed
483
#' further arguments (not applicable)
Armin Rauschenberger's avatar
Armin Rauschenberger committed
484
#' 
Armin Rauschenberger's avatar
Armin Rauschenberger committed
485
486
487
488
489
490
491
#' @return
#' This function returns the pooled coefficients.
#' The slot \code{alpha} contains the intercepts
#' in a vector of length \eqn{q},
#' and the slot \code{beta} contains the slopes
#' in a matrix with \eqn{p} rows (inputs) and \eqn{q} columns.
#' 
Armin Rauschenberger's avatar
Armin Rauschenberger committed
492
#' @examples
Armin Rauschenberger's avatar
Armin Rauschenberger committed
493
#' \dontshow{
Armin Rauschenberger's avatar
Armin Rauschenberger committed
494
#' if(!grepl('SunOS',Sys.info()['sysname'])){
Armin Rauschenberger's avatar
Armin Rauschenberger committed
495
#' n <- 50; p <- 100; q <- 3
Armin Rauschenberger's avatar
Armin Rauschenberger committed
496
#' X <- matrix(rnorm(n*p),nrow=n,ncol=p)
Armin Rauschenberger's avatar
Armin Rauschenberger committed
497
#' Y <- replicate(n=q,expr=rnorm(n=n,mean=rowSums(X[,1:5])))
Armin Rauschenberger's avatar
Armin Rauschenberger committed
498
#' object <- joinet(Y=Y,X=X)
Armin Rauschenberger's avatar
Armin Rauschenberger committed
499
500
501
502
503
504
505
#' coef <- coef(object)}}
#' \dontrun{
#' n <- 50; p <- 100; q <- 3
#' X <- matrix(rnorm(n*p),nrow=n,ncol=p)
#' Y <- replicate(n=q,expr=rnorm(n=n,mean=rowSums(X[,1:5])))
#' object <- joinet(Y=Y,X=X)
#' coef <- coef(object)}
Armin Rauschenberger's avatar
Armin Rauschenberger committed
506
#' 
Armin Rauschenberger's avatar
Armin Rauschenberger committed
507
coef.joinet <- function(object,...){
Armin Rauschenberger's avatar
Armin Rauschenberger committed
508
509
510
511
  if(length(list(...))!=0){warning("Ignoring argument.",call.=FALSE)}
  
  # base coefficients
  base <- list()
Armin Rauschenberger's avatar
Armin Rauschenberger committed
512
  coef <- sapply(object$base,function(x) stats::coef(object=x$glmnet.fit,s=x$lambda.min))
Armin Rauschenberger's avatar
Armin Rauschenberger committed
513
514
515
516
517
518
  base$alpha <- sapply(coef,function(x) x[1,])
  base$beta <- sapply(coef,function(x) x[-1,])
  names(base$alpha) <- colnames(base$beta) <- names(object$base)
  
  # meta coefficients
  meta <- list()
Armin Rauschenberger's avatar
Armin Rauschenberger committed
519
  weights <- weights.joinet(object)
Armin Rauschenberger's avatar
Armin Rauschenberger committed
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
  meta$alpha <- weights[1,]
  meta$beta <- weights[-1,]
  
  # pooled coefficients
  pool <- list()
  pool$alpha <- meta$alpha + base$alpha %*% meta$beta
  pool$beta <- base$beta %*% meta$beta
  
  # q <- unique(object$info$q)
  # p <- unique(object$info$p)
  # pool$alpha <- rep(NA,times=q)
  # for(i in seq_len(q)){
  #   pool$alpha[i] <-  meta$alpha[i] + sum(meta$beta[,i] * base$alpha)
  # }
  # pool$beta <- matrix(NA,nrow=p,ncol=q)
  # for(i in seq_len(p)){
  #   for(j in seq_len(q)){
  #     pool$beta[i,j] <-  sum(meta$beta[,j] * base$beta[i,])
  #   }
  # }
Armin Rauschenberger's avatar
Armin Rauschenberger committed
540
  
Armin Rauschenberger's avatar
Armin Rauschenberger committed
541
  return(pool)
Armin Rauschenberger's avatar
Armin Rauschenberger committed
542
543
}

Armin Rauschenberger's avatar
Armin Rauschenberger committed
544
545
#' @export
#' @importFrom stats weights
Armin Rauschenberger's avatar
Armin Rauschenberger committed
546
#' @title
Armin Rauschenberger's avatar
Armin Rauschenberger committed
547
#' Extract Weights
Armin Rauschenberger's avatar
Armin Rauschenberger committed
548
549
#'
#' @description
Armin Rauschenberger's avatar
Armin Rauschenberger committed
550
551
#' Extracts coefficients from the meta learner,
#' i.e. the weights for the base learners.
Armin Rauschenberger's avatar
Armin Rauschenberger committed
552
#' 
Armin Rauschenberger's avatar
Armin Rauschenberger committed
553
#' @param object
Armin Rauschenberger's avatar
Armin Rauschenberger committed
554
#' \link[joinet]{joinet} object
Armin Rauschenberger's avatar
Armin Rauschenberger committed
555
#' 
Armin Rauschenberger's avatar
Armin Rauschenberger committed
556
557
#' @param ...
#' further arguments (not applicable)
Armin Rauschenberger's avatar
Armin Rauschenberger committed
558
#' 
Armin Rauschenberger's avatar
Armin Rauschenberger committed
559
560
561
562
563
564
565
566
#' @return
#' This function returns a matrix with
#' \eqn{1+q} rows and \eqn{q} columns.
#' The first row contains the intercepts,
#' and the other rows contain the slopes,
#' which are the effects of the outcomes
#' in the row on the outcomes in the column.
#' 
Armin Rauschenberger's avatar
Armin Rauschenberger committed
567
#' @examples
Armin Rauschenberger's avatar
Armin Rauschenberger committed
568
#' \dontshow{
Armin Rauschenberger's avatar
Armin Rauschenberger committed
569
#' if(!grepl('SunOS',Sys.info()['sysname'])){
Armin Rauschenberger's avatar
Armin Rauschenberger committed
570
#' n <- 50; p <- 100; q <- 3
Armin Rauschenberger's avatar
Armin Rauschenberger committed
571
#' X <- matrix(rnorm(n*p),nrow=n,ncol=p)
Armin Rauschenberger's avatar
Armin Rauschenberger committed
572
#' Y <- replicate(n=q,expr=rnorm(n=n,mean=rowSums(X[,1:5])))
Armin Rauschenberger's avatar
Armin Rauschenberger committed
573
#' object <- joinet(Y=Y,X=X)
Armin Rauschenberger's avatar
Armin Rauschenberger committed
574
575
576
577
578
579
580
#' weights(object)}}
#' \dontrun{
#' n <- 50; p <- 100; q <- 3
#' X <- matrix(rnorm(n*p),nrow=n,ncol=p)
#' Y <- replicate(n=q,expr=rnorm(n=n,mean=rowSums(X[,1:5])))
#' object <- joinet(Y=Y,X=X)
#' weights(object)}
Armin Rauschenberger's avatar
Armin Rauschenberger committed
581
#' 
Armin Rauschenberger's avatar
Armin Rauschenberger committed
582
weights.joinet <- function(object,...){
Armin Rauschenberger's avatar
Armin Rauschenberger committed
583
584
  if(length(list(...))!=0){warning("Ignoring argument.",call.=FALSE)}
  x <- object$meta
Armin Rauschenberger's avatar
Armin Rauschenberger committed
585
  coef <- lapply(object$meta,function(x) stats::coef(object=x,s=x$lambda.min))
Armin Rauschenberger's avatar
Armin Rauschenberger committed
586
587
588
589
  coef <- do.call(what="cbind",args=coef)
  coef <- as.matrix(coef)
  colnames(coef) <- names(object$meta)
  return(coef)
Armin Rauschenberger's avatar
Armin Rauschenberger committed
590
591
}

Armin Rauschenberger's avatar
Armin Rauschenberger committed
592
593
print.joinet <- function(x,...){
  cat(paste0("joinet object"),"\n")
Armin Rauschenberger's avatar
Armin Rauschenberger committed
594
595
596
597
}

#--- Manuscript functions ------------------------------------------------------

Armin Rauschenberger's avatar
Armin Rauschenberger committed
598
#' @export
Armin Rauschenberger's avatar
Armin Rauschenberger committed
599
#' @title
Armin Rauschenberger's avatar
Armin Rauschenberger committed
600
#' Model comparison
Armin Rauschenberger's avatar
Armin Rauschenberger committed
601
602
#'
#' @description
Armin Rauschenberger's avatar
Armin Rauschenberger committed
603
#' Compares univariate and multivariate regression.
Armin Rauschenberger's avatar
Armin Rauschenberger committed
604
#' 
Armin Rauschenberger's avatar
Armin Rauschenberger committed
605
#' @inheritParams joinet
Armin Rauschenberger's avatar
Armin Rauschenberger committed
606
#' 
Armin Rauschenberger's avatar
Armin Rauschenberger committed
607
608
#' @param nfolds.ext
#' number of external folds
Armin Rauschenberger's avatar
Armin Rauschenberger committed
609
#' 
Armin Rauschenberger's avatar
Armin Rauschenberger committed
610
611
#' @param nfolds.int
#' number of internal folds
Armin Rauschenberger's avatar
Armin Rauschenberger committed
612
#' 
Armin Rauschenberger's avatar
Armin Rauschenberger committed
613
614
615
616
617
#' @param foldid.ext
#' external fold identifiers\strong{:}
#' vector of length \eqn{n} with entries
#' between \eqn{1} and \code{nfolds.ext};
#' or \code{NULL}
Armin Rauschenberger's avatar
Armin Rauschenberger committed
618
#' 
Armin Rauschenberger's avatar
Armin Rauschenberger committed
619
620
621
622
623
#' @param foldid.int
#' internal fold identifiers\strong{:}
#' vector of length \eqn{n} with entries
#' between \eqn{1} and \code{nfolds.int};
#' or \code{NULL}
Armin Rauschenberger's avatar
Armin Rauschenberger committed
624
#' 
Armin Rauschenberger's avatar
Armin Rauschenberger committed
625
#' @param compare
Armin Rauschenberger's avatar
Armin Rauschenberger committed
626
#' experimental arguments\strong{:}
Armin Rauschenberger's avatar
Armin Rauschenberger committed
627
628
629
#' character vector with entries "mnorm", "spls", "mrce",
#' "sier", "mtps", "rmtl", "gpm" and others
#' (requires packages \code{spls}, \code{MRCE}, \code{SiER}, \code{MTPS}, \code{RMTL} or \code{GPM})
Armin Rauschenberger's avatar
Armin Rauschenberger committed
630
631
632
633
#' 
#' @param mice
#' missing data imputation\strong{:}
#' logical (\code{mice=TRUE} requires package \code{mice})
Armin Rauschenberger's avatar
Armin Rauschenberger committed
634
#' 
Armin Rauschenberger's avatar
Armin Rauschenberger committed
635
#' @param cvpred
Armin Rauschenberger's avatar
Armin Rauschenberger committed
636
#' return cross-validated predictions: logical
Armin Rauschenberger's avatar
Armin Rauschenberger committed
637
#' 
Armin Rauschenberger's avatar
Armin Rauschenberger committed
638
639
640
641
#' @param times
#' measure computation time\strong{:}
#' logical
#' 
Armin Rauschenberger's avatar
Armin Rauschenberger committed
642
#' @param ...
Armin Rauschenberger's avatar
Armin Rauschenberger committed
643
644
645
646
647
#' further arguments passed to \code{\link[glmnet]{glmnet}}
#' and \code{\link[glmnet]{cv.glmnet}}
#' 
#' @return 
#' This function returns a matrix with \eqn{q} columns,
Armin Rauschenberger's avatar
Armin Rauschenberger committed
648
649
650
#' including the cross-validated loss from the univariate models
#' (\code{base}), the multivariate models (\code{meta}),
#' and the intercept-only models (\code{none}).
Armin Rauschenberger's avatar
Armin Rauschenberger committed
651
#' 
Armin Rauschenberger's avatar
Armin Rauschenberger committed
652
#' @examples
Armin Rauschenberger's avatar
Armin Rauschenberger committed
653
#' \dontshow{
Armin Rauschenberger's avatar
Armin Rauschenberger committed
654
#' if(!grepl('SunOS',Sys.info()['sysname'])){
Armin Rauschenberger's avatar
Armin Rauschenberger committed
655
#' n <- 50; p <- 100; q <- 3
Armin Rauschenberger's avatar
Armin Rauschenberger committed
656
#' X <- matrix(rnorm(n*p),nrow=n,ncol=p)
Armin Rauschenberger's avatar
Armin Rauschenberger committed
657
#' Y <- replicate(n=q,expr=rnorm(n=n,mean=rowSums(X[,1:5])))
Armin Rauschenberger's avatar
Armin Rauschenberger committed
658
659
660
661
662
663
#' cv.joinet(Y=Y,X=X)}}
#' \dontrun{
#' n <- 50; p <- 100; q <- 3
#' X <- matrix(rnorm(n*p),nrow=n,ncol=p)
#' Y <- replicate(n=q,expr=rnorm(n=n,mean=rowSums(X[,1:5])))
#' cv.joinet(Y=Y,X=X)}
Armin Rauschenberger's avatar
Armin Rauschenberger committed
664
665
666
#' 
#' \dontrun{
#' # correlated features
Armin Rauschenberger's avatar
Armin Rauschenberger committed
667
#' n <- 50; p <- 100; q <- 3
Armin Rauschenberger's avatar
Armin Rauschenberger committed
668
669
670
671
#' mu <- rep(0,times=p)
#' Sigma <- 0.90^abs(col(diag(p))-row(diag(p)))
#' X <- MASS::mvrnorm(n=n,mu=mu,Sigma=Sigma)
#' mu <- rowSums(X[,sample(seq_len(p),size=5)])
Armin Rauschenberger's avatar
Armin Rauschenberger committed
672
673
674
#' Y <- replicate(n=q,expr=rnorm(n=n,mean=mu))
#' #Y <- t(MASS::mvrnorm(n=q,mu=mu,Sigma=diag(n)))
#' cv.joinet(Y=Y,X=X)}
Armin Rauschenberger's avatar
Armin Rauschenberger committed
675
676
677
#' 
#' \dontrun{
#' # other distributions
Armin Rauschenberger's avatar
Armin Rauschenberger committed
678
#' n <- 50; p <- 100; q <- 3
Armin Rauschenberger's avatar
Armin Rauschenberger committed
679
#' X <- matrix(rnorm(n*p),nrow=n,ncol=p)
Armin Rauschenberger's avatar
Armin Rauschenberger committed
680
#' eta <- rowSums(X[,1:5])
Armin Rauschenberger's avatar
Armin Rauschenberger committed
681
682
683
#' Y <- replicate(n=q,expr=rbinom(n=n,size=1,prob=1/(1+exp(-eta))))
#' cv.joinet(Y=Y,X=X,family="binomial")
#' Y <- replicate(n=q,expr=rpois(n=n,lambda=exp(scale(eta))))
Armin Rauschenberger's avatar
Armin Rauschenberger committed
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
#' cv.joinet(Y=Y,X=X,family="poisson")}
#' 
#' \dontrun{
#' # uncorrelated outcomes
#' n <- 50; p <- 100; q <- 3
#' X <- matrix(rnorm(n*p),nrow=n,ncol=p)
#' y <- rnorm(n=n,mean=rowSums(X[,1:5]))
#' Y <- cbind(y,matrix(rnorm(n*(q-1)),nrow=n,ncol=q-1))
#' cv.joinet(Y=Y,X=X)}
#' 
#' \dontrun{
#' # sparse and dense models
#' n <- 50; p <- 100; q <- 3
#' X <- matrix(rnorm(n*p),nrow=n,ncol=p)
#' Y <- replicate(n=q,expr=rnorm(n=n,mean=rowSums(X[,1:5])))
#' set.seed(1) # fix folds
#' cv.joinet(Y=Y,X=X,alpha.base=1) # lasso
#' set.seed(1)
#' cv.joinet(Y=Y,X=X,alpha.base=0) # ridge}
Armin Rauschenberger's avatar
Armin Rauschenberger committed
703
#' 
Armin Rauschenberger's avatar
Armin Rauschenberger committed
704
cv.joinet <- function(Y,X,family="gaussian",nfolds.ext=5,nfolds.int=10,foldid.ext=NULL,foldid.int=NULL,type.measure="deviance",alpha.base=1,alpha.meta=1,compare=FALSE,mice=FALSE,cvpred=FALSE,times=FALSE,...){
Armin Rauschenberger's avatar
Armin Rauschenberger committed
705
  
Armin Rauschenberger's avatar
Armin Rauschenberger committed
706
  if(FALSE){
Armin Rauschenberger's avatar
Armin Rauschenberger committed
707
  fold <- foldid.ext
Armin Rauschenberger's avatar
Armin Rauschenberger committed
708
  family <- "gaussian"; nfolds.ext <- 5; nfolds.int <- 10; foldid.ext <- foldid.int <- NULL; type.measure <- "deviance"; alpha.base <- alpha.meta <- 1; mice <- cvpred <- times <- FALSE
Armin Rauschenberger's avatar
Armin Rauschenberger committed
709
710
  foldid.ext <- fold; nfolds.ext <- 1
  #nfolds.ext <- 1; nfolds.int <- 10; foldid.int <- NULL; compare <- TRUE
Armin Rauschenberger's avatar
Armin Rauschenberger committed
711
712
  }
  
Armin Rauschenberger's avatar
Armin Rauschenberger committed
713
  if(length(compare)==1 && compare==TRUE){
Armin Rauschenberger's avatar
Armin Rauschenberger committed
714
715
716
717
718
719
720
    if(all(family=="gaussian")){
      compare <- c("mnorm","mars","spls","mrce","map","mrf","sier","mcen","gpm","rmtl","mtps")
    } else if(all(family=="binomial")){
      compare <- c("mars","mcen","rmtl","mtps")
    } else {
      stop("Comparison not implemented for mixed families.",call.=FALSE)
    }
Armin Rauschenberger's avatar
Armin Rauschenberger committed
721
  }
Armin Rauschenberger's avatar
Armin Rauschenberger committed
722
  
Armin Rauschenberger's avatar
Armin Rauschenberger committed
723
724
725
  n <- nrow(Y)
  q <- ncol(Y)
  p <- ncol(X)
Armin Rauschenberger's avatar
Armin Rauschenberger committed
726
  
Armin Rauschenberger's avatar
Armin Rauschenberger committed
727
728
729
730
  #--- fold identifiers ---
  if(is.null(foldid.ext)){
    foldid.ext <- palasso:::.folds(y=Y[,1],nfolds=nfolds.ext) # temporary Y[,1]
  } else {
Armin Rauschenberger's avatar
Armin Rauschenberger committed
731
732
    #nfolds.ext <- length(unique(foldid.ext))
    nfolds.ext <- max(foldid.ext)
Armin Rauschenberger's avatar
Armin Rauschenberger committed
733
734
  }
  
Armin Rauschenberger's avatar
Armin Rauschenberger committed
735
736
737
738
739
  #--- family ---
  if(length(family)==1){
    family <- rep(family,times=q)
  } else if(length(family)!=q){
    stop("Invalid argument family",call.=FALSE)
Armin Rauschenberger's avatar
Armin Rauschenberger committed
740
741
  }
  
Armin Rauschenberger's avatar
Armin Rauschenberger committed
742
743
  # check packages
  pkgs <- .packages(all.available=TRUE)
Armin Rauschenberger's avatar
Armin Rauschenberger committed
744
  
Armin Rauschenberger's avatar
Armin Rauschenberger committed
745
  if(is.character(compare)){
Armin Rauschenberger's avatar
Armin Rauschenberger committed
746
747
    for(i in seq_along(compare)){
      pkg <- switch(compare[i],mnorm="glmnet",mars="earth",spls="spls",
Armin Rauschenberger's avatar
Armin Rauschenberger committed
748
749
750
                  mrce="MRCE",map="remMap",mrf="MultivariateRandomForest",
                  sier="SiER",mcen="mcen",gpm="GPM",rmtl="RMTL",mtps="MTPS",
                  stop("Invalid method.",call.=FALSE))
Armin Rauschenberger's avatar
Armin Rauschenberger committed
751
752
753
      if(!pkg %in% pkgs){
        stop("Method \"",compare[i],"\" requires package \"",pkg,"\".",call.=FALSE)
      }
Armin Rauschenberger's avatar
Armin Rauschenberger committed
754
    }
Armin Rauschenberger's avatar
Armin Rauschenberger committed
755
  }
Armin Rauschenberger's avatar
Armin Rauschenberger committed
756
757
758
759
760
761
762
763

  #--- checks ---
  #if(any( & any(family!="gaussian")){
  #  stop("\"mnorm\", \"spls\", \"mrce\" and \"sier\" require family \"gaussian\".",call.=FALSE)
  #}
  #if(any(mtps,rmtl) & any(!family %in% c("gaussian","binomial"))){
  #  stop("\"mtps\" and \"rmtl\" require family \"gaussian\" or \"binomial\".",call.=FALSE)
  #}
Armin Rauschenberger's avatar
Armin Rauschenberger committed
764
  
Armin Rauschenberger's avatar
Armin Rauschenberger committed
765
  #--- cross-validated predictions ---
Armin Rauschenberger's avatar
Armin Rauschenberger committed
766
  
Armin Rauschenberger's avatar
Armin Rauschenberger committed
767
  models <- c("base","meta",compare,"none")
Armin Rauschenberger's avatar
Armin Rauschenberger committed
768
  pred <- lapply(X=models,function(x) matrix(data=NA,nrow=n,ncol=q))
Armin Rauschenberger's avatar
Armin Rauschenberger committed
769
770
  time <- lapply(X=models,function(x) NA)
  names(pred) <- names(time) <- models
Armin Rauschenberger's avatar
Armin Rauschenberger committed
771
  
Armin Rauschenberger's avatar
Armin Rauschenberger committed
772
773
774
  for(i in seq_len(nfolds.ext)){
    
    Y0 <- Y[foldid.ext!=i,,drop=FALSE]
Armin Rauschenberger's avatar
Armin Rauschenberger committed
775
    #Y1 <- Y[foldid.ext==i,,drop=FALSE]
Armin Rauschenberger's avatar
Armin Rauschenberger committed
776
777
    X0 <- X[foldid.ext!=i,,drop=FALSE]
    X1 <- X[foldid.ext==i,,drop=FALSE]
Armin Rauschenberger's avatar
Armin Rauschenberger committed
778
    
Armin Rauschenberger's avatar
Armin Rauschenberger committed
779
780
781
782
783
784
785
    # standardise features (trial)
    #mu <- apply(X=X0,MARGIN=2,FUN=function(x) mean(x))
    #sd <- apply(X=X0,MARGIN=2,FUN=function(x) stats::sd(x))
    #X0 <- t((t(X0)-mu)/sd)[,sd!=0]
    #X1 <- t((t(X1)-mu)/sd)[,sd!=0]
    # or standardise once before cv?
    
Armin Rauschenberger's avatar
Armin Rauschenberger committed
786
787
788
789
    # remove constant features
    cond <- apply(X=X0,MARGIN=2,FUN=function(x) stats::sd(x)!=0)
    X0 <- X0[,cond]; X1 <- X1[,cond]
    
Armin Rauschenberger's avatar
Armin Rauschenberger committed
790
791
792
793
794
795
796
    if(is.null(foldid.int)){
      foldid <- palasso:::.folds(y=Y0[,1],nfolds=nfolds.int) # temporary Y0[,1]
    } else {
      foldid <- foldid.int[foldid.ext!=i]
    }
    
    # base and meta learners
Armin Rauschenberger's avatar
Armin Rauschenberger committed
797
    start <- Sys.time()
Armin Rauschenberger's avatar
Armin Rauschenberger committed
798
    fit <- joinet(Y=Y0,X=X0,family=family,type.measure=type.measure,alpha.base=alpha.base,alpha.meta=alpha.meta,foldid=foldid) # add ellipsis (...)
Armin Rauschenberger's avatar
Armin Rauschenberger committed
799
    # also do not standardise!
Armin Rauschenberger's avatar
Armin Rauschenberger committed
800
    temp <- predict.joinet(fit,newx=X1)
Armin Rauschenberger's avatar
Armin Rauschenberger committed
801
802
    pred$base[foldid.ext==i,] <- temp$base
    pred$meta[foldid.ext==i,] <- temp$meta
Armin Rauschenberger's avatar
Armin Rauschenberger committed
803
804
    end <- Sys.time()
    time$meta <- as.numeric(difftime(end,start,units="secs"))
Armin Rauschenberger's avatar
Armin Rauschenberger committed
805
    
Armin Rauschenberger's avatar
Armin Rauschenberger committed
806
    # missing values
Armin Rauschenberger's avatar
Armin Rauschenberger committed
807
808
809
810
    if(mice & any(is.na(Y0))){
      if(requireNamespace("mice",quietly=TRUE)){
        y0 <- as.matrix(mice::complete(data=mice::mice(Y0,m=1,method="pmm",seed=1,printFlag=FALSE),action="all")[[1]])
      } else {
Armin Rauschenberger's avatar
Armin Rauschenberger committed
811
        stop("Imputation by PMM requires install.packages(\"mice\").",call.=FALSE)
Armin Rauschenberger's avatar
Armin Rauschenberger committed
812
813
814
815
      }
    } else {
      y0 <- apply(X=Y0,MARGIN=2,FUN=function(x) ifelse(is.na(x),stats::median(x[!is.na(x)]),x))
    }
Armin Rauschenberger's avatar
Armin Rauschenberger committed
816
    all(Y0==y0,na.rm=TRUE)
Armin Rauschenberger's avatar
Armin Rauschenberger committed
817
    
Armin Rauschenberger's avatar
Armin Rauschenberger committed
818
    # other learners
Armin Rauschenberger's avatar
Armin Rauschenberger committed
819
820
    
    if("mnorm" %in% compare){
Armin Rauschenberger's avatar
Armin Rauschenberger committed
821
822
823
824
825
      cat("mnorm"," ")
      start <- Sys.time()
      if(any(family!="gaussian")){
        stop("mnorm requires \"gaussian\" family.",call.=FALSE)
      }
Armin Rauschenberger's avatar
Armin Rauschenberger committed
826
      net <- glmnet::cv.glmnet(x=X0,y=y0,family="mgaussian",foldid=foldid,alpha=alpha.base) # add ellipsis (...)
Armin Rauschenberger's avatar
Armin Rauschenberger committed
827
      pred$mnorm[foldid.ext==i,] <- stats::predict(object=net,newx=X1,s="lambda.min",type="response")
Armin Rauschenberger's avatar
Armin Rauschenberger committed
828
829
830
831
      end <- Sys.time()
      time$mnorm <- as.numeric(difftime(end,start,units="secs"))
    } else {
      net <- glmnet::glmnet(x=X0,y=y0,family="mgaussian")
Armin Rauschenberger's avatar
Armin Rauschenberger committed
832
    }
Armin Rauschenberger's avatar
Armin Rauschenberger committed
833
834
    
    if("mars" %in% compare){
Armin Rauschenberger's avatar
Armin Rauschenberger committed
835
836
      cat("mars"," ")
      start <- Sys.time()
Armin Rauschenberger's avatar
Armin Rauschenberger committed
837
      if(all(family=="gaussian")){
Armin Rauschenberger's avatar
Armin Rauschenberger committed
838
        object <- earth::earth(x=X0,y=y0)
Armin Rauschenberger's avatar
Armin Rauschenberger committed
839
        # equivalent: object <- mda::mars(x=X0,y=y0)
Armin Rauschenberger's avatar
Armin Rauschenberger committed
840
      } else if(all(family=="binomial")){
Armin Rauschenberger's avatar
Armin Rauschenberger committed
841
        object <- earth::earth(x=X0,y=y0,glm=list(family=stats::binomial))
Armin Rauschenberger's avatar
Armin Rauschenberger committed
842
      } else {
Armin Rauschenberger's avatar
Armin Rauschenberger committed
843
        stop("MARS requires either \"gaussian\" or \"binomial\" family.",call.=FALSE)
Armin Rauschenberger's avatar
Armin Rauschenberger committed
844
      }
Armin Rauschenberger's avatar
Armin Rauschenberger committed
845
      # pmethod="cv" not available for multivariate outputs
Armin Rauschenberger's avatar
Armin Rauschenberger committed
846
      
Armin Rauschenberger's avatar
Armin Rauschenberger committed
847
848
849
850
851
852
853
854
855
      ### start trial ###
      if(FALSE){
      #nk <- min(200, max(20, 2 * ncol(X0))) + 1
      #nprune <- round(seq(from=2,to=nk,length.out=10))
      #object <- list()
      #for(j in seq_along(nprune)){
      #  object[[j]] <- earth::earth(x=X0,y=y0,nprune=nprune[j],pmethod="cv",nfold=nfolds.int)
      #}
      #sapply(object,function(x) x$gcv)
Armin Rauschenberger's avatar
Armin Rauschenberger committed
856
857
858
      ## i.e. run earth/mars with tryCatch for each nprune
      ## and select run with best cvm (here gcv)
      # tune nprune (use default nk)!
Armin Rauschenberger's avatar
Armin Rauschenberger committed
859
      }
Armin Rauschenberger's avatar
Armin Rauschenberger committed
860
      
Armin Rauschenberger's avatar
Armin Rauschenberger committed
861
862
      #pred$mars[foldid.ext==i,] <- earth:::predict.earth(object=object,newdata=X1,type="response") # original
      pred$mars[foldid.ext==i,] <- stats::predict(object=object,newdata=X1,type="response") # trial
Armin Rauschenberger's avatar
Armin Rauschenberger committed
863
864
      end <- Sys.time()
      time$mars <- as.numeric(difftime(end,start,units="secs"))
Armin Rauschenberger's avatar
Armin Rauschenberger committed
865
    }
Armin Rauschenberger's avatar
Armin Rauschenberger committed
866
867
    
    if("spls" %in% compare){
Armin Rauschenberger's avatar
Armin Rauschenberger committed
868
869
870
871
872
873
      cat("spls"," ")
      start <- Sys.time()
      if(any(family!="gaussian")){
        stop("spls requires \"gaussian\" family.")
      }
      invisible(utils::capture.output(cv.spls <- spls::cv.spls(x=X0,y=y0,fold=nfolds.int,K=seq_len(min(ncol(X0),10)),
Armin Rauschenberger's avatar
Armin Rauschenberger committed
874
                               eta=seq(from=0.0,to=0.9,by=0.1),plot.it=FALSE))) #scale.x=FALSE
Armin Rauschenberger's avatar
Armin Rauschenberger committed
875
      object <- spls::spls(x=X0,y=y0,K=cv.spls$K.opt,eta=cv.spls$eta.opt) #scale.x=FALSE
Armin Rauschenberger's avatar
Armin Rauschenberger committed
876
      pred$spls[foldid.ext==i,] <- spls::predict.spls(object=object,newx=X1,type="fit")
Armin Rauschenberger's avatar
Armin Rauschenberger committed
877
878
      end <- Sys.time()
      time$spls <- as.numeric(difftime(end,start,units="secs"))
Armin Rauschenberger's avatar
Armin Rauschenberger committed
879
    }
Armin Rauschenberger's avatar
Armin Rauschenberger committed
880
    
Armin Rauschenberger's avatar
Armin Rauschenberger committed
881
882
    if("mrce" %in% compare){
      cat("mrce"," ")
Armin Rauschenberger's avatar
Armin Rauschenberger committed
883
      start <- Sys.time()
Armin Rauschenberger's avatar
Armin Rauschenberger committed
884
885
      if(any(family!="gaussian")){
        stop("MRCE requires \"gaussian\" family.",call.=FALSE)
Armin Rauschenberger's avatar
Armin Rauschenberger committed
886
      }
Armin Rauschenberger's avatar
Armin Rauschenberger committed
887
      lam1 <- lam2 <- 10^seq(from=1,to=-4,length.out=11)
Armin Rauschenberger's avatar
Armin Rauschenberger committed
888
      invisible(utils::capture.output(trials <- lapply(lam2,function(x) tryCatch(expr=MRCE::mrce(X=X0,Y=y0,lam1.vec=lam1,lam2.vec=x,method="cv",kfold=nfolds.int),error=function(x) NULL))))
Armin Rauschenberger's avatar
Armin Rauschenberger committed
889
890
891
      cv.err <- sapply(trials,function(x) ifelse(is.null(x),Inf,min(x$cv.err)))
      object <- trials[[which.min(cv.err)]]
      pred$mrce[foldid.ext==i,] <- matrix(object$muhat,nrow=nrow(X1),ncol=q,byrow=TRUE) + X1 %*% object$Bhat
Armin Rauschenberger's avatar
Armin Rauschenberger committed
892
      end <- Sys.time()
Armin Rauschenberger's avatar
Armin Rauschenberger committed
893
      time$mrce <- as.numeric(difftime(end,start,units="secs"))
Armin Rauschenberger's avatar
Armin Rauschenberger committed
894
    }
Armin Rauschenberger's avatar
Armin Rauschenberger committed
895
    
Armin Rauschenberger's avatar
Armin Rauschenberger committed
896
897
    if("map" %in% compare){
      cat("map"," ")
Armin Rauschenberger's avatar
Armin Rauschenberger committed
898
      start <- Sys.time()
Armin Rauschenberger's avatar
Armin Rauschenberger committed
899
900
      if(any(family!="gaussian")){
        stop("map requires \"gaussian\" family.")
Armin Rauschenberger's avatar
Armin Rauschenberger committed
901
      }
Armin Rauschenberger's avatar
Armin Rauschenberger committed
902
903
      mean <- colMeans(y0)
      y0s <- y0-matrix(data=mean,nrow=nrow(X0),ncol=ncol(y0),byrow=TRUE)
Armin Rauschenberger's avatar
Armin Rauschenberger committed
904
      lamL1.v <- lamL2.v <- exp(seq(from=0,to=5,length.out=11))
Armin Rauschenberger's avatar
Armin Rauschenberger committed
905
      cv <- remMap::remMap.CV(X=X0,Y=y0s,lamL1.v=lamL1.v,lamL2.v=lamL2.v,fold=nfolds.int)
Armin Rauschenberger's avatar
Armin Rauschenberger committed
906
      #graphics::plot(x=lamL1.v,y=log(as.numeric(cv$ols.cv[,3])))
Armin Rauschenberger's avatar
Armin Rauschenberger committed
907
908
909
910
911
912
913
914
      pick <- which.min(as.vector(cv$ols.cv))
      lamL1 <- cv$l.index[1,pick]
      lamL2 <- cv$l.index[2,pick]
      # index <- which(cv$ols.cv==min(cv$ols.cv),arr.ind=TRUE)[1,]
      # rev(lamL1.v)[index[1]]
      # rev(lamL2.v)[index[2]]
      ##cat("lam1:",lamL1,", lam2:",lamL2)
      object <- remMap::remMap(X.m=X0,Y.m=y0s,lamL1=lamL1,lamL2=lamL2)
Armin Rauschenberger's avatar
Armin Rauschenberger committed
915
      pred$map[foldid.ext==i,] <- matrix(data=mean,nrow=nrow(X1),ncol=ncol(y0),byrow=TRUE) + X1 %*% object$phi
Armin Rauschenberger's avatar
Armin Rauschenberger committed
916
      end <- Sys.time()
Armin Rauschenberger's avatar
Armin Rauschenberger committed
917
      time$map <- as.numeric(difftime(end,start,units="secs"))
Armin Rauschenberger's avatar
Armin Rauschenberger committed
918
    }
Armin Rauschenberger's avatar
Armin Rauschenberger committed
919
920
    
    if("mrf" %in% compare){
Armin Rauschenberger's avatar
Armin Rauschenberger committed
921
922
923
924
925
926
      cat("mrf"," ")
      start <- Sys.time()
      if(any(family!="gaussian")){
        stop("mrf requires \"gaussian\" family.")
      }
      pred$mrf[foldid.ext==i,] <- MultivariateRandomForest::build_forest_predict(trainX=X0,trainY=y0,
Armin Rauschenberger's avatar
Armin Rauschenberger committed
927
                                   n_tree=100,m_feature=min(ncol(X0)-1,5),min_leaf=min(nrow(X0),5),testX=X1)
Armin Rauschenberger's avatar
Armin Rauschenberger committed
928
      # use n_tree=500, m_feature=floor(ncol(X0)/3)
Armin Rauschenberger's avatar
Armin Rauschenberger committed
929
      # alternative: IntegratedMRF
Armin Rauschenberger's avatar
Armin Rauschenberger committed
930
931
932
      # Check why this does not work well!
      end <- Sys.time()
      time$mrf <- as.numeric(difftime(end,start,units="secs"))
Armin Rauschenberger's avatar
Armin Rauschenberger committed
933
934
    }
    
Armin Rauschenberger's avatar
Armin Rauschenberger committed
935
936
937
938
939
940
    if("sier" %in% compare){
      cat("sier"," ")
      start <- Sys.time()
      if(any(family!="gaussian")){
        stop("SiER requires \"gaussian\" family.",call.=FALSE)
      }
Armin Rauschenberger's avatar
Armin Rauschenberger committed
941
      invisible(utils::capture.output(object <- SiER::cv.SiER(X=X0,Y=y0,K.cv=3)))
Armin Rauschenberger's avatar
Armin Rauschenberger committed
942
943
944
945
946
947
948
      # trial with K.cv=3 (for spped-up)
      # use upper.comp=10 and thres=0.01  (changed for speed-up)
      pred$sier[foldid.ext==i,] <- SiER::pred.SiER(cv.fit=object,X.new=X1)
      end <- Sys.time()
      time$sier <- as.numeric(difftime(end,start,units="secs"))
    }
    
Armin Rauschenberger's avatar
Armin Rauschenberger committed
949
    if("mcen" %in% compare){
Armin Rauschenberger's avatar
Armin Rauschenberger committed
950
951
952
953
954
955
956
957
958
959
      cat("mcen"," ")
      start <- Sys.time()
      if(all(family=="gaussian")){
        type <- "mgaussian"
      } else if(all(family=="binomial")){
        type <- "mbinomial"
      } else {
        stop("mcen requires either \"gaussian\" or \"binomial\".",call.=FALSE)
      }
      object <- mcen::cv.mcen(x=X0,y=y0,family=type,folds=foldid,ky=1,
Armin Rauschenberger's avatar
Armin Rauschenberger committed
960
961
                              gamma_y=seq(from=0.1,to=5.1,by=1),ndelta=5)
      # TEMPORARY gamma_y=seq(from=0.1,to=5.1,length.out=3) and ndelta=3 (for speed-up)
Armin Rauschenberger's avatar
Armin Rauschenberger committed
962
963
      #temp <- mcen:::predict.cv.mcen(object=object,newx=X1) # original
      temp <- stats::predict(object=object,newx=X1) # trial
Armin Rauschenberger's avatar
Armin Rauschenberger committed
964
      pred$mcen[foldid.ext==i,] <- as.matrix(temp)
Armin Rauschenberger's avatar
Armin Rauschenberger committed