functions.R 8.74 KB
Newer Older
Rauschenberger's avatar
Rauschenberger committed
1

Rauschenberger's avatar
Rauschenberger committed
2
#' @export
Rauschenberger's avatar
Rauschenberger committed
3
#' @aliases colasso-package
Rauschenberger's avatar
Rauschenberger committed
4
#' @title
Rauschenberger's avatar
Rauschenberger committed
5
#' colasso
Rauschenberger's avatar
Rauschenberger committed
6
7
#' 
#' @description
Armin Rauschenberger's avatar
Armin Rauschenberger committed
8
#' Implements penalised regression with response moderation.
Rauschenberger's avatar
Rauschenberger committed
9
#'  
Rauschenberger's avatar
Rauschenberger committed
10
11
12
#' @param y
#' response\strong{:}
#' vector of length \eqn{n}
Rauschenberger's avatar
Rauschenberger committed
13
#' 
Armin Rauschenberger's avatar
Armin Rauschenberger committed
14
15
16
17
18
#' @param Y
#' response\strong{:}
#' matrix with \eqn{n} rows and \eqn{p} columns,
#' or vector of length \eqn{n} (see details)
#' 
Rauschenberger's avatar
Rauschenberger committed
19
20
21
#' @param X
#' covariates\strong{:}
#' matrix with \eqn{n} rows (samples) and \eqn{p} columns (variables)
Rauschenberger's avatar
Rauschenberger committed
22
#' 
Armin Rauschenberger's avatar
Armin Rauschenberger committed
23
24
25
26
27
28
#' @param alpha
#' elastic net parameter\strong{:}
#' numeric between \eqn{0} and \eqn{1};
#' \eqn{alpha=1} for lasso,
#' \eqn{alpha=0} for ridge
#' 
Rauschenberger's avatar
Rauschenberger committed
29
#' @param nfolds
Rauschenberger's avatar
Rauschenberger committed
30
#' number of folds
Rauschenberger's avatar
Rauschenberger committed
31
#' 
Armin Rauschenberger's avatar
Armin Rauschenberger committed
32
33
34
35
36
#' @param family
#' see glmnet
#' 
#' @param type.measure
#' see glmnet
Rauschenberger's avatar
Rauschenberger committed
37
#' 
Rauschenberger's avatar
Rauschenberger committed
38
#' @examples
Armin Rauschenberger's avatar
Armin Rauschenberger committed
39
#' n <- 100; p <- 20; q <- 10
Rauschenberger's avatar
Rauschenberger committed
40
#' X <- matrix(rnorm(n*p),nrow=n,ncol=p)
Armin Rauschenberger's avatar
Armin Rauschenberger committed
41
#' Y <- matrix(rnorm(n*q),nrow=n,ncol=q)
Rauschenberger's avatar
Rauschenberger committed
42
43
#' #y <- rbinom(n=n,size=1,prob=0.2)
#' y <- rnorm(n=n)
Armin Rauschenberger's avatar
Armin Rauschenberger committed
44
#' test <- colasso(y=y,Y=Y,X=X)
Armin Rauschenberger's avatar
Armin Rauschenberger committed
45
46
47
48
49
#' 
colasso <- function(y,Y,X,alpha=1,nfolds=10,family="gaussian",type.measure="deviance"){
  
  # properties
  n <- nrow(X); p <- ncol(X)
Armin Rauschenberger's avatar
Armin Rauschenberger committed
50
51
52
  if(!family %in% c("gaussian","poisson","binomial")){
    stop("Family not implemented.")
  }
Armin Rauschenberger's avatar
Armin Rauschenberger committed
53
  if(length(y)!=n){stop("sample size")}
Armin Rauschenberger's avatar
Armin Rauschenberger committed
54
55
  #foldid <- sample(x=rep(x=seq_len(nfolds),length.out=n))
  foldid <- palasso:::.folds(y=y,nfolds=nfolds)
Armin Rauschenberger's avatar
Armin Rauschenberger committed
56
57
58
  
  # weights
  pi <- seq(from=0,to=1,by=0.2) # adapt this
Rauschenberger's avatar
Rauschenberger committed
59

Armin Rauschenberger's avatar
Armin Rauschenberger committed
60
61
  # model fitting
  fit <- list()
Armin Rauschenberger's avatar
Armin Rauschenberger committed
62
  ym <- colasso::colasso_moderate(Y=Y) # trial
Armin Rauschenberger's avatar
Armin Rauschenberger committed
63
64
65
66
  for(i in seq_along(pi)){
    weights <- rep(c(1-pi[[i]],pi[[i]]),each=n)
    fit[[i]] <- glmnet::glmnet(y=c(y,ym),x=rbind(X,X),weights=weights,family=family,alpha=alpha)
  }
Armin Rauschenberger's avatar
Armin Rauschenberger committed
67
  names(fit) <- paste0("pi",pi)
Armin Rauschenberger's avatar
Armin Rauschenberger committed
68
69
70
  
  # inner cross-validation
  pred <- lapply(pi,function(x) matrix(data=NA,nrow=length(y),ncol=100))
Armin Rauschenberger's avatar
Armin Rauschenberger committed
71
  for(k in unique(foldid)){
Armin Rauschenberger's avatar
Armin Rauschenberger committed
72
    y0 <- y[foldid!=k]
Armin Rauschenberger's avatar
Armin Rauschenberger committed
73
    y1 <- y[foldid==k]
Armin Rauschenberger's avatar
Armin Rauschenberger committed
74
    Y0 <- Y[foldid!=k,,drop=FALSE]
Armin Rauschenberger's avatar
Armin Rauschenberger committed
75
    Y1 <- Y[foldid==k,,drop=FALSE]
Armin Rauschenberger's avatar
Armin Rauschenberger committed
76
77
    X0 <- X[foldid!=k,,drop=FALSE]
    X1 <- X[foldid==k,,drop=FALSE]
Rauschenberger's avatar
Rauschenberger committed
78
    
Armin Rauschenberger's avatar
Armin Rauschenberger committed
79
    y0m <- colasso_moderate(Y=Y0)
Rauschenberger's avatar
Rauschenberger committed
80
    for(i in seq_along(pi)){
Armin Rauschenberger's avatar
Armin Rauschenberger committed
81
82
83
84
      weights <- rep(c(1-pi[[i]],pi[[i]]),each=sum(foldid!=k)) # trial
      glmnet <- glmnet::glmnet(y=c(y0,y0m),x=rbind(X0,X0),weights=weights,family=family,alpha=alpha)
      temp <- stats::predict(object=glmnet,newx=X1,type="response",s=fit[[i]]$lambda)
      pred[[i]][foldid==k,seq_len(ncol(temp))] <- temp
Rauschenberger's avatar
Rauschenberger committed
85
    }
Armin Rauschenberger's avatar
Armin Rauschenberger committed
86
87
  }
  
Armin Rauschenberger's avatar
Armin Rauschenberger committed
88
  # loss sequence 
Armin Rauschenberger's avatar
Armin Rauschenberger committed
89
  for(i in seq_along(pi)){
Armin Rauschenberger's avatar
Armin Rauschenberger committed
90
91
92
93
94
95
96
97
98
    # WATCH OUT: adapt to all loss fuctions
    #fit[[i]]$cvm <- apply(X=pred[[i]],MARGIN=2,FUN=function(x) mean((x-y)^2))
    fit[[i]]$cvm <- palasso:::.loss(y=y,fit=pred[[i]],family=family,type.measure=type.measure,foldid=foldid)[[1]]
    # WATCH OUT: minimise or maximise
    if(type.measure=="AUC"){
      fit[[i]]$lambda.min <- fit[[i]]$lambda[which.max(fit[[i]]$cvm)]
    } else {
      fit[[i]]$lambda.min <- fit[[i]]$lambda[which.min(fit[[i]]$cvm)]
    }
Armin Rauschenberger's avatar
Armin Rauschenberger committed
99
100
  }
  
Armin Rauschenberger's avatar
Armin Rauschenberger committed
101
102
103
104
105
106
  # loss sequence
  #cvm <- palasso:::.loss(y=y,fit=pred,family=family,type.measure=type.measure,foldid=foldid)
  
  # optimisation
  #model <- .extract(fit=fit.full,lambda=lambda,cvm=cvm,type.measure=args$type.measure)
  
Armin Rauschenberger's avatar
Armin Rauschenberger committed
107
108
109
110
111
112
113
114
115
116
  # selection
  cvm <- sapply(fit,function(x) x$cvm[which(x$lambda==x$lambda.min)])
  if(type.measure=="AUC"){
    sel <- which.max(cvm)
  } else {
    sel <- which.min(cvm)
  }
  fit[[length(pi)+1]] <- fit[[sel]]
  
  #graphics::plot(cvm); graphics::abline(v=sel,lty=2)
Armin Rauschenberger's avatar
Armin Rauschenberger committed
117
  names(fit) <- c("glmnet",paste0("pi",pi[-1]),"conet")
Armin Rauschenberger's avatar
Armin Rauschenberger committed
118
  return(fit)
Rauschenberger's avatar
Rauschenberger committed
119
120
121
}


Rauschenberger's avatar
Rauschenberger committed
122
123


Rauschenberger's avatar
Rauschenberger committed
124
125
#' @export
#' @title
Rauschenberger's avatar
Rauschenberger committed
126
#' moderated response
Armin Rauschenberger's avatar
Armin Rauschenberger committed
127
#'
Rauschenberger's avatar
Rauschenberger committed
128
#' @description
Rauschenberger's avatar
Rauschenberger committed
129
#' This function ...
Armin Rauschenberger's avatar
Armin Rauschenberger committed
130
#'
Rauschenberger's avatar
Rauschenberger committed
131
#' @inheritParams colasso
Armin Rauschenberger's avatar
Armin Rauschenberger committed
132
133
134
#' 
#' @param ...
#' further arguments (currently not implemented)
Rauschenberger's avatar
Rauschenberger committed
135
#' vector with entries between \eqn{0} and \eqn{1} (rename argument)
Armin Rauschenberger's avatar
Armin Rauschenberger committed
136
137
138
#'
#'
#'
Rauschenberger's avatar
Rauschenberger committed
139
140
#' @examples
#' NA
Armin Rauschenberger's avatar
Armin Rauschenberger committed
141
colasso_moderate <- function(Y,...){
Armin Rauschenberger's avatar
Armin Rauschenberger committed
142
143
  # (most basic version possible)
  y <- rowMeans(Y)
Armin Rauschenberger's avatar
Armin Rauschenberger committed
144
145
  y <- apply(Y,1,stats::median)
  if(all(y %in% c(0,0.5,1))){
Armin Rauschenberger's avatar
Armin Rauschenberger committed
146
147
148
149
    y[y==0.5] <- 1
    warning("Invalid unless binomial family.")
  }
  return(y)
Rauschenberger's avatar
Rauschenberger committed
150
151
}

Rauschenberger's avatar
Rauschenberger committed
152
153
#' @export
#' @title
Rauschenberger's avatar
Rauschenberger committed
154
#' simulate data
Rauschenberger's avatar
Rauschenberger committed
155
156
157
#' 
#' @description
#' This function ...
Rauschenberger's avatar
Rauschenberger committed
158
159
160
#'  
#' @param n
#' sample size
Rauschenberger's avatar
Rauschenberger committed
161
#' 
Rauschenberger's avatar
Rauschenberger committed
162
163
#' @param p
#' number of covariates
Rauschenberger's avatar
Rauschenberger committed
164
#' 
Rauschenberger's avatar
Rauschenberger committed
165
166
#' @param cor
#' correlation structure
Rauschenberger's avatar
Rauschenberger committed
167
#' 
Rauschenberger's avatar
Rauschenberger committed
168
169
#' @param plot
#' logical
Rauschenberger's avatar
Rauschenberger committed
170
#' 
Armin Rauschenberger's avatar
Armin Rauschenberger committed
171
172
173
#' @param family
#' character
#' 
Rauschenberger's avatar
Rauschenberger committed
174
#' @examples
Rauschenberger's avatar
Rauschenberger committed
175
176
#' # CONTINUE HERE
#' 
Armin Rauschenberger's avatar
Armin Rauschenberger committed
177
colasso_simulate <- function(n=100,p=500,cor="constant",family="gaussian",plot=TRUE){
Rauschenberger's avatar
Rauschenberger committed
178
179
180
181
182
183
184
    # correlation matrix
    if(cor=="none"){
        Sigma <- matrix(data=0,nrow=p,ncol=p)
    } else if(cor=="constant"){
        Sigma <- matrix(data=0.05,nrow=p,ncol=p)
    } else if(cor=="autoregressive"){
        # adjust 0.9 to p, such that mean(Sigma)=0.05
Armin Rauschenberger's avatar
Armin Rauschenberger committed
185
        # sum(2*(p-seq_len(p)+1)*0.9^seq_len(p))/(p*p)
Rauschenberger's avatar
Rauschenberger committed
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
        Sigma <- 0.9^abs(col(diag(p))-row(diag(p)))
    } else if(cor=="unstructured"){
        Sigma <- matrix(data=stats::rbeta(n=p,shape1=0.05,shape2=1),nrow=p,ncol=p)
    }
    diag(Sigma) <- 1
    
    X <- MASS::mvrnorm(n=n,mu=rep(0,p),Sigma=Sigma)
    stats::median(abs(as.numeric(cor(X))))

    # non-sparse effects
    #beta <- stats::rnorm(n=p,mean=0,sd=1)
    #mu <- X %*% beta
    #y <- stats::rnorm(n=n,mean=mu)

    # sparse effects
Armin Rauschenberger's avatar
Armin Rauschenberger committed
201
202
    beta <- rep(x=1,times=p) 
    beta[stats::rbinom(n=p,size=1,prob=0.95)==1] <- 0
Rauschenberger's avatar
Rauschenberger committed
203
    mu <- X %*% beta
Armin Rauschenberger's avatar
Armin Rauschenberger committed
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
    
    if(family=="gaussian"){
      Y <- replicate(n=10,expr=stats::rnorm(n=n,mean=mu,sd=10))
    } else if(family=="binomial"){
      prob <- exp(mu)/(1+exp(mu))
      Y <- replicate(n=10,expr=stats::rbinom(n=n,size=1,prob=prob))
    } else if(family=="poisson"){
      lambda <- exp(mu)
      Y <- replicate(n=10,expr=stats::rpois(n=n,lambda=lambda))
    } else if(family=="cox"){
      warning("Cox regression not implemented!")
    }
    
    #y <- stats::rnorm(n=n,mean=mu,sd=10)
    y <- Y[,1]
Rauschenberger's avatar
Rauschenberger committed
219
220
221

    # predictivity -----------------------------------------------------------------
    if(plot){
Armin Rauschenberger's avatar
Armin Rauschenberger committed
222
        graphics::par(mar=c(3,3,1,1))
Rauschenberger's avatar
Rauschenberger committed
223
224
225
        test <- glmnet::cv.glmnet(x=X,y=y)
        graphics::plot(x=log(test$lambda),y=test$cvm)
        graphics::abline(h=test$cvm[test$lambda==max(test$lambda)],lty=2)
Rauschenberger's avatar
Rauschenberger committed
226
    }
Rauschenberger's avatar
Rauschenberger committed
227
        
Armin Rauschenberger's avatar
Armin Rauschenberger committed
228
    return(list(y=y,Y=Y,X=X))
Rauschenberger's avatar
Rauschenberger committed
229
230
}

Rauschenberger's avatar
Rauschenberger committed
231
232
#' @export
#' @title
Rauschenberger's avatar
Rauschenberger committed
233
#' External cross-validation
Rauschenberger's avatar
Rauschenberger committed
234
235
#' 
#' @description
Rauschenberger's avatar
Rauschenberger committed
236
237
#' This function ...
#'  
Rauschenberger's avatar
Rauschenberger committed
238
#' @param y
Armin Rauschenberger's avatar
Armin Rauschenberger committed
239
240
241
242
#' response vector
#' 
#' @param Y
#' response moderation matrix
Rauschenberger's avatar
Rauschenberger committed
243
#' 
Rauschenberger's avatar
Rauschenberger committed
244
245
#' @param X
#' covariates
Rauschenberger's avatar
Rauschenberger committed
246
#' 
Rauschenberger's avatar
Rauschenberger committed
247
248
#' @param plot
#' logical
Rauschenberger's avatar
Rauschenberger committed
249
#' 
Rauschenberger's avatar
Rauschenberger committed
250
251
252
#' @param nfolds.int
#' internal folds
#' 
Armin Rauschenberger's avatar
Armin Rauschenberger committed
253
254
#' @inheritParams colasso
#' 
Rauschenberger's avatar
Rauschenberger committed
255
#' @examples
Rauschenberger's avatar
Rauschenberger committed
256
257
#' NA
#'
Armin Rauschenberger's avatar
Armin Rauschenberger committed
258
colasso_compare <- function(y,Y,X,plot=TRUE,nfolds.int=10,family="gaussian",type.measure="deviance"){
Rauschenberger's avatar
Rauschenberger committed
259
260
261
    
    fold <- sample(x=rep(x=seq_len(5),length.out=length(y)))
    pred <- matrix(data=NA,nrow=length(y),ncol=8)
Rauschenberger's avatar
Rauschenberger committed
262
    select <- list()
Rauschenberger's avatar
Rauschenberger committed
263
264
    for(i in sort(unique(fold))){
        cat("i =",i,"\n")
Armin Rauschenberger's avatar
Armin Rauschenberger committed
265
        fit <- colasso(y=y[fold!=i],Y=Y[fold!=i,],X=X[fold!=i,],alpha=1,nfolds=nfolds.int,type.measure=type.measure)
Rauschenberger's avatar
Rauschenberger committed
266
267
268
269
270
        for(j in seq_along(fit)){
            pred[fold==i,j] <- glmnet::predict.glmnet(object=fit[[j]],
                                                  newx=X[fold==i,],
                                                  s=fit[[j]]$lambda.min,
                                                  type="response")
Rauschenberger's avatar
Rauschenberger committed
271
        }
Rauschenberger's avatar
Rauschenberger committed
272
        select[[i]] <- lapply(fit,function(x) which(x$beta[,x$lambda==x$lambda.min]!=0))
Rauschenberger's avatar
Rauschenberger committed
273
        pred[fold==i,8] <- mean(y[fold!=i]) # intercept-only model
Rauschenberger's avatar
Rauschenberger committed
274
    }
Rauschenberger's avatar
Rauschenberger committed
275
    colnames(pred) <- c(names(fit),"intercept")
Armin Rauschenberger's avatar
Armin Rauschenberger committed
276
277
278
279
280
281
282
283
    
    if(family=="gaussian" & type.measure=="deviance" | family=="binomial" & type.measure=="mse"){
      loss <- apply(X=pred,MARGIN=2,FUN=function(x) sum((y-x)^2))
    } else if(family=="binomial" & type.measure=="class"){
      loss <- apply(X=pred,MARGIN=2,FUN=function(x) mean(y!=x))
    } else {
      warning("Implement other loss functions!")
    }
Rauschenberger's avatar
Rauschenberger committed
284
    
Rauschenberger's avatar
Rauschenberger committed
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
    ### start temporary ###
    #stability <- numeric()
    #for(k in seq_along(fit)){
    #    matrix <- matrix(data=NA,nrow=5,ncol=5)
    #    for(i in seq_len(5)){
    #        for(j in seq_len(5)){
    #            a <- select[[i]][[k]]
    #            b <- select[[j]][[k]]
    #            matrix[i,j] <- length(intersect(a,b))/length(union(a,b))
    #        }
    #    }
    #    diag(matrix) <- NA
    #    stability[k] <- mean(matrix,na.rm=TRUE)
    #}
    #cat(stability)
    ### end temporary ###
    
Rauschenberger's avatar
Rauschenberger committed
302
303
304
305
    if(plot){
        graphics::par(mar=c(3,3,1,1))
        col <- rep(x=0,times=length(loss)-1)
        col[1] <- col[length(col)] <- 1
Armin Rauschenberger's avatar
Armin Rauschenberger committed
306
307
308
309
310
311
        graphics::plot.new()
        graphics::plot.window(xlim=c(1,length(loss)-1),ylim=range(loss))
        graphics::axis(side=1,at=seq_len(length(loss)-1),labels=names(loss)[-length(loss)])
        graphics::axis(side=2)
        graphics::box()
        graphics::points(y=loss[-length(loss)],
Rauschenberger's avatar
Rauschenberger committed
312
                       x=seq_len(length(loss)-1),
Armin Rauschenberger's avatar
Armin Rauschenberger committed
313
                       col=col+1,pch=col)
Rauschenberger's avatar
Rauschenberger committed
314
315
316
        graphics::abline(v=c(1.5,length(loss)-1.5),lty=2)
        graphics::grid()
        graphics::abline(h=loss[length(loss)],lty=2,col="red")
Rauschenberger's avatar
Rauschenberger committed
317
318
    }
    
Rauschenberger's avatar
Rauschenberger committed
319
    return(loss)
Rauschenberger's avatar
Rauschenberger committed
320
}