... ... @@ -21,7 +21,7 @@ knitr::opts_chunk$set( ## Scope Multivariate Elastic Net Regression (extending [glmnet](https://CRAN.R-project.org/package=glmnet)). Multivariate Elastic Net Regression (extending the [R](https://cran.r-project.org) package [glmnet](https://CRAN.R-project.org/package=glmnet)). ## Installation ... ...  ... ... @@ -10,7 +10,8 @@ Status](https://codecov.io/github/rauschenberger/joinet/coverage.svg?branch=mast ## Scope Multivariate Elastic Net Regression (extending Multivariate Elastic Net Regression (extending the [R](https://cran.r-project.org) package [glmnet](https://CRAN.R-project.org/package=glmnet)). ## Installation ... ...  ... ... @@ -89,12 +89,80 @@ Installation Install the current release from CRAN: or the latest development version from GitHub: Or install the latest development version from GitHub: Access the help pages with: ?joinet help(joinet) Initialisation Load and attach the package: library(joinet) And access the documentation: ?joinet help(joinet) browseVignettes("joinet") Simulation For n samples, we simulate p inputs (features, covariates) and q outputs (outcomes, responses). We assume high-dimensional inputs (p $$\gg$$ n) and low-dimensional outputs (q $$\ll$$ n). We simulate the p inputs from a multivariate normal distribution. For the mean, we use the p-dimensional vector mu, where all elements equal zero. For the covariance, we use the p $$\times$$ p matrix Sigma, where the entry in row $$i$$ and column $$j$$ equals rho$$^{|i-j|}$$. The parameter rho determines the strength of the correlation among the inputs, with small rho leading weak correlations, and large rho leading to strong correlations (0 < rho < 1). The input matrix X has n rows and p columns. mu <- rep(0,times=p) rho <- 0.90 Sigma <- rho^abs(col(diag(p))-row(diag(p))) X <- MASS::mvrnorm(n=n,mu=mu,Sigma=Sigma) We simulate the input-output effects from independent Bernoulli distributions. The parameter pi determines the number of effects, with small pi leading to few effects, and large pi leading to many effects (0 < pi < 1). The scalar alpha represents the intercept, and the p-dimensional vector beta represents the slopes. From the intercept alpha, the slopes beta and the inputs X, we calculate the linear predictor, the n-dimensional vector eta. Rescale the linear predictor to make the effects weaker or stronger. Set the argument family to "gaussian", "binomial", or "poisson" to define the distribution. The n times p matrix Y represents the outputs. We assume the outcomes are positively correlated. Application The function joinet fits univariate and multivariate regression. Set the argument alpha.base to 0 (ridge) or 1 (lasso). object <- joinet(Y=Y,X=X,family=family) Standard methods are available. The function predict returns the predicted values from the univariate (base) and multivariate (meta) models. The function coef returns the estimated intercepts (alpha) and slopes (beta) from the multivariate model (input-output effects). And the function weights returns the weights from stacking (output-output effects). predict(object,newx=X) coef(object) weights(object) The function cv.joinet compares the predictive performance of univariate (base) and multivariate (meta) regression by nested cross-validation. The argument type.measure determines the loss function. cv.joinet(Y=Y,X=X,family=family) ## [,1] [,2] ## base 1.204741 1.523550 ## meta 1.161487 1.283678 ## none 3.206394 3.495571 Reference Armin Rauschenberger and Enrico Glaab (2019). “Multivariate regression through stacked generalisation”. Manuscript in preparation. Reference Armin Rauschenberger and Enrico Glaab (2019). “Multivariate regression through stacked generalisation”. Manuscript in preparation. ... ... @@ -131,6 +193,9 @@ loss <- joinet:::cv.joinet(Y=Y,X=X) Contents ... ...  ... ... @@ -82,7 +82,7 @@ Scope Multivariate Elastic Net Regression (extending glmnet). Multivariate Elastic Net Regression (extending the R package glmnet). ... ...  ... ... @@ -149,7 +149,7 @@ in a matrix with $$p$$ rows (inputs) and $$q$$ columns. Examples n <- 50; q <- 3; p <- 100 n <- 50; p <- 100; q <- 3 X <- matrix(rnorm(n*p),nrow=n,ncol=p) Y <- replicate(n=q,expr=rnorm(n=n,mean=rowSums(X[,1:5]))) object <- joinet(Y=Y,X=X) ... ...  ... ... @@ -204,7 +204,7 @@ including the cross-validated loss. Examples n <- 50; q <- 3; p <- 100 n <- 50; p <- 100; q <- 3 X <- matrix(rnorm(n*p),nrow=n,ncol=p) Y <- replicate(n=q,expr=rnorm(n=n,mean=rowSums(X[,1:5]))) cv.joinet(Y=Y,X=X) #> [,1] [,2] [,3] ... ... @@ -213,26 +213,43 @@ including the cross-validated loss. #> none 6.028672 6.220040 6.206345 # NOT RUN { # correlated features n <- 50; q <- 3; p <- 100 n <- 50; p <- 100; q <- 3 mu <- rep(0,times=p) Sigma <- 0.90^abs(col(diag(p))-row(diag(p))) X <- MASS::mvrnorm(n=n,mu=mu,Sigma=Sigma) mu <- rowSums(X[,sample(seq_len(p),size=5)]) Sigma <- diag(n) Y <- t(MASS::mvrnorm(n=q,mu=mu,Sigma=Sigma)) Y <- replicate(n=q,expr=rnorm(n=n,mean=mu)) #Y <- t(MASS::mvrnorm(n=q,mu=mu,Sigma=diag(n))) cv.joinet(Y=Y,X=X) # } # NOT RUN { # other distributions n <- 50; q <- 3; p <- 100 mu <- rep(0,times=p) n <- 50; p <- 100; q <- 3 X <- matrix(rnorm(n*p),nrow=n,ncol=p) eta <- rowSums(X[,sample(seq_len(p),size=5)]) eta <- rowSums(X[,1:5]) Y <- replicate(n=q,expr=rbinom(n=n,size=1,prob=1/(1+exp(-eta)))) cv.joinet(Y=Y,X=X,family="binomial") Y <- replicate(n=q,expr=rpois(n=n,lambda=exp(scale(eta)))) cv.joinet(Y=Y,X=X,family="poisson") # } # NOT RUN { # uncorrelated outcomes n <- 50; p <- 100; q <- 3 X <- matrix(rnorm(n*p),nrow=n,ncol=p) y <- rnorm(n=n,mean=rowSums(X[,1:5])) Y <- cbind(y,matrix(rnorm(n*(q-1)),nrow=n,ncol=q-1)) cv.joinet(Y=Y,X=X) # } # NOT RUN { # sparse and dense models n <- 50; p <- 100; q <- 3 X <- matrix(rnorm(n*p),nrow=n,ncol=p) Y <- replicate(n=q,expr=rnorm(n=n,mean=rowSums(X[,1:5]))) set.seed(1) # fix folds cv.joinet(Y=Y,X=X,alpha.base=1) # lasso set.seed(1) cv.joinet(Y=Y,X=X,alpha.base=0) # ridge # }  ... ... @@ -204,9 +204,13 @@ ridge renders dense models (alpha$$=0$$) "Multivariate elastic net regression through stacked generalisation" Manuscript in preparation. See also Type browseVignettes("joinet") for examples. Examples n <- 50; q <- 3; p <- 100 n <- 50; p <- 100; q <- 3 X <- matrix(rnorm(n*p),nrow=n,ncol=p) Y <- replicate(n=q,expr=rnorm(n=n,mean=rowSums(X[,1:5]))) object <- joinet(Y=Y,X=X) ... ... @@ -221,6 +225,8 @@ ridge renders dense models (alpha$$=0$$) • Details • References • See also • Examples • ... ...  ... ... @@ -153,7 +153,7 @@ with $$n$$ rows (samples) and $$q$$ columns (variables). Examples n <- 50; q <- 3; p <- 100 n <- 50; p <- 100; q <- 3 X <- matrix(rnorm(n*p),nrow=n,ncol=p) Y <- replicate(n=q,expr=rnorm(n=n,mean=rowSums(X[,1:5]))) object <- joinet(Y=Y,X=X) ... ...  ... ... @@ -148,7 +148,7 @@ in the row on the outcomes in the column. Examples n <- 50; q <- 3; p <- 100 n <- 50; p <- 100; q <- 3 X <- matrix(rnorm(n*p),nrow=n,ncol=p) Y <- replicate(n=q,expr=rnorm(n=n,mean=rowSums(X[,1:5]))) object <- joinet(Y=Y,X=X) ... ...  ... ... @@ -24,7 +24,7 @@ Extracts pooled coefficients. the coefficients from the base learners.) } \examples{ n <- 50; q <- 3; p <- 100 n <- 50; p <- 100; q <- 3 X <- matrix(rnorm(n*p),nrow=n,ncol=p) Y <- replicate(n=q,expr=rnorm(n=n,mean=rowSums(X[,1:5]))) object <- joinet(Y=Y,X=X) ... ...  ... ... @@ -62,33 +62,48 @@ including the cross-validated loss. Compares univariate and multivariate regression } \examples{ n <- 50; q <- 3; p <- 100 n <- 50; p <- 100; q <- 3 X <- matrix(rnorm(n*p),nrow=n,ncol=p) Y <- replicate(n=q,expr=rnorm(n=n,mean=rowSums(X[,1:5]))) cv.joinet(Y=Y,X=X) \dontrun{ # correlated features n <- 50; q <- 3; p <- 100 n <- 50; p <- 100; q <- 3 mu <- rep(0,times=p) Sigma <- 0.90^abs(col(diag(p))-row(diag(p))) X <- MASS::mvrnorm(n=n,mu=mu,Sigma=Sigma) mu <- rowSums(X[,sample(seq_len(p),size=5)]) Sigma <- diag(n) Y <- t(MASS::mvrnorm(n=q,mu=mu,Sigma=Sigma)) cv.joinet(Y=Y,X=X) } Y <- replicate(n=q,expr=rnorm(n=n,mean=mu)) #Y <- t(MASS::mvrnorm(n=q,mu=mu,Sigma=diag(n))) cv.joinet(Y=Y,X=X)} \dontrun{ # other distributions n <- 50; q <- 3; p <- 100 mu <- rep(0,times=p) n <- 50; p <- 100; q <- 3 X <- matrix(rnorm(n*p),nrow=n,ncol=p) eta <- rowSums(X[,sample(seq_len(p),size=5)]) eta <- rowSums(X[,1:5]) Y <- replicate(n=q,expr=rbinom(n=n,size=1,prob=1/(1+exp(-eta)))) cv.joinet(Y=Y,X=X,family="binomial") Y <- replicate(n=q,expr=rpois(n=n,lambda=exp(scale(eta)))) cv.joinet(Y=Y,X=X,family="poisson") } cv.joinet(Y=Y,X=X,family="poisson")} \dontrun{ # uncorrelated outcomes n <- 50; p <- 100; q <- 3 X <- matrix(rnorm(n*p),nrow=n,ncol=p) y <- rnorm(n=n,mean=rowSums(X[,1:5])) Y <- cbind(y,matrix(rnorm(n*(q-1)),nrow=n,ncol=q-1)) cv.joinet(Y=Y,X=X)} \dontrun{ # sparse and dense models n <- 50; p <- 100; q <- 3 X <- matrix(rnorm(n*p),nrow=n,ncol=p) Y <- replicate(n=q,expr=rnorm(n=n,mean=rowSums(X[,1:5]))) set.seed(1) # fix folds cv.joinet(Y=Y,X=X,alpha.base=1) # lasso set.seed(1) cv.joinet(Y=Y,X=X,alpha.base=0) # ridge} }  ... ... @@ -65,7 +65,7 @@ lasso renders sparse models (\code{alpha}\eqn{=1}), ridge renders dense models (\code{alpha}\eqn{=0}) } \examples{ n <- 50; q <- 3; p <- 100 n <- 50; p <- 100; q <- 3 X <- matrix(rnorm(n*p),nrow=n,ncol=p) Y <- replicate(n=q,expr=rnorm(n=n,mean=rowSums(X[,1:5]))) object <- joinet(Y=Y,X=X) ... ... @@ -76,3 +76,6 @@ Armin Rauschenberger, Enrico Glaab (2019) "Multivariate elastic net regression through stacked generalisation" \emph{Manuscript in preparation}. } \seealso{ Type \code{browseVignettes("joinet")} for examples. }  ... ... @@ -26,7 +26,7 @@ with \eqn{n} rows (samples) and \eqn{q} columns (variables). Predicts outcome from features with stacked model. } \examples{ n <- 50; q <- 3; p <- 100 n <- 50; p <- 100; q <- 3 X <- matrix(rnorm(n*p),nrow=n,ncol=p) Y <- replicate(n=q,expr=rnorm(n=n,mean=rowSums(X[,1:5]))) object <- joinet(Y=Y,X=X) ... ...  ... ... @@ -24,7 +24,7 @@ Extracts coefficients from the meta learner, i.e. the weights for the base learners. } \examples{ n <- 50; q <- 3; p <- 100 n <- 50; p <- 100; q <- 3 X <- matrix(rnorm(n*p),nrow=n,ncol=p) Y <- replicate(n=q,expr=rnorm(n=n,mean=rowSums(X[,1:5]))) object <- joinet(Y=Y,X=X) ... ...  ... ... @@ -11,6 +11,7 @@ editor_options: {r setup, include=FALSE} knitr::opts_chunk$set(echo = TRUE) set.seed(1)  ## Installation ... ... @@ -21,20 +22,111 @@ Install the current release from [CRAN](https://CRAN.R-project.org/package=joine install.packages("joinet")  or the latest development version from [GitHub](https://github.com/rauschenberger/joinet): Or install the latest development version from [GitHub](https://github.com/rauschenberger/joinet): {r,eval=FALSE} #install.packages("devtools") devtools::install_github("rauschenberger/joinet")  Access the help pages with: ## Initialisation Load and attach the package: {r} library(joinet)  And access the [documentation](https://rauschenberger.github.io/joinet/): {r,eval=FALSE} ?joinet help(joinet) browseVignettes("joinet")  ## Simulation For n samples, we simulate p inputs (features, covariates) and q outputs (outcomes, responses). We assume high-dimensional inputs (p $\gg$ n) and low-dimensional outputs (q $\ll$ n). {r} n <- 100 q <- 2 p <- 500  We simulate the p inputs from a multivariate normal distribution. For the mean, we use the p-dimensional vector mu, where all elements equal zero. For the covariance, we use the p $\times$ p matrix Sigma, where the entry in row $i$ and column $j$ equals rho$^{|i-j|}$. The parameter rho determines the strength of the correlation among the inputs, with small rho leading weak correlations, and large rho leading to strong correlations (0 < rho < 1). The input matrix X has n rows and p columns. {r} mu <- rep(0,times=p) rho <- 0.90 Sigma <- rho^abs(col(diag(p))-row(diag(p))) X <- MASS::mvrnorm(n=n,mu=mu,Sigma=Sigma)  We simulate the input-output effects from independent Bernoulli distributions. The parameter pi determines the number of effects, with small pi leading to few effects, and large pi leading to many effects (0 < pi < 1). The scalar alpha represents the intercept, and the p-dimensional vector beta represents the slopes. {r} pi <- 0.01 alpha <- 0 beta <- rbinom(n=p,size=1,prob=pi)  From the intercept alpha, the slopes beta` and the inp