Skip to content
GitLab
Projects
Groups
Snippets
Help
Loading...
Help
Help
Support
Keyboard shortcuts
?
Submit feedback
Sign in
Toggle navigation
J
joinet
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Locked Files
Issues
0
Issues
0
List
Boards
Labels
Service Desk
Milestones
Merge Requests
0
Merge Requests
0
Requirements
Requirements
List
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Security & Compliance
Security & Compliance
Dependency List
License Compliance
Operations
Operations
Environments
Packages & Registries
Packages & Registries
Package Registry
Container Registry
Analytics
Analytics
CI / CD
Code Review
Insights
Issue
Repository
Value Stream
Wiki
Wiki
External Wiki
External Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Armin Rauschenberger
joinet
Commits
20ba57e5
Commit
20ba57e5
authored
Jun 29, 2020
by
Armin Rauschenberger
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
competing models
parent
476709d4
Changes
10
Hide whitespace changes
Inline
Side-by-side
Showing
10 changed files
with
295 additions
and
293 deletions
+295
-293
R/functions.R
R/functions.R
+232
-66
docs/articles/joinet.html
docs/articles/joinet.html
+1
-83
docs/pkgdown.yml
docs/pkgdown.yml
+1
-1
docs/reference/cv.joinet.html
docs/reference/cv.joinet.html
+1
-0
docs/reference/joinet.html
docs/reference/joinet.html
+1
-1
docs/reference/predict.joinet.html
docs/reference/predict.joinet.html
+51
-51
docs/reference/weights.joinet.html
docs/reference/weights.joinet.html
+5
-5
man/cv.joinet.Rd
man/cv.joinet.Rd
+1
-0
man/joinet.Rd
man/joinet.Rd
+1
-1
vignettes/joinet.Rmd
vignettes/joinet.Rmd
+1
-85
No files found.
R/functions.R
View file @
20ba57e5
...
...
@@ -90,7 +90,7 @@
#' \dontrun{
#' browseVignettes("joinet") # further examples}
#'
joinet
<-
function
(
Y
,
X
,
family
=
"gaussian"
,
nfolds
=
10
,
foldid
=
NULL
,
type.measure
=
"deviance"
,
alpha.base
=
1
,
alpha.meta
=
0
,
...
){
joinet
<-
function
(
Y
,
X
,
family
=
"gaussian"
,
nfolds
=
10
,
foldid
=
NULL
,
type.measure
=
"deviance"
,
alpha.base
=
1
,
alpha.meta
=
1
,
...
){
#--- temporary ---
# family <- "gaussian"; nfolds <- 10; foldid <- NULL; type.measure <- "deviance"
...
...
@@ -100,7 +100,19 @@ joinet <- function(Y,X,family="gaussian",nfolds=10,foldid=NULL,type.measure="dev
X
<-
as.matrix
(
X
)
cornet
:::
.check
(
x
=
Y
,
type
=
"matrix"
,
miss
=
TRUE
)
if
(
any
(
stats
::
cor
(
Y
,
use
=
"pairwise.complete.obs"
)
<
0
,
na.rm
=
TRUE
)){
warning
(
"Negative correlation!"
,
call.
=
FALSE
)}
### trial start ###
for
(
i
in
1
:
(
ncol
(
Y
)
-1
)){
for
(
j
in
i
:
ncol
(
Y
)){
cor
<-
stats
::
cor.test
(
Y
[,
i
],
Y
[,
j
],
use
=
"pairwise.complete.obs"
)
if
(
cor
$
statistic
<
0
&
cor
$
p.value
<
0.05
){
warning
(
paste
(
"Columns"
,
i
,
"and"
,
j
,
"are negatively correlated."
))
}
}
}
### trial end ###
#if(any(stats::cor(Y,use="pairwise.complete.obs")<0,na.rm=TRUE)){warning("Negative correlation!",call.=FALSE)}
cornet
:::
.check
(
x
=
X
,
type
=
"matrix"
)
#cornet:::.check(x=family,type="vector",values=c("gaussian","binomial","poisson"))
if
(
nrow
(
Y
)
!=
nrow
(
X
)){
stop
(
"Contradictory sample size."
,
call.
=
FALSE
)}
...
...
@@ -515,11 +527,15 @@ print.joinet <- function(x,...){
#' set.seed(1)
#' cv.joinet(Y=Y,X=X,alpha.base=0) # ridge}
#'
cv.joinet
<-
function
(
Y
,
X
,
family
=
"gaussian"
,
nfolds.ext
=
5
,
nfolds.int
=
10
,
foldid.ext
=
NULL
,
foldid.int
=
NULL
,
type.measure
=
"deviance"
,
alpha.base
=
1
,
alpha.meta
=
1
,
compare
=
NULL
,
mice
=
FALSE
,
cvpred
=
FALSE
,
...
){
cv.joinet
<-
function
(
Y
,
X
,
family
=
"gaussian"
,
nfolds.ext
=
5
,
nfolds.int
=
10
,
foldid.ext
=
NULL
,
foldid.int
=
NULL
,
type.measure
=
"deviance"
,
alpha.base
=
1
,
alpha.meta
=
1
,
compare
=
NULL
,
mice
=
FALSE
,
cvpred
=
FALSE
,
times
=
FALSE
,
...
){
if
(
FALSE
){
nfolds.ext
<-
5
;
nfolds.int
<-
10
;
foldid.ext
<-
foldid.int
<-
NULL
;
type.measure
<-
"deviance"
;
alpha.base
<-
1
;
alpha.meta
<-
1
;
compare
<-
c
(
"mnorm"
,
"spls"
,
"mars"
,
"mrf"
);
mice
<-
cvpred
<-
TRUE
nfolds.ext
<-
1
;
foldid.ext
<-
fold
;
nfolds.int
<-
10
;
foldid.int
<-
NULL
family
<-
"gaussian"
;
nfolds.ext
<-
5
;
nfolds.int
<-
10
;
foldid.ext
<-
foldid.int
<-
NULL
;
type.measure
<-
"deviance"
;
alpha.base
<-
alpha.meta
<-
1
;
mice
<-
cvpred
<-
times
<-
FALSE
#nfolds.ext <- 1; foldid.ext <- fold; nfolds.int <- 10; foldid.int <- NULL; compare <- TRUE
}
if
(
!
is.null
(
compare
)
&&
length
(
compare
)
==
1
&&
compare
){
compare
<-
c
(
"mnorm"
,
"spls"
,
"mars"
,
"mrf"
,
"map"
,
"rmtl"
,
"mtps"
,
"mcen"
,
"gpm"
,
"sier"
,
"mrce"
)
}
n
<-
nrow
(
Y
)
...
...
@@ -546,7 +562,7 @@ cv.joinet <- function(Y,X,family="gaussian",nfolds.ext=5,nfolds.int=10,foldid.ex
for
(
i
in
seq_along
(
compare
)){
pkg
<-
switch
(
compare
[
i
],
mnorm
=
"glmnet"
,
mars
=
"earth"
,
spls
=
"spls"
,
mtps
=
"MTPS"
,
rmtl
=
"RMTL"
,
mrf
=
"MultivariateRandomForest"
,
mcen
=
"mcen"
,
map
=
"remMap"
,
s
top
(
"Invalid method."
,
call.
FALSE
))
map
=
"remMap"
,
s
ier
=
"SiER"
,
gpm
=
"GPM"
,
mrce
=
"MRCE"
,
ecc
=
"MLPUGS"
,
stop
(
"Invalid method."
,
call.
=
FALSE
))
if
(
!
pkg
%in%
pkgs
){
stop
(
"Method \""
,
compare
[
i
],
"\" requires package \""
,
pkg
,
"\"."
,
call.
=
FALSE
)
}
...
...
@@ -564,14 +580,23 @@ cv.joinet <- function(Y,X,family="gaussian",nfolds.ext=5,nfolds.int=10,foldid.ex
models
<-
c
(
"base"
,
"meta"
,
compare
,
"none"
)
pred
<-
lapply
(
X
=
models
,
function
(
x
)
matrix
(
data
=
NA
,
nrow
=
n
,
ncol
=
q
))
names
(
pred
)
<-
models
time
<-
lapply
(
X
=
models
,
function
(
x
)
NA
)
names
(
pred
)
<-
names
(
time
)
<-
models
for
(
i
in
seq_len
(
nfolds.ext
)){
Y0
<-
Y
[
foldid.ext
!=
i
,,
drop
=
FALSE
]
#Y1 <- Y[foldid.ext==i,,drop=FALSE]
X0
<-
X
[
foldid.ext
!=
i
,,
drop
=
FALSE
]
X1
<-
X
[
foldid.ext
==
i
,,
drop
=
FALSE
]
# standardise features (trial)
#mu <- apply(X=X0,MARGIN=2,FUN=function(x) mean(x))
#sd <- apply(X=X0,MARGIN=2,FUN=function(x) stats::sd(x))
#X0 <- t((t(X0)-mu)/sd)[,sd!=0]
#X1 <- t((t(X1)-mu)/sd)[,sd!=0]
# or standardise once before cv?
# remove constant features
cond
<-
apply
(
X
=
X0
,
MARGIN
=
2
,
FUN
=
function
(
x
)
stats
::
sd
(
x
)
!=
0
)
X0
<-
X0
[,
cond
];
X1
<-
X1
[,
cond
]
...
...
@@ -583,10 +608,14 @@ cv.joinet <- function(Y,X,family="gaussian",nfolds.ext=5,nfolds.int=10,foldid.ex
}
# base and meta learners
start
<-
Sys.time
()
fit
<-
joinet
(
Y
=
Y0
,
X
=
X0
,
family
=
family
,
type.measure
=
type.measure
,
alpha.base
=
alpha.base
,
alpha.meta
=
alpha.meta
,
foldid
=
foldid
)
# add ellipsis (...)
# also do not standardise!
temp
<-
predict.joinet
(
fit
,
newx
=
X1
)
pred
$
base
[
foldid.ext
==
i
,]
<-
temp
$
base
pred
$
meta
[
foldid.ext
==
i
,]
<-
temp
$
meta
end
<-
Sys.time
()
time
$
meta
<-
as.numeric
(
difftime
(
end
,
start
,
units
=
"secs"
))
# missing values
if
(
mice
&
any
(
is.na
(
Y0
))){
...
...
@@ -603,32 +632,54 @@ cv.joinet <- function(Y,X,family="gaussian",nfolds.ext=5,nfolds.int=10,foldid.ex
# other learners
if
(
"mnorm"
%in%
compare
){
cat
(
"mnorm"
,
" "
)
start
<-
Sys.time
()
if
(
any
(
family
!=
"gaussian"
)){
stop
(
"mnorm requires \"gaussian\" family."
,
call.
=
FALSE
)
}
net
<-
glmnet
::
cv.glmnet
(
x
=
X0
,
y
=
y0
,
family
=
"mgaussian"
,
foldid
=
foldid
,
alpha
=
alpha.base
)
# add ellipsis (...)
pred
$
mnorm
[
foldid.ext
==
i
,]
<-
stats
::
predict
(
object
=
net
,
newx
=
X1
,
s
=
"lambda.min"
,
type
=
"response"
)
end
<-
Sys.time
()
time
$
mnorm
<-
as.numeric
(
difftime
(
end
,
start
,
units
=
"secs"
))
}
else
{
net
<-
glmnet
::
glmnet
(
x
=
X0
,
y
=
y0
,
family
=
"mgaussian"
)
}
if
(
"mars"
%in%
compare
){
cat
(
"mars"
,
" "
)
start
<-
Sys.time
()
if
(
all
(
family
==
"gaussian"
)){
object
<-
earth
::
earth
(
x
=
X0
,
y
=
y0
)
object
<-
earth
::
earth
(
x
=
X0
,
y
=
y0
,
nfold
=
nfolds.int
)
# add:pmethod="cv"; new: nfolds and pmethod
# equivalent: object <- mda::mars(x=X0,y=y0)
}
else
if
(
all
(
family
==
"binomial"
)){
object
<-
earth
::
earth
(
x
=
X0
,
y
=
y0
,
glm
=
list
(
family
=
binomial
))
object
<-
earth
::
earth
(
x
=
X0
,
y
=
y0
,
glm
=
list
(
family
=
stats
::
binomial
),
nfold
=
nfolds.int
)
# add pmethod="cv", new: nfolds and pmethod
}
else
{
stop
(
"
Invalid
."
,
call.
=
FALSE
)
stop
(
"
MARS requires either \"gaussian\" or \"binomial\" family
."
,
call.
=
FALSE
)
}
pred
$
mars
[
foldid.ext
==
i
,]
<-
earth
:::
predict.earth
(
object
=
object
,
newdata
=
X1
,
type
=
"response"
)
#object <- mda::mars(x=X0,y=Y0
)
#
pred$mars[foldid.ext==i,] <- mda:::predict.mars(object=object,newdata=X1
)
end
<-
Sys.time
()
time
$
mars
<-
as.numeric
(
difftime
(
end
,
start
,
units
=
"secs"
)
)
#
mean((Y-pred$mars)^2,na.rm=TRUE
)
}
if
(
"spls"
%in%
compare
){
cv.spls
<-
spls
::
cv.spls
(
x
=
X0
,
y
=
y0
,
fold
=
nfolds.int
,
K
=
seq_len
(
10
),
eta
=
seq
(
from
=
0.1
,
to
=
0.9
,
by
=
0.1
),
plot.it
=
FALSE
)
object
<-
spls
::
spls
(
x
=
X0
,
y
=
y0
,
K
=
cv.spls
$
K.opt
,
eta
=
cv.spls
$
eta.opt
)
cat
(
"spls"
,
" "
)
start
<-
Sys.time
()
if
(
any
(
family
!=
"gaussian"
)){
stop
(
"spls requires \"gaussian\" family."
)
}
invisible
(
utils
::
capture.output
(
cv.spls
<-
spls
::
cv.spls
(
x
=
X0
,
y
=
y0
,
fold
=
nfolds.int
,
K
=
seq_len
(
min
(
ncol
(
X0
),
10
)),
eta
=
seq
(
from
=
0.1
,
to
=
0.9
,
by
=
0.1
),
plot.it
=
FALSE
)))
#scale.x=FALSE
object
<-
spls
::
spls
(
x
=
X0
,
y
=
y0
,
K
=
cv.spls
$
K.opt
,
eta
=
cv.spls
$
eta.opt
)
#scale.x=FALSE
pred
$
spls
[
foldid.ext
==
i
,]
<-
spls
::
predict.spls
(
object
=
object
,
newx
=
X1
,
type
=
"fit"
)
end
<-
Sys.time
()
time
$
spls
<-
as.numeric
(
difftime
(
end
,
start
,
units
=
"secs"
))
}
if
(
"mtps"
%in%
compare
){
cat
(
"mtps"
,
" "
)
start
<-
Sys.time
()
if
(
alpha.base
==
0
){
step1
<-
MTPS
::
glmnet.ridge
}
else
if
(
alpha.base
==
1
){
...
...
@@ -636,6 +687,7 @@ cv.joinet <- function(Y,X,family="gaussian",nfolds.ext=5,nfolds.int=10,foldid.ex
}
else
{
stop
(
"MTPS requires alpha.base 0 or 1."
,
call.
=
FALSE
)
}
#-------------------------------
#--- manual lambda.min start ---
#body <- body(step1)
...
...
@@ -650,14 +702,36 @@ cv.joinet <- function(Y,X,family="gaussian",nfolds.ext=5,nfolds.int=10,foldid.ex
#}
#--- manual lambda.min end ---
#-----------------------------
step2
<-
MTPS
::
rpart1
#---------------------------
#--- manual kmeans start ---
#fun <- MTPS::cv.multiFit
#body <- body(fun)
#body[[11]][[3]][[3]] <- nrow(unique(y0))
#body(fun) <- body
#assignInNamespace(x="cv.multiFit",value=step1,ns="MTPS")
#--- manual kmeans end ---
#-------------------------
if
(
all
(
family
==
"gaussian"
)){
step2
<-
MTPS
::
rpart1
}
else
if
(
all
(
family
==
"binomial"
)){
step2
<-
MTPS
::
glm1
}
else
{
stop
(
"MTPS requires family gaussian or binomial."
,
call.
=
FALSE
)
}
object
<-
MTPS
::
MTPS
(
xmat
=
X0
,
ymat
=
y0
,
family
=
family
,
nfold
=
nfolds.int
,
method.step1
=
step1
,
method.step2
=
step2
)
method.step1
=
step1
,
method.step2
=
step2
,
cv
=
TRUE
,
residual
=
TRUE
)
pred
$
mtps
[
foldid.ext
==
i
,]
<-
MTPS
::
predict.MTPS
(
object
=
object
,
newdata
=
X1
,
type
=
"response"
)
# nfold has no effect for cv=FALSE (default)
end
<-
Sys.time
()
time
$
mtps
<-
as.numeric
(
difftime
(
end
,
start
,
units
=
"secs"
))
# now using cross-validation residual stacking (CVRS)
}
if
(
"rmtl"
%in%
compare
){
cat
(
"rmtl"
,
" "
)
start
<-
Sys.time
()
if
(
all
(
family
==
"gaussian"
)){
type
<-
"Regression"
y0l
<-
lapply
(
seq_len
(
ncol
(
y0
)),
function
(
i
)
y0
[,
i
,
drop
=
FALSE
])
...
...
@@ -671,91 +745,180 @@ cv.joinet <- function(Y,X,family="gaussian",nfolds.ext=5,nfolds.int=10,foldid.ex
X1l
<-
lapply
(
seq_len
(
ncol
(
y0
)),
function
(
i
)
X1
)
#---------------------------
#--- manual tuning start ---
Lam2_seq
<-
10
^
seq
(
from
=
2
,
to
=
-5
,
length.out
=
101
)
cvm
<-
numeric
()
Lam1_seq
<-
c
(
10
^
seq
(
from
=
1
,
to
=
-4
,
length.out
=
11
),
0
)
Lam2_seq
<-
c
(
10
^
seq
(
from
=
1
,
to
=
-4
,
length.out
=
11
),
0
)
cvMTL
<-
list
()
seed
<-
.Random.seed
for
(
j
in
seq_along
(
Lam2_seq
)){
set.seed
(
1
)
cv
m
[
j
]
<-
min
(
RMTL
::
cvMTL
(
X
=
X0l
,
Y
=
y0l
,
type
=
type
,
Lam1
=
0
,
Lam2
=
Lam2_seq
[
j
])
$
cvm
)
.Random.seed
<-
seed
cv
MTL
[[
j
]]
<-
RMTL
::
cvMTL
(
X
=
X0l
,
Y
=
y0l
,
type
=
type
,
Lam1_seq
=
Lam1_seq
,
Lam2
=
Lam2_seq
[
j
]
)
}
#graphics::plot(x=Lam2_seq,y=cvm)
cvm
<-
vapply
(
X
=
cvMTL
,
FUN
=
function
(
x
)
min
(
x
$
cvm
),
FUN.VALUE
=
numeric
(
1
))
Lam1
<-
cvMTL
[[
which.min
(
cvm
)]]
$
Lam1.min
Lam2
<-
Lam2_seq
[
which.min
(
cvm
)]
#graphics::plot(x=Lam2_seq,y=cvm)
#cat(Lam1," ",Lam2,"\n")
#--- manual tuning end ----
#--------------------------
cvMTL
<-
RMTL
::
cvMTL
(
X
=
X0l
,
Y
=
y0l
,
type
=
type
,
Lam2
=
Lam2
)
MTL
<-
RMTL
::
MTL
(
X
=
X0l
,
Y
=
y0l
,
type
=
type
,
Lam1
=
cvMTL
$
Lam1.min
)
MTL
<-
RMTL
::
MTL
(
X
=
X0l
,
Y
=
y0l
,
type
=
type
,
Lam1
=
Lam1
,
Lam2
=
Lam2
)
temp
<-
RMTL
:::
predict.MTL
(
object
=
MTL
,
newX
=
X1l
)
pred
$
rmtl
[
foldid.ext
==
i
,]
<-
do.call
(
what
=
"cbind"
,
args
=
temp
)
end
<-
Sys.time
()
time
$
rmtl
<-
as.numeric
(
difftime
(
end
,
start
,
units
=
"secs"
))
}
if
(
"mrf"
%in%
compare
){
pred
$
mrf
[
foldid.ext
==
i
,]
<-
MultivariateRandomForest
::
build_forest_predict
(
trainX
=
X0
,
trainY
=
Y0
,
n_tree
=
10
,
m_feature
=
floor
(
ncol
(
X0
)
/
3
),
min_leaf
=
5
,
testX
=
X1
)
# use n_tree = 500
cat
(
"mrf"
,
" "
)
start
<-
Sys.time
()
if
(
any
(
family
!=
"gaussian"
)){
stop
(
"mrf requires \"gaussian\" family."
)
}
pred
$
mrf
[
foldid.ext
==
i
,]
<-
MultivariateRandomForest
::
build_forest_predict
(
trainX
=
X0
,
trainY
=
y0
,
n_tree
=
100
,
m_feature
=
min
(
ncol
(
X
)
-1
,
5
),
min_leaf
=
5
,
testX
=
X1
)
# use n_tree=500, m_feature=floor(ncol(X0)/3)
# alternative: IntegratedMRF
# Check why this does not work!
# Check why this does not work well!
end
<-
Sys.time
()
time
$
mrf
<-
as.numeric
(
difftime
(
end
,
start
,
units
=
"secs"
))
}
if
(
"mcen"
%in%
compare
){
if
(
all
(
family
==
"gaussian"
)){
mfamily
<-
"mgaussian"
}
if
(
all
(
family
==
"binomial"
)){
mfamily
<-
"mbinomial"
}
object
<-
mcen
::
cv.mcen
(
x
=
X0
,
y
=
y0
,
family
=
mfamily
,
folds
=
foldid
,
ky
=
1
)
cat
(
"mcen"
,
" "
)
start
<-
Sys.time
()
if
(
all
(
family
==
"gaussian"
)){
type
<-
"mgaussian"
}
else
if
(
all
(
family
==
"binomial"
)){
type
<-
"mbinomial"
}
else
{
stop
(
"mcen requires either \"gaussian\" or \"binomial\"."
,
call.
=
FALSE
)
}
object
<-
mcen
::
cv.mcen
(
x
=
X0
,
y
=
y0
,
family
=
type
,
folds
=
foldid
,
ky
=
1
,
gamma_y
=
seq
(
from
=
0.1
,
to
=
5.1
,
length.out
=
3
),
ndelta
=
3
)
# TEMPORARY gamma_y and ndelta (for speed-up)
temp
<-
mcen
:::
predict.cv.mcen
(
object
=
object
,
newx
=
X1
)
pred
$
mcen
[
foldid.ext
==
i
,]
<-
as.matrix
(
temp
)
# default returns error (thus ky=1)
# single cluster (ky=1) due to setting and error
end
<-
Sys.time
()
time
$
mcen
<-
as.numeric
(
difftime
(
end
,
start
,
units
=
"secs"
))
}
if
(
"map"
%in%
compare
){
#if(alpha.base==0){
# lamL1.v <- 0
# lamL2.v <- fit$base$y1$lambda
#} else if(alpha.base==1){
# lamL1.v <- fit$base$y1$lambda
# lamL2.v <- 0
#} else {
# stop("")
#}
cat
(
"map"
,
" "
)
start
<-
Sys.time
()
if
(
any
(
family
!=
"gaussian"
)){
stop
(
"map requires \"gaussian\" family."
)
}
mean
<-
colMeans
(
y0
)
y0s
<-
y0
-
matrix
(
data
=
mean
,
nrow
=
nrow
(
X0
),
ncol
=
ncol
(
y0
),
byrow
=
TRUE
)
temp
<-
glmnet
::
glmnet
(
x
=
X0
,
y
=
y0s
,
family
=
"mgaussian"
,
intercept
=
FALSE
)
lamL1.v
<-
exp
(
seq
(
from
=
log
(
min
(
temp
$
lambda
)),
to
=
log
(
max
(
temp
$
lambda
)),
length.out
=
30
))
lamL2.v
<-
seq
(
from
=
0
,
to
=
5
,
by
=
1
)
# TEMPORARY
lamL1.v
<-
exp
(
seq
(
from
=
log
(
10
),
to
=
log
(
20
),
length.out
=
11
))
lamL2.v
<-
seq
(
from
=
0
,
to
=
5
,
length.out
=
11
)
cv
<-
remMap
::
remMap.CV
(
X
=
X0
,
Y
=
y0s
,
lamL1.v
=
lamL1.v
,
lamL2.v
=
lamL2.v
)
#graphics::plot(x=lamL1.v,y=log(as.numeric(cv$ols.cv[,3])))
index
<-
which
(
cv
$
ols.cv
==
min
(
cv
$
ols.cv
),
arr.ind
=
TRUE
)
index
<-
which
(
cv
$
ols.cv
==
min
(
cv
$
ols.cv
),
arr.ind
=
TRUE
)[
1
,]
#cat(index,"\n")
object
<-
remMap
::
remMap
(
X.m
=
X0
,
Y.m
=
y0s
,
lamL1
=
lamL1.v
[
index
[
1
]],
lamL2
=
lamL2.v
[
index
[
2
]])
pred
$
map
[
foldid.ext
==
i
,]
<-
matrix
(
data
=
mean
,
nrow
=
nrow
(
X1
),
ncol
=
ncol
(
y0
),
byrow
=
TRUE
)
+
X1
%*%
object
$
phi
# alternative: groupRemMap
end
<-
Sys.time
()
time
$
map
<-
as.numeric
(
difftime
(
end
,
start
,
units
=
"secs"
))
}
if
(
FALSE
){
#"mrce" %in% compare
# bug?
lam1
<-
rev
(
10
^
seq
(
from
=
-2
,
to
=
0
,
by
=
0.5
))
lam2
<-
rev
(
10
^
seq
(
from
=
-2
,
to
=
0
,
by
=
0.5
))
#lam1 <- lam2 <- 10^seq(from=0,to=-0.7,length.out=5)
#lam2 <- rev(10^seq(from=0,to=2,by=0.5))
#lam2 <- c(2,1,0.5)
#lam1 <- 10^seq(from=0,to=-5,length.out=11)
#lam2 <- seq(from=1,to=0.1,by=-0.1)
object
<-
MRCE
::
mrce
(
X
=
X0
,
Y
=
y0
,
lam1.vec
=
lam1
,
lam2.vec
=
lam2
,
method
=
"cv"
,
silent
=
FALSE
,
cov.tol
=
0.1
,
tol.out
=
1e-10
)
pred
$
mrce
[
foldid.ext
==
i
,]
<-
matrix
(
object
$
muhat
,
nrow
=
nrow
(
X1
),
ncol
=
q
,
byrow
=
TRUE
)
+
x1
%*%
object
$
Bhat
}
if
(
FALSE
){
# "sier" %in% compare
stop
(
"SiER not yet implemented"
,
call.
=
FALSE
)
# slow!
object
<-
SiER
::
cv.SiER
(
X
=
X0
,
Y
=
y0
,
K.cv
=
5
,
thres
=
0.5
)
if
(
"sier"
%in%
compare
){
cat
(
"sier"
,
" "
)
start
<-
Sys.time
()
if
(
any
(
family
!=
"gaussian"
)){
stop
(
"SiER requires \"gaussian\" family."
,
call.
=
FALSE
)
}
invisible
(
utils
::
capture.output
(
object
<-
SiER
::
cv.SiER
(
X
=
X0
,
Y
=
y0
,
K.cv
=
5
,
upper.comp
=
10
,
thres
=
0.01
)))
# should be upper.comp=10 and thres=0.01
pred
$
sier
[
foldid.ext
==
i
,]
<-
SiER
::
pred.SiER
(
cv.fit
=
object
,
X.new
=
X1
)
end
<-
Sys.time
()
time
$
sier
<-
as.numeric
(
difftime
(
end
,
start
,
units
=
"secs"
))
}
if
(
FALSE
){
#"gpm" %in% compare
if
(
"gpm"
%in%
compare
){
cat
(
"gpm"
,
" "
)
start
<-
Sys.time
()
if
(
any
(
family
!=
"gaussian"
)){
stop
(
"GPM requires \"gaussian\" family."
,
call.
=
FALSE
)
}
object
<-
GPM
::
Fit
(
X
=
X0
,
Y
=
Y
0
)
object
<-
GPM
::
Fit
(
X
=
X0
,
Y
=
y
0
)
pred
$
gpm
[
foldid.ext
==
i
,]
<-
GPM
::
Predict
(
XF
=
X1
,
Model
=
object
)
$
YF
# Check why this does not work!
end
<-
Sys.time
()
time
$
gpm
<-
as.numeric
(
difftime
(
end
,
start
,
units
=
"secs"
))
}
if
(
"mrce"
%in%
compare
){
cat
(
"mrce"
,
" "
)
start
<-
Sys.time
()
if
(
any
(
family
!=
"gaussian"
)){
stop
(
"MRCE requires \"gaussian\" family."
,
call.
=
FALSE
)
# check this!
}
lam1
<-
lam2
<-
10
^
seq
(
from
=
1
,
to
=
-4
,
length.out
=
11
)
invisible
(
utils
::
capture.output
(
trials
<-
lapply
(
lam2
,
function
(
x
)
tryCatch
(
expr
=
MRCE
::
mrce
(
X
=
X0
,
Y
=
y0
,
lam1.vec
=
lam1
,
lam2.vec
=
x
,
method
=
"cv"
),
error
=
function
(
x
)
NULL
))))
cv.err
<-
sapply
(
trials
,
function
(
x
)
ifelse
(
is.null
(
x
),
Inf
,
min
(
x
$
cv.err
)))
object
<-
trials
[[
which.min
(
cv.err
)]]
pred
$
mrce
[
foldid.ext
==
i
,]
<-
matrix
(
object
$
muhat
,
nrow
=
nrow
(
X1
),
ncol
=
q
,
byrow
=
TRUE
)
+
X1
%*%
object
$
Bhat
end
<-
Sys.time
()
time
$
mrce
<-
as.numeric
(
difftime
(
end
,
start
,
units
=
"secs"
))
}
if
(
FALSE
){
"MSGLasso"
%in%
compare
if
(
!
is.null
(
compare
)){
cat
(
"\n"
)}
#--- development snippets ---
if
(
FALSE
){
# "ecc"
start
<-
Sys.time
()
cat
(
"ecc"
,
" "
)
if
(
any
(
family
!=
"binomial"
)){
stop
(
"MLPUGS requires \"binomial\" family."
,
call.
=
FALSE
)
}
X0f
<-
as.data.frame
(
X0
)
y0f
<-
as.data.frame
(
y0
)
X1f
<-
as.data.frame
(
X1
)
if
(
FALSE
){
# works!
object
<-
MLPUGS
::
ecc
(
x
=
X0f
,
y
=
y0f
,
.f
=
stats
::
glm.fit
,
family
=
stats
::
binomial
(
link
=
"logit"
))
pred_glm
<-
predict
(
object
,
X1f
,
.f
=
function
(
glm_fit
,
newdata
)
{
eta
<-
as.matrix
(
newdata
)
%*%
glm_fit
$
coef
output
<-
glm_fit
$
family
$
linkinv
(
eta
)
colnames
(
output
)
<-
"1"
return
(
output
)
},
n.iters
=
10
,
burn.in
=
0
,
thin
=
1
)
}
if
(
FALSE
){
# fails! respect same form for X0 and X1
object
<-
MLPUGS
::
ecc
(
x
=
X0
,
y
=
y0
,
.f
=
glmnet
::
cv.glmnet
,
family
=
"binomial"
)
#temp <- MLPUGS:::predict.ECC(object,newx=X1,.f=glmnet::predict.cv.glmnet,newdata=X1)
pred_glm
<-
predict
(
object
,
X1
,
.f
=
function
(
fit
,
newdata
)
{
newx
<-
as.matrix
(
newdata
)
pred
<-
glmnet
:::
predict.cv.glmnet
(
fit
,
newx
=
newx
,
s
=
"lambda.min"
,
type
=
"response"
)
return
(
as.matrix
(
pred
))
},
n.iters
=
10
,
burn.in
=
0
,
thin
=
1
)
}
object
<-
MLPUGS
::
ecc
(
x
=
X0f
,
y
=
y0f
,
.f
=
randomForest
::
randomForest
)
pred_ecc
<-
predict
(
object
,
newdata
=
X1f
,
n.iters
=
300
,
burn.in
=
100
,
thin
=
2
,
.f
=
function
(
rF
,
newdata
){
randomForest
:::
predict.randomForest
(
rF
,
newdata
,
type
=
"prob"
)})
# works with n.iter=2 , burn.int=0 and thin=1 (bad choices!)
pred
$
ecc
[
foldid.ext
==
i
,]
<-
summary
(
pred_ecc
,
type
=
"prob"
)
# default seems to work, too, but might also leads to bad results in the
# (unrealistic) simulation
end
<-
Sys.time
()
time
$
ecc
<-
as.numeric
(
difftime
(
end
,
start
,
units
=
"secs"
))
}
# if family=="binomial" test whether any pred outside unit interval
if
(
FALSE
){
# "MSGLasso" %in% compare
MSGLasso
::
MSGLasso.cv
(
X
=
X0
,
Y
=
Y0
)
# requires many user inputs
}
...
...
@@ -801,6 +964,9 @@ cv.joinet <- function(Y,X,family="gaussian",nfolds.ext=5,nfolds.int=10,foldid.ex
if
(
cvpred
){
loss
<-
list
(
loss
=
loss
,
cvpred
=
pred
$
meta
)
}
if
(
times
){
loss
<-
list
(
loss
=
loss
,
time
=
time
)
}
return
(
loss
)
}
...
...
docs/articles/joinet.html
View file @
20ba57e5
...
...
@@ -158,95 +158,13 @@
<div
class=
"sourceCode"
id=
"cb11"
><html><body><pre
class=
"r"
><span
class=
"fu"
><a
href=
"../reference/cv.joinet.html"
>
cv.joinet
</a></span>
(
<span
class=
"kw"
>
Y
</span><span
class=
"kw"
>
=
</span><span
class=
"no"
>
Y
</span>
,
<span
class=
"kw"
>
X
</span><span
class=
"kw"
>
=
</span><span
class=
"no"
>
X
</span>
,
<span
class=
"kw"
>
family
</span><span
class=
"kw"
>
=
</span><span
class=
"no"
>
family
</span>
)
</pre></body></html></div>
<pre><code>
## [,1] [,2]
## base 1.204741 1.523550
## meta 1.1
61487 1.283678
## meta 1.1
85200 1.278125
## none 3.206394 3.495571
</code></pre>
</div>
<div
id=
"reference"
class=
"section level2"
>
<h2
class=
"hasAnchor"
>
<a
href=
"#reference"
class=
"anchor"
></a>
Reference
</h2>
<p>
Armin Rauschenberger and Enrico Glaab (2020). “joinet: predicting correlated outcomes jointly to improve clinical prognosis”.
<em>
Manuscript in preparation.
</em></p>
<!--
```r
#install.packages("MTPS")
data("HIV",package="MTPS")
loss1 <- cv.joinet(Y=YY,X=XX,mnorm=TRUE,spls=TRUE,mtps=TRUE)
#install.packages("spls")
data(yeast,package="spls")
loss2 <- cv.joinet(Y=yeast$y,X=yeast$x,mnorm=TRUE,spls=TRUE,mtps=TRUE)
data(mice,package="spls")
loss3 <- cv.joinet(Y=mice$y,X=mice$x,mnorm=TRUE,spls=TRUE,mtps=TRUE)
# install.packages("MRCE")
data(stock04,package="MRCE",verbose=TRUE)
# otherwise simulated
#install.packages("SiER")
# simulated!
library(MASS)
total.noise <- 0.1
rho <- 0.3
rho.e <- 0.2
nvar=500
nvarq <- 3
sigma2 <- total.noise/nvarq
sigmaX=0.1
nvar.eff=150
Sigma=matrix(0,nvar.eff,nvar.eff)
for(i in 1:nvar.eff){
for(j in 1:nvar.eff){
Sigma[i,j]=rho^(abs(i-j))
}
}
Sigma2.y <- matrix(sigma2*rho.e,nvarq, nvarq)
diag(Sigma2.y) <- sigma2
betas.true <- matrix(0, nvar, 3)
betas.true[1:15,1]=rep(1,15)/sqrt(15)
betas.true[16:45,2]=rep(0.5,30)/sqrt(30)
betas.true[46:105,3]=rep(0.25,60)/sqrt(60)
ntest <- 500
ntrain <- 90
ntot <- ntest+ntrain
X <- matrix(0,ntot,nvar)
X[,1:nvar.eff] <- mvrnorm(n=ntot, rep(0, nvar.eff), Sigma)
X[,-(1:nvar.eff)] <- matrix(sigmaX*rnorm((nvar-nvar.eff)*dim(X)[1]),
dim(X)[1],(nvar-nvar.eff))
Y <- X%*%betas.true
Y <- Y+mvrnorm(n=ntot, rep(0,nvarq), Sigma2.y)
fold <- rep(c(0,1),times=c(ntrain,ntest))
loss4 <- cv.joinet(Y=Y,X=X,foldid.ext=fold,mnorm=TRUE,spls=TRUE,mtps=TRUE)
#install.pacakges("GPM")
# simulated!
#install.packages("RMTL")
# simulated!
data <- RMTL::Create_simulated_data(Regularization="L21", #type="Regression")
#Y <- (do.call(what="cbind",args=data$Y)+1)/2
#X <- data$X[[1]] # example
#loss2 <- cv.joinet(Y=Y,X=X,mnorm=TRUE,spls=TRUE,mtps=TRUE)
```
-->
<!--
```r
#install.packages("plsgenomics")
data(Ecoli,package="plsgenomics")
X <- Ecoli$CONNECdata
Y <- Ecoli$GEdata
loss2 <- cv.joinet(Y=Y,X=X,mnorm=TRUE,mtps=TRUE)
#install.packages("BiocManager")
#BiocManager::install("mixOmics")
data(liver.toxicity,package="mixOmics")
X <- as.matrix(liver.toxicity$gene)
Y <- as.matrix(liver.toxicity$clinic)
Y[,"Cholesterol.mg.dL."] <- -Y[,"Cholesterol.mg.dL."]
loss3 <- cv.joinet(Y=Y,X=X,mnorm=TRUE,mtps=TRUE)
```
-->
</div>
</div>
...
...
docs/pkgdown.yml
View file @
20ba57e5
...
...
@@ -4,5 +4,5 @@ pkgdown_sha: ~
articles
:
article
:
article.html
joinet
:
joinet.html
last_built
:
2020-06-
08T15:55
Z
last_built
:
2020-06-
29T15:24
Z
docs/reference/cv.joinet.html
View file @
20ba57e5
...
...
@@ -140,6 +140,7 @@
<span
class=
'kw'
>
compare
</span>
<span
class=
'kw'
>
=
</span>
<span
class=
'kw'
>
NULL
</span>
,
<span
class=
'kw'
>
mice
</span>
<span
class=
'kw'
>
=
</span>
<span
class=
'fl'
>
FALSE
</span>
,
<span
class=
'kw'
>
cvpred
</span>
<span
class=
'kw'
>
=
</span>
<span
class=
'fl'
>
FALSE
</span>
,
<span
class=
'kw'
>
times
</span>
<span
class=
'kw'
>
=
</span>
<span
class=
'fl'
>
FALSE
</span>
,
<span
class=
'no'
>
...
</span>
)
</pre>
...
...
docs/reference/joinet.html
View file @
20ba57e5
...
...
@@ -134,7 +134,7 @@
<span
class=
'kw'
>
foldid
</span>
<span
class=
'kw'
>
=
</span>
<span
class=
'kw'
>
NULL
</span>
,
<span
class=
'kw'
>
type.measure
</span>
<span
class=
'kw'
>
=
</span>
<span
class=
'st'
>
"deviance"
</span>
,
<span
class=
'kw'
>
alpha.base
</span>
<span
class=
'kw'
>
=
</span>
<span
class=
'fl'
>
1
</span>
,
<span
class=
'kw'
>
alpha.meta
</span>
<span
class=
'kw'
>
=
</span>
<span
class=
'fl'
>
0
</span>
,
<span
class=
'kw'
>
alpha.meta
</span>
<span
class=
'kw'
>
=
</span>
<span
class=
'fl'
>
1
</span>
,
<span
class=
'no'
>
...
</span>
)
</pre>
...
...
docs/reference/predict.joinet.html
View file @
20ba57e5
...
...
@@ -218,57 +218,57 @@ with \(n\) rows (samples) and \(q\) columns (variables).</p>
#
>
[50,] 0.32655598 -1.650189753 -0.59297402
#
>
#
>
$meta
#
>
[,1]
[,2] [,3]
#
>
[1,] 0.1
20026857 -2.63552370 -2.58637745
#
>
[2,] 0.
311517089 -1.57742790 -1.80530334
#
>
[3,] 0.9
70038575 2.83530787 2.50770389
#
>
[4,] 0.98
8790465 3.94203727 3.41749124
#
>
[5,] 0.
702397389 0.20191699 -0.07301232
#
>
[6,] 0.0
69874337 -3.01383050 -2.75113316
#
>
[7,] 0.
178293600 -1.79055373 -1.33607264
#
>
[8,] 0.99
4417547 4.65204823 4.42063500
#
>
[9,] 0.8254
54272 0.92712611 0.33068070
#
>
[10,] 0.1
22295626 -2.58218395 -2.83894610
#
>
[11,] 0.9
92972765 4.45732386 4.34260769
#
>
[12,] 0.
315048704 -0.99259865 -0.55695917
#
>
[13,] 0.0
06376865 -5.52584767 -5.43421552
#
>
[14,] 0.6
04946394 0.15758601 0.59296195
#
>
[15,] 0.
356113893 -0.95306415 -0.75195663
#
>
[16,] 0.4
96057561 -0.75220207 -1.2697836
0
#
>
[17,] 0.
320794509 -1.10958000 -1.05497247
#
>
[18,] 0.
804808814 0.64766735 0.02552338
#
>
[19,] 0.8
34600977 1.22772641 1.64662
367
#
>
[20,] 0.06
2637571 -3.37591257 -3.52530885
#
>
[21,] 0.2
71715919 -1.32688363 -0.85251362
#
>
[22,] 0.
507637975 -0.60258988 -0.83624877
#
>
[23,] 0.
694579538 0.19562106 -0.51989858
#
>
[24,] 0.
406609311 -0.57645076 -0.18326513
#
>
[25,] 0.01
2589218 -4.82289140 -4.55079851
#
>
[26,] 0.93
5270035 2.28086412 2.23449684
#
>
[27,] 0.1
43615352 -2.33101536 -2.18288391
#
>
[28,] 0.9
71697981 2.86526752 2.61585773
#
>
[29,] 0.0
13944811 -4.69966856 -4.55513448
#
>
[30,] 0.0157
68781 -4.71928228 -4.44410746
#
>
[31,] 0.8
91631228 1.50966480 1.39712353
#
>
[32,] 0.85
3434630 1.11696980 0.44705866
<