Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Sign in
Toggle navigation
Menu
Open sidebar
Armin Rauschenberger
joinet
Commits
0dc4a4b9
Commit
0dc4a4b9
authored
Sep 21, 2018
by
Rauschenberger
Browse files
automation
parent
9ac5bb34
Changes
5
Expand all
Hide whitespace changes
Inline
Side-by-side
R/functions.R
View file @
0dc4a4b9
...
...
@@ -30,7 +30,7 @@
#' #y[1] <- 0.5
#' #a <- glmnet::glmnet(y=y,x=x,family="binomial")
#' #b <- stats::glm(y~x,family="binomial")
colasso
<-
function
(
y
,
X
,
nfold
=
10
,
alpha
=
1
,
nfolds
=
10
){
colasso
<-
function
(
y
,
X
,
alpha
=
1
,
nfolds
=
10
){
# properties
n
<-
nrow
(
X
);
p
<-
ncol
(
X
)
...
...
@@ -137,6 +137,7 @@ colasso <- function(y,X,nfold=10,alpha=1,nfolds=10){
# obtain p-values --------------------------------------------------------------
# input: y, X; output: p-value
colasso_marginal_significance
<-
function
(
y
,
X
){
# X = scale(X)
x
<-
vector
()
for
(
i
in
seq_len
(
ncol
(
X
))){
if
(
stats
::
var
(
X
[,
i
])
==
0
){
...
...
@@ -179,6 +180,7 @@ colasso_covariate_weights <- function(x,max=0.05/length(x),min=0.05,version=1){
}
else
{
weight
<-
pmax
(
1
-
(
1
/
min
)
*
x
,
0
)
}
message
(
"non-zero weight: "
,
100
*
round
(
mean
(
weight
!=
0
),
digits
=
2
),
"%"
)
return
(
weight
)
}
...
...
@@ -351,6 +353,7 @@ colasso_compare <- function(y,X,plot=TRUE,nfolds.int=10){
fold
<-
sample
(
x
=
rep
(
x
=
seq_len
(
5
),
length.out
=
length
(
y
)))
pred
<-
matrix
(
data
=
NA
,
nrow
=
length
(
y
),
ncol
=
8
)
select
<-
list
()
for
(
i
in
sort
(
unique
(
fold
))){
cat
(
"i ="
,
i
,
"\n"
)
fit
<-
colasso
(
y
=
y
[
fold
!=
i
],
X
=
X
[
fold
!=
i
,],
alpha
=
1
,
nfolds
=
nfolds.int
)
...
...
@@ -360,11 +363,29 @@ colasso_compare <- function(y,X,plot=TRUE,nfolds.int=10){
s
=
fit
[[
j
]]
$
lambda.min
,
type
=
"response"
)
}
select
[[
i
]]
<-
lapply
(
fit
,
function
(
x
)
which
(
x
$
beta
[,
x
$
lambda
==
x
$
lambda.min
]
!=
0
))
pred
[
fold
==
i
,
8
]
<-
mean
(
y
[
fold
!=
i
])
# intercept-only model
}
colnames
(
pred
)
<-
c
(
names
(
fit
),
"intercept"
)
loss
<-
apply
(
X
=
pred
,
MARGIN
=
2
,
FUN
=
function
(
x
)
sum
((
y
-
x
)
^
2
))
### start temporary ###
#stability <- numeric()
#for(k in seq_along(fit)){
# matrix <- matrix(data=NA,nrow=5,ncol=5)
# for(i in seq_len(5)){
# for(j in seq_len(5)){
# a <- select[[i]][[k]]
# b <- select[[j]][[k]]
# matrix[i,j] <- length(intersect(a,b))/length(union(a,b))
# }
# }
# diag(matrix) <- NA
# stability[k] <- mean(matrix,na.rm=TRUE)
#}
#cat(stability)
### end temporary ###
if
(
plot
){
graphics
::
par
(
mar
=
c
(
3
,
3
,
1
,
1
))
col
<-
rep
(
x
=
0
,
times
=
length
(
loss
)
-1
)
...
...
docs/articles/vignette.html
View file @
0dc4a4b9
This diff is collapsed.
Click to expand it.
docs/reference/colasso.html
View file @
0dc4a4b9
...
...
@@ -120,7 +120,7 @@
</div>
<pre
class=
"usage"
><span
class=
'fu'
>
colasso
</span>
(
<span
class=
'no'
>
y
</span>
,
<span
class=
'no'
>
X
</span>
,
<span
class=
'kw'
>
nfold
</span>
<span
class=
'kw'
>
=
</span>
<span
class=
'fl'
>
10
</span>
,
<span
class=
'kw'
>
alpha
</span>
<span
class=
'kw'
>
=
</span>
<span
class=
'fl'
>
1
</span>
,
<span
class=
'kw'
>
nfolds
</span>
<span
class=
'kw'
>
=
</span>
<span
class=
'fl'
>
10
</span>
)
</pre>
<pre
class=
"usage"
><span
class=
'fu'
>
colasso
</span>
(
<span
class=
'no'
>
y
</span>
,
<span
class=
'no'
>
X
</span>
,
<span
class=
'kw'
>
alpha
</span>
<span
class=
'kw'
>
=
</span>
<span
class=
'fl'
>
1
</span>
,
<span
class=
'kw'
>
nfolds
</span>
<span
class=
'kw'
>
=
</span>
<span
class=
'fl'
>
10
</span>
)
</pre>
<h2
class=
"hasAnchor"
id=
"arguments"
><a
class=
"anchor"
href=
"#arguments"
></a>
Arguments
</h2>
<table
class=
"ref-arguments"
>
...
...
man/colasso.Rd
View file @
0dc4a4b9
...
...
@@ -5,7 +5,7 @@
\alias{colasso-package}
\title{colasso}
\usage{
colasso(y, X,
nfold = 10,
alpha = 1, nfolds = 10)
colasso(y, X, alpha = 1, nfolds = 10)
}
\arguments{
\item{y}{response\strong{:}
...
...
vignettes/vignette.Rmd
View file @
0dc4a4b9
...
...
@@ -15,7 +15,7 @@ https://en.wikipedia.org/wiki/List_of_datasets_for_machine_learning_research#Mic
```{r,eval=FALSE}
loss
=
NULL
loss
<-
NULL
for(rep in 1:4){
set.seed(rep)
...
...
@@ -28,18 +28,18 @@ for(rep in 1:4){
#y <- list$y; X <- list$X
### mice data ###
data(mice,package="BGLR")
nsel <- sort(sample(seq_len(1814),size=1
814
,replace=FALSE))
psel <- sort(sample(seq_len(10346),size=1000,replace=FALSE))
y <- mice.pheno$Obesity.BMI[nsel] # try different phenotypes
X <- mice.X[nsel,psel]
#
data(mice,package="BGLR")
#
nsel <- sort(sample(seq_len(1814),size=1
00
,replace=FALSE))
#
psel <- sort(sample(seq_len(10346),size=1000,replace=FALSE))
#
y <- mice.pheno$Obesity.BMI[nsel] # try different phenotypes
#
X <- mice.X[nsel,psel]
### wheat data ###
#
data(wheat,package="BGLR")
#
nsel <- seq_len(599)
#
psel <- seq_len(1279)
#
y <- as.numeric(wheat.Y[nsel,rep]) # try different phenotypes
#
X <- wheat.X[nsel,psel]
data(wheat,package="BGLR")
nsel <- seq_len(599)
psel <- seq_len(1279)
y <- as.numeric(wheat.Y[nsel,rep]) # try different phenotypes
X <- wheat.X[nsel,psel]
#----- CROSS-VALIDATE -----
...
...
@@ -57,38 +57,111 @@ lib <- "/virdir/Scratch/arauschenberger/library"
devtools::install_github("rauschenberger/colasso",lib=lib)
setwd("/mnt/virdir/Scratch/arauschenberger/colasso")
# load data
utils::data(metabolomics_RP3RP4_overlap,package="BBMRIomics")
utils::data(rnaSeqData_ReadCounts_BIOS_cleaned,package="BBMRIomics")
# selecting samples
#colData(counts)[,"biobank_id"]
samples <- intersect(colnames(counts),colnames(metabolomicData))
Y <- t(SummarizedExperiment::assays(metabolomicData[,samples])$measurements)
X <- t(SummarizedExperiment::assay(counts[,samples]))
# data matrices
Y <- t(SummarizedExperiment::assays(metabolomicData)$measurements)
X <- t(SummarizedExperiment::assay(counts))
# normalisation
X <- spliceQTL::adjust.samples(X)
X <- 2*sqrt(X+3/8)
# selecting samples #colData(counts)[,"biobank_id"]
samples <- intersect(rownames(Y),rownames(X))
Y <- Y[samples,]
X <- X[samples,]
# selecting covariates
names <- spliceQTL::map.genes(chr=NULL)
X <- X[,colnames(X) %in% names$gene_id]
# selecting variables
cond <- apply(Y,2,function(x) sd(x,na.rm=TRUE)!=0)
Y <- Y[,cond]
names <- spliceQTL::map.genes(chr=NULL)$gene_id
X <- X[,colnames(X) %in% names]
save("X","Y",file="data.RData")
load("data.RData")
loss <- NULL
for(j in seq_len(ncol(Y))){
# if(j < 2){next}
cat("|
j =",j,"
|
\n")
y <- Y[,j]
if(sd(y,na.rm=TRUE)==0){next}
if(TRUE){
#
p
sel <- sample(seq_len(
56515
),size=
2
000)
n
sel <- sample(seq_len(
2003
),size=
5
00)
y
<-
y
[nsel]
x
<- X[nsel,
] #
psel
file <- paste0("loss",j,".RData")
if(file.exists(file)){next}else{cat("metabolite
j =",j,"\n")
}
set.seed(j)
if(TRUE){
# set to FALSE
n
sel <- sample(seq_len(
nrow(X)
),size=
1
000)
p
sel <- sample(seq_len(
ncol(X)
),size=
20
00)
Y
<-
Y
[nsel
,
]
X
<- X[nsel,psel
]
}
cond <- !is.na(y)
y <- scale(y[cond])
x <- x[cond,]
loss <- rbind(loss,colasso::colasso_compare(y=y,X=x,nfolds=5))
cond_n <- !is.na(Y[,j])
cond_p <- apply(X,2,function(x) sd(x)!=0)
ys <- as.numeric(scale(Y[cond_n,j]))
Xs <- X[cond_n,cond_p]
loss <- rbind(loss,colasso::colasso_compare(y=ys,X=Xs,nfolds=5)) # increase to 10
save(loss,file=file)
}
############################
### parallel computation ### (trial)
############################
lib <- "/virdir/Scratch/arauschenberger/library"
.libPaths(lib)
devtools::install_github("rauschenberger/colasso",lib=lib)
setwd("/mnt/virdir/Scratch/arauschenberger/colasso")
check <- function(Y,X,j){
lib <- "/virdir/Scratch/arauschenberger/library"
.libPaths(lib)
file <- paste0("loss",j,".RData")
if(file.exists(file)){return(NULL)}else{cat("metabolite j =",j,"\n")}
set.seed(j)
if(FALSE){ # set to FALSE !
nsel <- sample(seq_len(nrow(X)),size=500)
psel <- sample(seq_len(ncol(X)),size=1000)
Y <- Y[nsel,]
X <- X[nsel,psel]
}
cond <- !is.na(Y[,j])
ys <- as.numeric(scale(Y[cond,j]))
Xs <- X[cond,]
loss <- colasso::colasso_compare(y=ys,X=Xs,nfolds=10) # set to 10 !
save(loss,file=file)
}
load("data.RData")
cluster <- parallel::makeCluster(20)
parallel::clusterExport(cluster,c("check","X","Y"),envir=environment())
parallel::parSapply(cluster,1:ncol(Y),function(j) tryCatch(check(Y=Y,X=X,j=j),error=function(x) NULL))
parallel::stopCluster(cluster)
rm(cluster)
frame <- NULL
for(i in seq_len(233)){
file = paste0("loss",i,".RData")
if(!file.exists(file)){next}
#file.copy(from=file,to=file.path("old",file))
file.remove(file)
#load(file)
#frame <- rbind(frame,loss)
}
frame <- as.data.frame(frame)
rownames(frame) = 1:nrow(frame)
frame <- frame[frame$standard < frame$intercept,]
mean(frame$standard < frame$select)
mean(frame$standard == frame$select)
mean(frame$standard > frame$select)
# DANGER: delete all files
#for(i in seq_len(233)){
# ### file.remove(paste0("loss",i,".RData"))
#}
```
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment