Commit 9ac5bb34 authored by Rauschenberger's avatar Rauschenberger
Browse files

automation

parent e4a11da7
......@@ -371,7 +371,7 @@ colasso_compare <- function(y,X,plot=TRUE,nfolds.int=10){
col[1] <- col[length(col)] <- 1
graphics::plot(y=loss[-length(loss)],
x=seq_len(length(loss)-1),
col=col+1,pch=col)
col=col+1,ylim=range(loss),pch=col)
graphics::abline(v=c(1.5,length(loss)-1.5),lty=2)
graphics::grid()
graphics::abline(h=loss[length(loss)],lty=2,col="red")
......
......@@ -94,10 +94,10 @@ https://archive.ics.uci.edu/ml/datasets.html?format=&task=&att=&area=&numAtt=&nu
https://en.wikipedia.org/wiki/List_of_datasets_for_machine_learning_research#Microbe
-->
<div class="sourceCode" id="cb1"><pre class="sourceCode r"><code class="sourceCode r"><a class="sourceLine" id="cb1-1" data-line-number="1"><span class="cf">for</span>(rep <span class="cf">in</span> <span class="dv">1</span><span class="op">:</span><span class="dv">4</span>){</a>
<a class="sourceLine" id="cb1-2" data-line-number="2"> <span class="kw">set.seed</span>(rep)</a>
<a class="sourceLine" id="cb1-3" data-line-number="3"> </a>
<a class="sourceLine" id="cb1-4" data-line-number="4"></a>
<div class="sourceCode" id="cb1"><pre class="sourceCode r"><code class="sourceCode r"><a class="sourceLine" id="cb1-1" data-line-number="1">loss =<span class="st"> </span><span class="ot">NULL</span></a>
<a class="sourceLine" id="cb1-2" data-line-number="2"><span class="cf">for</span>(rep <span class="cf">in</span> <span class="dv">1</span><span class="op">:</span><span class="dv">4</span>){</a>
<a class="sourceLine" id="cb1-3" data-line-number="3"> <span class="kw">set.seed</span>(rep)</a>
<a class="sourceLine" id="cb1-4" data-line-number="4"> </a>
<a class="sourceLine" id="cb1-5" data-line-number="5"> <span class="co">#----- OBTAIN DATA -----</span></a>
<a class="sourceLine" id="cb1-6" data-line-number="6"> </a>
<a class="sourceLine" id="cb1-7" data-line-number="7"> ### simulated data <span class="al">###</span></a>
......@@ -107,54 +107,27 @@ https://en.wikipedia.org/wiki/List_of_datasets_for_machine_learning_research#Mic
<a class="sourceLine" id="cb1-11" data-line-number="11"> <span class="co">#y &lt;- list$y; X &lt;- list$X</span></a>
<a class="sourceLine" id="cb1-12" data-line-number="12"> </a>
<a class="sourceLine" id="cb1-13" data-line-number="13"> ### mice data <span class="al">###</span></a>
<a class="sourceLine" id="cb1-14" data-line-number="14"> <span class="co">#data(mice,package="BGLR")</span></a>
<a class="sourceLine" id="cb1-15" data-line-number="15"> <span class="co">#nsel &lt;- sort(sample(seq_len(1814),size=200,replace=FALSE))</span></a>
<a class="sourceLine" id="cb1-16" data-line-number="16"> <span class="co">#psel &lt;- sort(sample(seq_len(10346),size=10346,replace=FALSE))</span></a>
<a class="sourceLine" id="cb1-17" data-line-number="17"> <span class="co">#y &lt;- mice.pheno$Obesity.BMI[nsel] # try different phenotypes</span></a>
<a class="sourceLine" id="cb1-18" data-line-number="18"> <span class="co">#X &lt;- mice.X[nsel,psel]</span></a>
<a class="sourceLine" id="cb1-14" data-line-number="14"> <span class="kw">data</span>(mice,<span class="dt">package=</span><span class="st">"BGLR"</span>)</a>
<a class="sourceLine" id="cb1-15" data-line-number="15"> nsel &lt;-<span class="st"> </span><span class="kw">sort</span>(<span class="kw">sample</span>(<span class="kw">seq_len</span>(<span class="dv">1814</span>),<span class="dt">size=</span><span class="dv">1814</span>,<span class="dt">replace=</span><span class="ot">FALSE</span>))</a>
<a class="sourceLine" id="cb1-16" data-line-number="16"> psel &lt;-<span class="st"> </span><span class="kw">sort</span>(<span class="kw">sample</span>(<span class="kw">seq_len</span>(<span class="dv">10346</span>),<span class="dt">size=</span><span class="dv">1000</span>,<span class="dt">replace=</span><span class="ot">FALSE</span>))</a>
<a class="sourceLine" id="cb1-17" data-line-number="17"> y &lt;-<span class="st"> </span>mice.pheno<span class="op">$</span>Obesity.BMI[nsel] <span class="co"># try different phenotypes</span></a>
<a class="sourceLine" id="cb1-18" data-line-number="18"> X &lt;-<span class="st"> </span>mice.X[nsel,psel]</a>
<a class="sourceLine" id="cb1-19" data-line-number="19"> </a>
<a class="sourceLine" id="cb1-20" data-line-number="20"> ### wheat data <span class="al">###</span></a>
<a class="sourceLine" id="cb1-21" data-line-number="21"> <span class="kw">data</span>(wheat,<span class="dt">package=</span><span class="st">"BGLR"</span>)</a>
<a class="sourceLine" id="cb1-22" data-line-number="22"> nsel &lt;-<span class="st"> </span><span class="kw">seq_len</span>(<span class="dv">599</span>) <span class="co"># sort(sample(seq_len(599),size=200,replace=FALSE))</span></a>
<a class="sourceLine" id="cb1-23" data-line-number="23"> psel &lt;-<span class="st"> </span><span class="kw">seq_len</span>(<span class="dv">1279</span>) <span class="co"># sort(sample(seq_len(1279),size=200,replace=FALSE))</span></a>
<a class="sourceLine" id="cb1-24" data-line-number="24"> y &lt;-<span class="st"> </span><span class="kw">as.numeric</span>(wheat.Y[nsel,rep]) <span class="co"># try different phenotypes</span></a>
<a class="sourceLine" id="cb1-25" data-line-number="25"> X &lt;-<span class="st"> </span>wheat.X[nsel,psel]</a>
<a class="sourceLine" id="cb1-21" data-line-number="21"> <span class="co">#data(wheat,package="BGLR")</span></a>
<a class="sourceLine" id="cb1-22" data-line-number="22"> <span class="co">#nsel &lt;- seq_len(599)</span></a>
<a class="sourceLine" id="cb1-23" data-line-number="23"> <span class="co">#psel &lt;- seq_len(1279)</span></a>
<a class="sourceLine" id="cb1-24" data-line-number="24"> <span class="co">#y &lt;- as.numeric(wheat.Y[nsel,rep]) # try different phenotypes</span></a>
<a class="sourceLine" id="cb1-25" data-line-number="25"> <span class="co">#X &lt;- wheat.X[nsel,psel]</span></a>
<a class="sourceLine" id="cb1-26" data-line-number="26"> </a>
<a class="sourceLine" id="cb1-27" data-line-number="27"> <span class="co">#----- CROSS-VALIDATE -----</span></a>
<a class="sourceLine" id="cb1-28" data-line-number="28"> </a>
<a class="sourceLine" id="cb1-29" data-line-number="29"> loss &lt;-<span class="st"> </span><span class="kw"><a href="../reference/colasso_compare.html">colasso_compare</a></span>(<span class="dt">y=</span>y,<span class="dt">X=</span>X,<span class="dt">nfolds.int=</span><span class="dv">5</span>)</a>
<a class="sourceLine" id="cb1-29" data-line-number="29"> loss &lt;-<span class="st"> </span><span class="kw">rbind</span>(loss,<span class="kw"><a href="../reference/colasso_compare.html">colasso_compare</a></span>(<span class="dt">y=</span>y,<span class="dt">X=</span>X,<span class="dt">nfolds.int=</span><span class="dv">5</span>))</a>
<a class="sourceLine" id="cb1-30" data-line-number="30"> </a>
<a class="sourceLine" id="cb1-31" data-line-number="31"> <span class="co"># fold &lt;- sample(x=rep(x=seq_len(5),length.out=length(y)))</span></a>
<a class="sourceLine" id="cb1-32" data-line-number="32"> <span class="co"># pred &lt;- matrix(data=NA,nrow=length(y),ncol=8)</span></a>
<a class="sourceLine" id="cb1-33" data-line-number="33"> <span class="co"># for(i in sort(unique(fold))){</span></a>
<a class="sourceLine" id="cb1-34" data-line-number="34"> <span class="co"># cat("i =",i,"\n")</span></a>
<a class="sourceLine" id="cb1-35" data-line-number="35"> <span class="co"># fit &lt;- colasso(y=y[fold!=i],X=X[fold!=i,],alpha=1) # increase nfold? us</span></a>
<a class="sourceLine" id="cb1-36" data-line-number="36"> <span class="co"># # MEM[[i]] &lt;- fit # trial</span></a>
<a class="sourceLine" id="cb1-37" data-line-number="37"> <span class="co"># </span></a>
<a class="sourceLine" id="cb1-38" data-line-number="38"> <span class="co"># for(j in seq_along(fit)){</span></a>
<a class="sourceLine" id="cb1-39" data-line-number="39"> <span class="co"># pred[fold==i,j] &lt;- glmnet::predict.glmnet(object=fit[[j]],</span></a>
<a class="sourceLine" id="cb1-40" data-line-number="40"> <span class="co"># newx=X[fold==i,],</span></a>
<a class="sourceLine" id="cb1-41" data-line-number="41"> <span class="co"># s=fit[[j]]$lambda.min,</span></a>
<a class="sourceLine" id="cb1-42" data-line-number="42"> <span class="co"># type="response")</span></a>
<a class="sourceLine" id="cb1-43" data-line-number="43"> <span class="co"># }</span></a>
<a class="sourceLine" id="cb1-44" data-line-number="44"> <span class="co"># pred[fold==i,8] &lt;- mean(y[fold!=i]) # intercept-only model</span></a>
<a class="sourceLine" id="cb1-45" data-line-number="45"> <span class="co"># }</span></a>
<a class="sourceLine" id="cb1-46" data-line-number="46"> <span class="co"># loss &lt;- apply(X=pred,MARGIN=2,FUN=function(x) sum((y-x)^2))</span></a>
<a class="sourceLine" id="cb1-47" data-line-number="47"> </a>
<a class="sourceLine" id="cb1-48" data-line-number="48"> <span class="co">#graphics::par(mar=c(3,3,1,1))</span></a>
<a class="sourceLine" id="cb1-49" data-line-number="49"> <span class="co">#col &lt;- rep(x=0,times=length(loss)-1)</span></a>
<a class="sourceLine" id="cb1-50" data-line-number="50"> <span class="co">#col[1] &lt;- col[length(col)] &lt;- 1</span></a>
<a class="sourceLine" id="cb1-51" data-line-number="51"> <span class="co">#plot(y=loss[-length(loss)],x=seq_len(length(loss)-1),</span></a>
<a class="sourceLine" id="cb1-52" data-line-number="52"> <span class="co"># col=col+1,pch=col) # ,ylim=range(loss))</span></a>
<a class="sourceLine" id="cb1-53" data-line-number="53"> <span class="co">#abline(v=c(1.5,length(loss)-1.5),lty=2)</span></a>
<a class="sourceLine" id="cb1-54" data-line-number="54"> <span class="co">#graphics::grid()</span></a>
<a class="sourceLine" id="cb1-55" data-line-number="55"> <span class="co">#abline(h=loss[length(loss)],lty=2,col="red")</span></a>
<a class="sourceLine" id="cb1-56" data-line-number="56"> </a>
<a class="sourceLine" id="cb1-57" data-line-number="57">}</a></code></pre></div>
<a class="sourceLine" id="cb1-31" data-line-number="31">}</a></code></pre></div>
<div id="bbmri-data-important" class="section level1">
<h1 class="hasAnchor">
<a href="#bbmri-data-important" class="anchor"></a>BBMRI DATA (important!)</h1>
<p>Repeat this for all normally distributed responses, omit samples with missing response, save results to file.</p>
<div class="sourceCode" id="cb2"><pre class="sourceCode r"><code class="sourceCode r"><a class="sourceLine" id="cb2-1" data-line-number="1">lib &lt;-<span class="st"> "/virdir/Scratch/arauschenberger/library"</span></a>
<a class="sourceLine" id="cb2-2" data-line-number="2"><span class="kw">.libPaths</span>(lib)</a>
<a class="sourceLine" id="cb2-3" data-line-number="3">devtools<span class="op">::</span><span class="kw"><a href="http://www.rdocumentation.org/packages/devtools/topics/install_github">install_github</a></span>(<span class="st">"rauschenberger/colasso"</span>,<span class="dt">lib=</span>lib)</a>
......
......@@ -15,10 +15,10 @@ https://en.wikipedia.org/wiki/List_of_datasets_for_machine_learning_research#Mic
```{r,eval=FALSE}
loss = NULL
for(rep in 1:4){
set.seed(rep)
#----- OBTAIN DATA -----
### simulated data ###
......@@ -28,48 +28,22 @@ for(rep in 1:4){
#y <- list$y; X <- list$X
### mice data ###
#data(mice,package="BGLR")
#nsel <- sort(sample(seq_len(1814),size=200,replace=FALSE))
#psel <- sort(sample(seq_len(10346),size=10346,replace=FALSE))
#y <- mice.pheno$Obesity.BMI[nsel] # try different phenotypes
#X <- mice.X[nsel,psel]
data(mice,package="BGLR")
nsel <- sort(sample(seq_len(1814),size=1814,replace=FALSE))
psel <- sort(sample(seq_len(10346),size=1000,replace=FALSE))
y <- mice.pheno$Obesity.BMI[nsel] # try different phenotypes
X <- mice.X[nsel,psel]
### wheat data ###
data(wheat,package="BGLR")
nsel <- seq_len(599) # sort(sample(seq_len(599),size=200,replace=FALSE))
psel <- seq_len(1279) # sort(sample(seq_len(1279),size=200,replace=FALSE))
y <- as.numeric(wheat.Y[nsel,rep]) # try different phenotypes
X <- wheat.X[nsel,psel]
#data(wheat,package="BGLR")
#nsel <- seq_len(599)
#psel <- seq_len(1279)
#y <- as.numeric(wheat.Y[nsel,rep]) # try different phenotypes
#X <- wheat.X[nsel,psel]
#----- CROSS-VALIDATE -----
loss <- colasso_compare(y=y,X=X,nfolds.int=5)
# fold <- sample(x=rep(x=seq_len(5),length.out=length(y)))
# pred <- matrix(data=NA,nrow=length(y),ncol=8)
# for(i in sort(unique(fold))){
# cat("i =",i,"\n")
# fit <- colasso(y=y[fold!=i],X=X[fold!=i,],alpha=1) # increase nfold? us
# # MEM[[i]] <- fit # trial
#
# for(j in seq_along(fit)){
# pred[fold==i,j] <- glmnet::predict.glmnet(object=fit[[j]],
# newx=X[fold==i,],
# s=fit[[j]]$lambda.min,
# type="response")
# }
# pred[fold==i,8] <- mean(y[fold!=i]) # intercept-only model
# }
# loss <- apply(X=pred,MARGIN=2,FUN=function(x) sum((y-x)^2))
#graphics::par(mar=c(3,3,1,1))
#col <- rep(x=0,times=length(loss)-1)
#col[1] <- col[length(col)] <- 1
#plot(y=loss[-length(loss)],x=seq_len(length(loss)-1),
# col=col+1,pch=col) # ,ylim=range(loss))
#abline(v=c(1.5,length(loss)-1.5),lty=2)
#graphics::grid()
#abline(h=loss[length(loss)],lty=2,col="red")
loss <- rbind(loss,colasso_compare(y=y,X=X,nfolds.int=5))
}
```
......@@ -77,8 +51,6 @@ for(rep in 1:4){
# BBMRI DATA (important!)
Repeat this for all normally distributed responses, omit samples with missing response, save results to file.
```{r,eval=FALSE}
lib <- "/virdir/Scratch/arauschenberger/library"
.libPaths(lib)
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment