Commit 28293169 authored by Rauschenberger's avatar Rauschenberger
Browse files

automation

parent 10761758
......@@ -341,16 +341,19 @@ colasso_simulate <- function(n=100,p=500,cor="constant",plot=TRUE){
#' @param plot
#' logical
#'
#' @param nfolds.int
#' internal folds
#'
#' @examples
#' NA
#'
colasso_compare <- function(y,X,plot=TRUE){
colasso_compare <- function(y,X,plot=TRUE,nfolds.int=10){
fold <- sample(x=rep(x=seq_len(5),length.out=length(y)))
pred <- matrix(data=NA,nrow=length(y),ncol=8)
for(i in sort(unique(fold))){
cat("i =",i,"\n")
fit <- colasso(y=y[fold!=i],X=X[fold!=i,],alpha=1)
fit <- colasso(y=y[fold!=i],X=X[fold!=i,],alpha=1,nfolds=nfolds.int)
for(j in seq_along(fit)){
pred[fold==i,j] <- glmnet::predict.glmnet(object=fit[[j]],
newx=X[fold==i,],
......
......@@ -158,49 +158,38 @@ https://en.wikipedia.org/wiki/List_of_datasets_for_machine_learning_research#Mic
<div class="sourceCode" id="cb2"><pre class="sourceCode r"><code class="sourceCode r"><a class="sourceLine" id="cb2-1" data-line-number="1">lib &lt;-<span class="st"> "/virdir/Scratch/arauschenberger/library"</span></a>
<a class="sourceLine" id="cb2-2" data-line-number="2"><span class="kw">.libPaths</span>(lib)</a>
<a class="sourceLine" id="cb2-3" data-line-number="3">devtools<span class="op">::</span><span class="kw"><a href="http://www.rdocumentation.org/packages/devtools/topics/install_github">install_github</a></span>(<span class="st">"rauschenberger/colasso"</span>,<span class="dt">lib=</span>lib)</a>
<a class="sourceLine" id="cb2-4" data-line-number="4"></a>
<a class="sourceLine" id="cb2-5" data-line-number="5">utils<span class="op">::</span><span class="kw"><a href="http://www.rdocumentation.org/packages/utils/topics/data">data</a></span>(metabolomics_RP3RP4_overlap,<span class="dt">package=</span><span class="st">"BBMRIomics"</span>)</a>
<a class="sourceLine" id="cb2-6" data-line-number="6">utils<span class="op">::</span><span class="kw"><a href="http://www.rdocumentation.org/packages/utils/topics/data">data</a></span>(rnaSeqData_ReadCounts_BIOS_cleaned,<span class="dt">package=</span><span class="st">"BBMRIomics"</span>)</a>
<a class="sourceLine" id="cb2-7" data-line-number="7"></a>
<a class="sourceLine" id="cb2-8" data-line-number="8">samples &lt;-<span class="st"> </span><span class="kw">intersect</span>(<span class="kw">colnames</span>(counts),<span class="kw">colnames</span>(metabolomicData))</a>
<a class="sourceLine" id="cb2-9" data-line-number="9"></a>
<a class="sourceLine" id="cb2-10" data-line-number="10">Y &lt;-<span class="st"> </span><span class="kw">t</span>(SummarizedExperiment<span class="op">::</span><span class="kw"><a href="http://www.rdocumentation.org/packages/SummarizedExperiment/topics/SummarizedExperiment-class">assays</a></span>(metabolomicData[,samples])<span class="op">$</span>measurements)</a>
<a class="sourceLine" id="cb2-11" data-line-number="11">X &lt;-<span class="st"> </span><span class="kw">t</span>(SummarizedExperiment<span class="op">::</span><span class="kw"><a href="http://www.rdocumentation.org/packages/SummarizedExperiment/topics/SummarizedExperiment-class">assay</a></span>(counts[,samples]))</a>
<a class="sourceLine" id="cb2-12" data-line-number="12"></a>
<a class="sourceLine" id="cb2-13" data-line-number="13">loss &lt;-<span class="st"> </span><span class="ot">NULL</span></a>
<a class="sourceLine" id="cb2-14" data-line-number="14"><span class="cf">for</span>(j <span class="cf">in</span> <span class="kw">seq_len</span>(<span class="kw">ncol</span>(Y))){</a>
<a class="sourceLine" id="cb2-15" data-line-number="15"> <span class="kw">cat</span>(<span class="st">"j ="</span>,j,<span class="st">"</span><span class="ch">\n</span><span class="st">"</span>)</a>
<a class="sourceLine" id="cb2-16" data-line-number="16"> y &lt;-<span class="st"> </span>Y[,j]</a>
<a class="sourceLine" id="cb2-17" data-line-number="17"> <span class="cf">if</span>(<span class="kw">sd</span>(y,<span class="dt">na.rm=</span><span class="ot">TRUE</span>)<span class="op">==</span><span class="dv">0</span>){<span class="cf">next</span>}</a>
<a class="sourceLine" id="cb2-18" data-line-number="18"> <span class="cf">if</span>(<span class="ot">TRUE</span>){</a>
<a class="sourceLine" id="cb2-19" data-line-number="19"> psel &lt;-<span class="st"> </span><span class="kw">sample</span>(<span class="kw">seq_len</span>(<span class="dv">56515</span>),<span class="dt">size=</span><span class="dv">2000</span>)</a>
<a class="sourceLine" id="cb2-20" data-line-number="20"> nsel &lt;-<span class="st"> </span><span class="kw">sample</span>(<span class="kw">seq_len</span>(<span class="dv">2003</span>),<span class="dt">size=</span><span class="dv">500</span>)</a>
<a class="sourceLine" id="cb2-21" data-line-number="21"> y &lt;-<span class="st"> </span>y[nsel]</a>
<a class="sourceLine" id="cb2-22" data-line-number="22"> x &lt;-<span class="st"> </span>X[nsel,psel]</a>
<a class="sourceLine" id="cb2-23" data-line-number="23"> }</a>
<a class="sourceLine" id="cb2-24" data-line-number="24"> cond &lt;-<span class="st"> </span><span class="op">!</span><span class="kw">is.na</span>(y)</a>
<a class="sourceLine" id="cb2-25" data-line-number="25"> y &lt;-<span class="st"> </span><span class="kw">scale</span>(y[cond])</a>
<a class="sourceLine" id="cb2-26" data-line-number="26"> x &lt;-<span class="st"> </span>x[cond,]</a>
<a class="sourceLine" id="cb2-27" data-line-number="27"> loss &lt;-<span class="st"> </span><span class="kw">rbind</span>(loss,colasso<span class="op">::</span><span class="kw"><a href="http://www.rdocumentation.org/packages/colasso/topics/colasso_compare">colasso_compare</a></span>(<span class="dt">y=</span>y,<span class="dt">X=</span>x))</a>
<a class="sourceLine" id="cb2-28" data-line-number="28">}</a>
<a class="sourceLine" id="cb2-29" data-line-number="29"></a>
<a class="sourceLine" id="cb2-30" data-line-number="30">min &lt;-<span class="st"> </span><span class="kw">apply</span>(traits,<span class="dv">2</span>,<span class="cf">function</span>(x) <span class="kw">min</span>(x,<span class="dt">na.rm=</span><span class="ot">TRUE</span>))</a>
<a class="sourceLine" id="cb2-31" data-line-number="31">max &lt;-<span class="st"> </span><span class="kw">apply</span>(traits,<span class="dv">2</span>,<span class="cf">function</span>(x) <span class="kw">max</span>(x,<span class="dt">na.rm=</span><span class="ot">TRUE</span>))</a>
<a class="sourceLine" id="cb2-32" data-line-number="32">var &lt;-<span class="st"> </span><span class="kw">apply</span>(traits,<span class="dv">2</span>,<span class="cf">function</span>(x) <span class="kw">var</span>(x,<span class="dt">na.rm=</span><span class="ot">TRUE</span>))</a>
<a class="sourceLine" id="cb2-33" data-line-number="33"></a>
<a class="sourceLine" id="cb2-34" data-line-number="34"></a>
<a class="sourceLine" id="cb2-35" data-line-number="35"></a>
<a class="sourceLine" id="cb2-36" data-line-number="36">psel &lt;-<span class="st"> </span><span class="kw">sample</span>(<span class="kw">seq_len</span>(<span class="dv">56515</span>),<span class="dt">size=</span><span class="dv">2000</span>)</a>
<a class="sourceLine" id="cb2-37" data-line-number="37">nsel &lt;-<span class="st"> </span><span class="kw">sample</span>(<span class="kw">seq_len</span>(<span class="dv">2003</span>),<span class="dt">size=</span><span class="dv">500</span>)</a>
<a class="sourceLine" id="cb2-38" data-line-number="38"></a>
<a class="sourceLine" id="cb2-39" data-line-number="39">y &lt;-<span class="st"> </span>YY[nsel,<span class="st">"totfa"</span>]</a>
<a class="sourceLine" id="cb2-40" data-line-number="40">X &lt;-<span class="st"> </span>XX[nsel,psel]</a>
<a class="sourceLine" id="cb2-41" data-line-number="41"></a>
<a class="sourceLine" id="cb2-42" data-line-number="42">net &lt;-<span class="st"> </span>glmnet<span class="op">::</span><span class="kw"><a href="http://www.rdocumentation.org/packages/glmnet/topics/cv.glmnet">cv.glmnet</a></span>(<span class="dt">x=</span>X,<span class="dt">y=</span>y)</a>
<a class="sourceLine" id="cb2-43" data-line-number="43"></a>
<a class="sourceLine" id="cb2-44" data-line-number="44"><span class="kw">plot</span>(<span class="dt">x=</span>net<span class="op">$</span>lambda,<span class="dt">y=</span>net<span class="op">$</span>cvm)</a>
<a class="sourceLine" id="cb2-45" data-line-number="45"></a>
<a class="sourceLine" id="cb2-46" data-line-number="46"><span class="co"># then apply colasso function !</span></a></code></pre></div>
<a class="sourceLine" id="cb2-4" data-line-number="4"><span class="kw">setwd</span>(<span class="st">"/mnt/virdir/Scratch/arauschenberger/colasso"</span>)</a>
<a class="sourceLine" id="cb2-5" data-line-number="5"></a>
<a class="sourceLine" id="cb2-6" data-line-number="6">utils<span class="op">::</span><span class="kw"><a href="http://www.rdocumentation.org/packages/utils/topics/data">data</a></span>(metabolomics_RP3RP4_overlap,<span class="dt">package=</span><span class="st">"BBMRIomics"</span>)</a>
<a class="sourceLine" id="cb2-7" data-line-number="7">utils<span class="op">::</span><span class="kw"><a href="http://www.rdocumentation.org/packages/utils/topics/data">data</a></span>(rnaSeqData_ReadCounts_BIOS_cleaned,<span class="dt">package=</span><span class="st">"BBMRIomics"</span>)</a>
<a class="sourceLine" id="cb2-8" data-line-number="8"></a>
<a class="sourceLine" id="cb2-9" data-line-number="9"><span class="co"># selecting samples</span></a>
<a class="sourceLine" id="cb2-10" data-line-number="10"><span class="co">#colData(counts)[,"biobank_id"]</span></a>
<a class="sourceLine" id="cb2-11" data-line-number="11">samples &lt;-<span class="st"> </span><span class="kw">intersect</span>(<span class="kw">colnames</span>(counts),<span class="kw">colnames</span>(metabolomicData))</a>
<a class="sourceLine" id="cb2-12" data-line-number="12">Y &lt;-<span class="st"> </span><span class="kw">t</span>(SummarizedExperiment<span class="op">::</span><span class="kw"><a href="http://www.rdocumentation.org/packages/SummarizedExperiment/topics/SummarizedExperiment-class">assays</a></span>(metabolomicData[,samples])<span class="op">$</span>measurements)</a>
<a class="sourceLine" id="cb2-13" data-line-number="13">X &lt;-<span class="st"> </span><span class="kw">t</span>(SummarizedExperiment<span class="op">::</span><span class="kw"><a href="http://www.rdocumentation.org/packages/SummarizedExperiment/topics/SummarizedExperiment-class">assay</a></span>(counts[,samples]))</a>
<a class="sourceLine" id="cb2-14" data-line-number="14"></a>
<a class="sourceLine" id="cb2-15" data-line-number="15"><span class="co"># selecting covariates</span></a>
<a class="sourceLine" id="cb2-16" data-line-number="16">names &lt;-<span class="st"> </span>spliceQTL<span class="op">::</span><span class="kw"><a href="http://www.rdocumentation.org/packages/spliceQTL/topics/map.genes">map.genes</a></span>(<span class="dt">chr=</span><span class="ot">NULL</span>)</a>
<a class="sourceLine" id="cb2-17" data-line-number="17">X &lt;-<span class="st"> </span>X[,<span class="kw">colnames</span>(X) <span class="op">%in%</span><span class="st"> </span>names<span class="op">$</span>gene_id]</a>
<a class="sourceLine" id="cb2-18" data-line-number="18"></a>
<a class="sourceLine" id="cb2-19" data-line-number="19">loss &lt;-<span class="st"> </span><span class="ot">NULL</span></a>
<a class="sourceLine" id="cb2-20" data-line-number="20"><span class="cf">for</span>(j <span class="cf">in</span> <span class="kw">seq_len</span>(<span class="kw">ncol</span>(Y))){</a>
<a class="sourceLine" id="cb2-21" data-line-number="21"> <span class="co"># if(j &lt; 2){next}</span></a>
<a class="sourceLine" id="cb2-22" data-line-number="22"> <span class="kw">cat</span>(<span class="st">"| j ="</span>,j,<span class="st">"|</span><span class="ch">\n</span><span class="st">"</span>)</a>
<a class="sourceLine" id="cb2-23" data-line-number="23"> y &lt;-<span class="st"> </span>Y[,j]</a>
<a class="sourceLine" id="cb2-24" data-line-number="24"> <span class="cf">if</span>(<span class="kw">sd</span>(y,<span class="dt">na.rm=</span><span class="ot">TRUE</span>)<span class="op">==</span><span class="dv">0</span>){<span class="cf">next</span>}</a>
<a class="sourceLine" id="cb2-25" data-line-number="25"> <span class="co">#if(TRUE){</span></a>
<a class="sourceLine" id="cb2-26" data-line-number="26"> <span class="co"># psel &lt;- sample(seq_len(56515),size=2000)</span></a>
<a class="sourceLine" id="cb2-27" data-line-number="27"> <span class="co"># nsel &lt;- sample(seq_len(2003),size=500)</span></a>
<a class="sourceLine" id="cb2-28" data-line-number="28"> <span class="co"># y &lt;- y[nsel]</span></a>
<a class="sourceLine" id="cb2-29" data-line-number="29"> <span class="co"># x &lt;- X[nsel,psel]</span></a>
<a class="sourceLine" id="cb2-30" data-line-number="30"> <span class="co">#}</span></a>
<a class="sourceLine" id="cb2-31" data-line-number="31"> cond &lt;-<span class="st"> </span><span class="op">!</span><span class="kw">is.na</span>(y)</a>
<a class="sourceLine" id="cb2-32" data-line-number="32"> y &lt;-<span class="st"> </span><span class="kw">scale</span>(y[cond])</a>
<a class="sourceLine" id="cb2-33" data-line-number="33"> x &lt;-<span class="st"> </span>X[cond,]</a>
<a class="sourceLine" id="cb2-34" data-line-number="34"> loss &lt;-<span class="st"> </span><span class="kw">rbind</span>(loss,colasso<span class="op">::</span><span class="kw"><a href="http://www.rdocumentation.org/packages/colasso/topics/colasso_compare">colasso_compare</a></span>(<span class="dt">y=</span>y,<span class="dt">X=</span>x))</a>
<a class="sourceLine" id="cb2-35" data-line-number="35">}</a></code></pre></div>
<div class="sourceCode" id="cb3"><pre class="sourceCode r"><code class="sourceCode r"><a class="sourceLine" id="cb3-1" data-line-number="1">n &lt;-<span class="st"> </span><span class="dv">100</span>; p &lt;-<span class="st"> </span><span class="dv">10</span></a>
<a class="sourceLine" id="cb3-2" data-line-number="2">x &lt;-<span class="st"> </span><span class="kw">matrix</span>(<span class="kw">rnorm</span>(n<span class="op">*</span>p),<span class="dt">nrow=</span>n,<span class="dt">ncol=</span>p)</a>
<a class="sourceLine" id="cb3-3" data-line-number="3">y &lt;-<span class="st"> </span><span class="kw">rbinom</span>(<span class="dt">n=</span>n,<span class="dt">size=</span><span class="dv">1</span>,<span class="dt">prob=</span><span class="fl">0.2</span>)</a>
......
......@@ -120,7 +120,7 @@
</div>
<pre class="usage"><span class='fu'>colasso_compare</span>(<span class='no'>y</span>, <span class='no'>X</span>, <span class='kw'>plot</span> <span class='kw'>=</span> <span class='fl'>TRUE</span>)</pre>
<pre class="usage"><span class='fu'>colasso_compare</span>(<span class='no'>y</span>, <span class='no'>X</span>, <span class='kw'>plot</span> <span class='kw'>=</span> <span class='fl'>TRUE</span>, <span class='kw'>nfolds.int</span> <span class='kw'>=</span> <span class='fl'>10</span>)</pre>
<h2 class="hasAnchor" id="arguments"><a class="anchor" href="#arguments"></a>Arguments</h2>
<table class="ref-arguments">
......@@ -137,6 +137,10 @@
<th>plot</th>
<td><p>logical</p></td>
</tr>
<tr>
<th>nfolds.int</th>
<td><p>internal folds</p></td>
</tr>
</table>
......
......@@ -4,7 +4,7 @@
\alias{colasso_compare}
\title{External cross-validation}
\usage{
colasso_compare(y, X, plot = TRUE)
colasso_compare(y, X, plot = TRUE, nfolds.int = 10)
}
\arguments{
\item{y}{response}
......@@ -12,6 +12,8 @@ colasso_compare(y, X, plot = TRUE)
\item{X}{covariates}
\item{plot}{logical}
\item{nfolds.int}{internal folds}
}
\description{
This function ...
......
......@@ -83,49 +83,39 @@ Repeat this for all normally distributed responses, omit samples with missing re
lib <- "/virdir/Scratch/arauschenberger/library"
.libPaths(lib)
devtools::install_github("rauschenberger/colasso",lib=lib)
setwd("/mnt/virdir/Scratch/arauschenberger/colasso")
utils::data(metabolomics_RP3RP4_overlap,package="BBMRIomics")
utils::data(rnaSeqData_ReadCounts_BIOS_cleaned,package="BBMRIomics")
# selecting samples
#colData(counts)[,"biobank_id"]
samples <- intersect(colnames(counts),colnames(metabolomicData))
Y <- t(SummarizedExperiment::assays(metabolomicData[,samples])$measurements)
X <- t(SummarizedExperiment::assay(counts[,samples]))
# selecting covariates
names <- spliceQTL::map.genes(chr=NULL)
X <- X[,colnames(X) %in% names$gene_id]
loss <- NULL
for(j in seq_len(ncol(Y))){
cat("j =",j,"\n")
# if(j < 2){next}
cat("| j =",j,"|\n")
y <- Y[,j]
if(sd(y,na.rm=TRUE)==0){next}
if(TRUE){
psel <- sample(seq_len(56515),size=2000)
nsel <- sample(seq_len(2003),size=500)
y <- y[nsel]
x <- X[nsel,psel]
}
#if(TRUE){
# psel <- sample(seq_len(56515),size=2000)
# nsel <- sample(seq_len(2003),size=500)
# y <- y[nsel]
# x <- X[nsel,psel]
#}
cond <- !is.na(y)
y <- scale(y[cond])
x <- x[cond,]
x <- X[cond,]
loss <- rbind(loss,colasso::colasso_compare(y=y,X=x))
}
min <- apply(traits,2,function(x) min(x,na.rm=TRUE))
max <- apply(traits,2,function(x) max(x,na.rm=TRUE))
var <- apply(traits,2,function(x) var(x,na.rm=TRUE))
psel <- sample(seq_len(56515),size=2000)
nsel <- sample(seq_len(2003),size=500)
y <- YY[nsel,"totfa"]
X <- XX[nsel,psel]
net <- glmnet::cv.glmnet(x=X,y=y)
plot(x=net$lambda,y=net$cvm)
# then apply colasso function !
```
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment