code.Rmd 5.33 KB
Newer Older
Rauschenberger's avatar
Rauschenberger committed
1
---
Rauschenberger's avatar
Rauschenberger committed
2
title: rainbow (code)
Rauschenberger's avatar
Rauschenberger committed
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
output: rmarkdown::html_vignette
vignette: >
  %\VignetteIndexEntry{vignette}
  %\VignetteEngine{knitr::rmarkdown}
  %\VignetteEncoding{UTF-8}
---

This vignette requires both a local machine and the BBMRI virtual machine. On the virtual machine, execute this chunk to set the library path, update the R package spliceQTL, and set the working directory.

```{r virtual machine,eval=FALSE}
lib <- "/virdir/Scratch/arauschenberger/library"
.libPaths(lib)
devtools::install_github("rauschenberger/spliceQTL",lib=lib)
library("spliceQTL",lib.loc=lib)
path <- "/virdir/Scratch/arauschenberger/spliceQTL"
setwd(path)
```

On a local machine with PLINK, execute this chunk to obtain the Geuvadis SNP data. Then move the files from the local to the virtual machine.

```{r obtain (local),eval=FALSE}
for(chr in 1:22){ 
    spliceQTL::get.snps.geuvadis(chr=chr,data="N:/semisup/data/eQTLs",
                                 path="N:/spliceQTL/data/Geuvadis")
}
```

On the virtual machine, execute this chunk to obtain the BBMRI SNP data, the Geuvadis exon data, and the BBMRI exon data. Choose one out of the six biobanks (CODAM, LL, LLS, NTR, PAN, RS).

```{r obtain (remote),eval=FALSE}
for(chr in 1:22){
    spliceQTL::get.snps.bbmri(chr=chr,biobank="LLS",path=path,size=500*10^3)
}
spliceQTL::get.exons.geuvadis(path=path)
spliceQTL::get.exons.bbmri(path=path)
```

Rauschenberger's avatar
Rauschenberger committed
40
On the virtual machine, execute this chunk to prepare the data. See the documentation of the R package spliceQTL for further information. (It seems that lme4::lmer in spliceQTL::adjust.variables fails to release memory. Restart R after each chromosome.)
Rauschenberger's avatar
Rauschenberger committed
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69

```{r prepare,eval=FALSE}
for(chr in 1:22){
  for(data in c("Geuvadis","LLS")){
      
    rm(list=setdiff(ls(),c("data","chr","path"))); gc()
    set.seed(1)
    
    cat("Analysing",data,chr,":",as.character(Sys.time()),"\n")
    if(data=="Geuvadis"){
      load(file.path(path,"Geuvadis.exons.RData"),verbose=TRUE)
    } else {
      load(file.path(path,"BBMRI.exons.RData"),verbose=TRUE)
      cond <- sapply(strsplit(x=rownames(exons),split=":"),function(x) x[[1]]==data)
      exons <- exons[cond,]
    }
    load(file.path(path,paste0(data,".chr",chr,".RData")),verbose=TRUE)
    
    cat("Matching samples:","\n")
    list <- spliceQTL::match.samples(exons,snps)
    exons <- list$exons; snps <- list$snps; rm(list)
    
    cat("Adjusting samples:","\n")
    exons <- spliceQTL::adjust.samples(x=exons) # slow!
    exons <- asinh(x=exons)
    
    cat("Adjusting covariates:","\n")
    names <- strsplit(x=colnames(exons),split="_") # exon names
    length <- sapply(names,function(x) as.integer(x[[3]])-as.integer(x[[2]])) # exon length
Rauschenberger's avatar
Rauschenberger committed
70
    exons <- spliceQTL::adjust.variables(x=exons,group=gene_id,offset=length) # slow!
Rauschenberger's avatar
Rauschenberger committed
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
    
    # subset chromosome
    cond <- sapply(strsplit(x=colnames(exons),split="_"),function(x) x[[1]]==chr)
    exons <- exons[,cond]
    gene_id <- gene_id[cond]
    
    cat("Mapping exons:","\n")
    map <- list()
    map$genes <- spliceQTL::map.genes(chr=chr,path=path)
    map$exons <- spliceQTL::map.exons(gene=as.character(map$genes$gene_id),exon=gene_id)
    
    cat("Mapping SNPs:","\n")
    names <- strsplit(x=colnames(snps),split=":")
    snp.chr <- sapply(names,function(x) as.integer(x[[1]]))
    snp.pos <- sapply(names,function(x) as.integer(x[[2]]))
    map$snps <- spliceQTL::map.snps(gene.chr=map$genes$chr,
                                    gene.start=map$genes$start,
                                    gene.end=map$genes$end,
                                    snp.chr=snp.chr,snp.pos=snp.pos)
    
    cat("Dropping genes:","\n")
    map <- spliceQTL::drop.trivial(map=map)
    
    cat("Testing:",as.character(Sys.time())," -> ")
    rm(list=setdiff(ls(),c("exons","snps","map","data","chr","path"))); gc()
    
    save(list=c("exons","snps","map"),file=file.path(path,paste0("temp.",data,".chr",chr,".RData")))
  }
#q()
#n
#exit
}
```

On the virtual machine, execute this chunk to test for alternative splicing.

```{r test,eval=FALSE}
for(chr in c(1:22)){
    for(data in c("Geuvadis","LLS")){
        cat("Analysing",data,chr,":",as.character(Sys.time()),"\n")
        
        rm(list=setdiff(ls(),c("data","chr","path"))); gc(); cat(".")
        load(file.path(path,paste0("temp.",data,".chr",chr,".RData"))); cat(".")
        pvalue <- spliceQTL::test.multiple(Y=exons,X=snps,map=map,rho=c(0,1),spec=16); cat(".")
        save(object=pvalue,file=file.path(path,paste0("pval.",data,".chr",chr,".RData"))); cat("\n")
    }
}
```

On the virtual machine, execute this chunk to compare the results between the Geuvadis and the BBMRI project.

```{r,eval=FALSE}
cor <- chisq <- rep(NA,length=22)
for(chr in 22:1){
    sel <- "rho=1"
    load(file.path(path,paste0("pval.Geuvadis.chr",chr,".RData")))
    a <- pvalue; pvalue <- NA
    load(file.path(path,paste0("pval.LLS.chr",chr,".RData")))
    b <- pvalue; pvalue <- NA

    names <- intersect(rownames(a),rownames(b))
    plot(jitter(-log(a[names,sel])),jitter(-log(b[names,sel])))
    cor[chr] <- stats::cor(a[names,sel],b[names,sel],method="spearman")

    a <- stats::p.adjust(a[names,sel])<0.05
    b <- stats::p.adjust(b[names,sel])<0.05
    print(paste0("chr",chr))
    print(table(a,b))
    chisq[chr] <- stats::chisq.test(table(a,b))$p.value
}
```

<!--
#wait <- TRUE
#while(wait){
#    wait <- !file.exists(...)
#    if(wait){Sys.sleep(60);cat(".")}
#}
memory and CPU usage Linux: htop
-->