code.Rmd 5.33 KB
Newer Older
Rauschenberger's avatar
Rauschenberger committed
1
---
Rauschenberger's avatar
Rauschenberger committed
2
title: code
Rauschenberger's avatar
Rauschenberger committed
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
output: rmarkdown::html_vignette
vignette: >
  %\VignetteIndexEntry{vignette}
  %\VignetteEngine{knitr::rmarkdown}
  %\VignetteEncoding{UTF-8}
---

This vignette requires both a local machine and the BBMRI virtual machine. On the virtual machine, execute this chunk to set the library path, update the R package spliceQTL, and set the working directory.

```{r virtual machine,eval=FALSE}
lib <- "/virdir/Scratch/arauschenberger/library"
.libPaths(lib)
devtools::install_github("rauschenberger/spliceQTL",lib=lib)
library("spliceQTL",lib.loc=lib)
path <- "/virdir/Scratch/arauschenberger/spliceQTL"
setwd(path)
```

On a local machine with PLINK, execute this chunk to obtain the Geuvadis SNP data. Then move the files from the local to the virtual machine.

```{r obtain (local),eval=FALSE}
for(chr in 1:22){ 
    spliceQTL::get.snps.geuvadis(chr=chr,data="N:/semisup/data/eQTLs",
                                 path="N:/spliceQTL/data/Geuvadis")
}
```

On the virtual machine, execute this chunk to obtain the BBMRI SNP data, the Geuvadis exon data, and the BBMRI exon data. Choose one out of the six biobanks (CODAM, LL, LLS, NTR, PAN, RS).

```{r obtain (remote),eval=FALSE}
for(chr in 1:22){
    spliceQTL::get.snps.bbmri(chr=chr,biobank="LLS",path=path,size=500*10^3)
}
spliceQTL::get.exons.geuvadis(path=path)
spliceQTL::get.exons.bbmri(path=path)
```

On the virtual machine, execute this chunk to prepare the data. See the documentation of the R package spliceQTL for further information. (It seems that lme4::lmer in spliceQTL::adjust.covariates fails to release memory. Restart R after each chromosome.)

```{r prepare,eval=FALSE}
for(chr in 1:22){
  for(data in c("Geuvadis","LLS")){
      
    rm(list=setdiff(ls(),c("data","chr","path"))); gc()
    set.seed(1)
    
    cat("Analysing",data,chr,":",as.character(Sys.time()),"\n")
    if(data=="Geuvadis"){
      load(file.path(path,"Geuvadis.exons.RData"),verbose=TRUE)
    } else {
      load(file.path(path,"BBMRI.exons.RData"),verbose=TRUE)
      cond <- sapply(strsplit(x=rownames(exons),split=":"),function(x) x[[1]]==data)
      exons <- exons[cond,]
    }
    load(file.path(path,paste0(data,".chr",chr,".RData")),verbose=TRUE)
    
    cat("Matching samples:","\n")
    list <- spliceQTL::match.samples(exons,snps)
    exons <- list$exons; snps <- list$snps; rm(list)
    
    cat("Adjusting samples:","\n")
    exons <- spliceQTL::adjust.samples(x=exons) # slow!
    exons <- asinh(x=exons)
    
    cat("Adjusting covariates:","\n")
    names <- strsplit(x=colnames(exons),split="_") # exon names
    length <- sapply(names,function(x) as.integer(x[[3]])-as.integer(x[[2]])) # exon length
    exons <- spliceQTL::adjust.covariates(x=exons,group=gene_id,offset=length) # slow!
    
    # subset chromosome
    cond <- sapply(strsplit(x=colnames(exons),split="_"),function(x) x[[1]]==chr)
    exons <- exons[,cond]
    gene_id <- gene_id[cond]
    
    cat("Mapping exons:","\n")
    map <- list()
    map$genes <- spliceQTL::map.genes(chr=chr,path=path)
    map$exons <- spliceQTL::map.exons(gene=as.character(map$genes$gene_id),exon=gene_id)
    
    cat("Mapping SNPs:","\n")
    names <- strsplit(x=colnames(snps),split=":")
    snp.chr <- sapply(names,function(x) as.integer(x[[1]]))
    snp.pos <- sapply(names,function(x) as.integer(x[[2]]))
    map$snps <- spliceQTL::map.snps(gene.chr=map$genes$chr,
                                    gene.start=map$genes$start,
                                    gene.end=map$genes$end,
                                    snp.chr=snp.chr,snp.pos=snp.pos)
    
    cat("Dropping genes:","\n")
    map <- spliceQTL::drop.trivial(map=map)
    
    cat("Testing:",as.character(Sys.time())," -> ")
    rm(list=setdiff(ls(),c("exons","snps","map","data","chr","path"))); gc()
    
    save(list=c("exons","snps","map"),file=file.path(path,paste0("temp.",data,".chr",chr,".RData")))
  }
#q()
#n
#exit
}
```

On the virtual machine, execute this chunk to test for alternative splicing.

```{r test,eval=FALSE}
for(chr in c(1:22)){
    for(data in c("Geuvadis","LLS")){
        cat("Analysing",data,chr,":",as.character(Sys.time()),"\n")
        
        rm(list=setdiff(ls(),c("data","chr","path"))); gc(); cat(".")
        load(file.path(path,paste0("temp.",data,".chr",chr,".RData"))); cat(".")
        pvalue <- spliceQTL::test.multiple(Y=exons,X=snps,map=map,rho=c(0,1),spec=16); cat(".")
        save(object=pvalue,file=file.path(path,paste0("pval.",data,".chr",chr,".RData"))); cat("\n")
    }
}
```

On the virtual machine, execute this chunk to compare the results between the Geuvadis and the BBMRI project.

```{r,eval=FALSE}
cor <- chisq <- rep(NA,length=22)
for(chr in 22:1){
    sel <- "rho=1"
    load(file.path(path,paste0("pval.Geuvadis.chr",chr,".RData")))
    a <- pvalue; pvalue <- NA
    load(file.path(path,paste0("pval.LLS.chr",chr,".RData")))
    b <- pvalue; pvalue <- NA

    names <- intersect(rownames(a),rownames(b))
    plot(jitter(-log(a[names,sel])),jitter(-log(b[names,sel])))
    cor[chr] <- stats::cor(a[names,sel],b[names,sel],method="spearman")

    a <- stats::p.adjust(a[names,sel])<0.05
    b <- stats::p.adjust(b[names,sel])<0.05
    print(paste0("chr",chr))
    print(table(a,b))
    chisq[chr] <- stats::chisq.test(table(a,b))$p.value
}
```

<!--
#wait <- TRUE
#while(wait){
#    wait <- !file.exists(...)
#    if(wait){Sys.sleep(60);cat(".")}
#}
memory and CPU usage Linux: htop
-->