Commit 30d4f58c authored by Rauschenberger
Browse files

automation

parent f9683885
...@@ -9,31 +9,43 @@ ...@@ -9,31 +9,43 @@
#' @param chr #' @param chr
#' chromosome: integer \eqn{1-22} #' chromosome: integer \eqn{1-22}
#' #'
#' @param path #' @param data
#' local directory for VCF (variant call format) and SDRF (sample and data relationship format) files #' local directory for VCF (variant call format) and SDRF (sample and data relationship format) files
#' #'
#' @param path
#' local directory for output
#'
#' @examples #' @examples
#' path <- "C:/Users/a.rauschenbe/Desktop/spliceQTL/data" #' path <- "C:/Users/a.rauschenbe/Desktop/spliceQTL/data"
#' #'
get.snps.geuvadis <- function(chr,path=getwd()){ get.snps.geuvadis <- function(chr,data=NULL,path=getwd()){
# download SNP data
file <- paste0("GEUVADIS.chr",chr,".PH1PH2_465.IMPFRQFILT_BIALLELIC_PH.annotv2.genotypes.vcf.gz")
url <- paste0("http://www.ebi.ac.uk/arrayexpress/files/E-GEUV-1/genotypes/",file)
destfile <- file.path(path,file)
if(!file.exists(destfile)){
utils::download.file(url=url,destfile=destfile,method="auto")
}
# transform with PLINK if(is.null(data)){
setwd(path) data <- path
system(paste0("plink --vcf GEUVADIS.chr",chr,".PH1PH2_465.IMPFRQFILT_BIALLELIC_PH.annotv2.genotypes.vcf.gz", # download SNP data
file <- paste0("GEUVADIS.chr",chr,".PH1PH2_465.IMPFRQFILT_BIALLELIC_PH.annotv2.genotypes.vcf.gz")
url <- paste0("http://www.ebi.ac.uk/arrayexpress/files/E-GEUV-1/genotypes/",file)
destfile <- file.path(data,file)
if(!file.exists(destfile)){
utils::download.file(url=url,destfile=destfile,method="auto")
}
# transform with PLINK
setwd(data)
system(paste0("plink --vcf GEUVADIS.chr",chr,".PH1PH2_465.IMPFRQFILT_BIALLELIC_PH.annotv2.genotypes.vcf.gz",
" --maf 0.05 --geno 0 --make-bed --out snps",chr),invisible=FALSE) " --maf 0.05 --geno 0 --make-bed --out snps",chr),invisible=FALSE)
# obtain identifiers
file <- "E-GEUV-1.sdrf.txt"
url <- paste("http://www.ebi.ac.uk/arrayexpress/files/E-GEUV-1/",file,sep="")
destfile <- file.path(data,file)
if(!file.exists(destfile)){
utils::download.file(url=url,destfile=destfile,method="auto")
}
}
# read into R # read into R
bed <- file.path(path,paste("snps",chr,".bed",sep="")) bed <- file.path(data,paste("snps",chr,".bed",sep=""))
bim <- file.path(path,paste("snps",chr,".bim",sep="")) bim <- file.path(data,paste("snps",chr,".bim",sep=""))
fam <- file.path(path,paste("snps",chr,".fam",sep="")) fam <- file.path(data,paste("snps",chr,".fam",sep=""))
X <- snpStats::read.plink(bed=bed,bim=bim,fam=fam) X <- snpStats::read.plink(bed=bed,bim=bim,fam=fam)
X$fam <- NULL; all(diff(X$map$position) > 0) X$fam <- NULL; all(diff(X$map$position) > 0)
...@@ -49,13 +61,7 @@ get.snps.geuvadis <- function(chr,path=getwd()){ ...@@ -49,13 +61,7 @@ get.snps.geuvadis <- function(chr,path=getwd()){
class(snps) <- "integer" class(snps) <- "integer"
# change identifiers # change identifiers
file <- "E-GEUV-1.sdrf.txt" samples <- utils::read.delim(file=file.path(data,"E-GEUV-1.sdrf.txt"))
url <- paste("http://www.ebi.ac.uk/arrayexpress/files/E-GEUV-1/",file,sep="")
destfile <- file.path(path$data,file)
if(!file.exists(destfile)){
utils::download.file(url=url,destfile=destfile,method="auto")
}
samples <- utils::read.delim(file=file.path(path,"E-GEUV-1.sdrf.txt"))
match <- match(rownames(snps),samples$Source.Name) match <- match(rownames(snps),samples$Source.Name)
rownames(snps) <- samples$Comment.ENA_RUN.[match] rownames(snps) <- samples$Comment.ENA_RUN.[match]
snps <- snps[!is.na(rownames(snps)),] snps <- snps[!is.na(rownames(snps)),]
...@@ -100,7 +106,7 @@ get.snps.bbmri <- function(chr,biobank=NULL,path=getwd(),size=500*10^3){ ...@@ -100,7 +106,7 @@ get.snps.bbmri <- function(chr,biobank=NULL,path=getwd(),size=500*10^3){
study <- c("LLS0","LLS1") study <- c("LLS0","LLS1")
} else if(biobank=="NTR"){ } else if(biobank=="NTR"){
study <- c("NTR0","NTR1") study <- c("NTR0","NTR1")
} else if(!biobank %in% c("CODAM","LL","PAN","RS")){ } else if(biobank %in% c("CODAM","LL","PAN","RS")){
study <- biobank study <- biobank
} else{ } else{
stop("Invalid biobank.",call.=FALSE) stop("Invalid biobank.",call.=FALSE)
......
...@@ -120,7 +120,7 @@ ...@@ -120,7 +120,7 @@
</div> </div>
<pre class="usage"><span class='fu'>get.snps.geuvadis</span>(<span class='no'>chr</span>, <span class='kw'>path</span> <span class='kw'>=</span> <span class='fu'>getwd</span>())</pre> <pre class="usage"><span class='fu'>get.snps.geuvadis</span>(<span class='no'>chr</span>, <span class='kw'>data</span> <span class='kw'>=</span> <span class='kw'>NULL</span>, <span class='kw'>path</span> <span class='kw'>=</span> <span class='fu'>getwd</span>())</pre>
<h2 class="hasAnchor" id="arguments"><a class="anchor" href="#arguments"></a>Arguments</h2> <h2 class="hasAnchor" id="arguments"><a class="anchor" href="#arguments"></a>Arguments</h2>
<table class="ref-arguments"> <table class="ref-arguments">
...@@ -130,9 +130,13 @@ ...@@ -130,9 +130,13 @@
<td><p>chromosome: integer \(1-22\)</p></td> <td><p>chromosome: integer \(1-22\)</p></td>
</tr> </tr>
<tr> <tr>
<th>path</th> <th>data</th>
<td><p>local directory for VCF (variant call format) and SDRF (sample and data relationship format) files</p></td> <td><p>local directory for VCF (variant call format) and SDRF (sample and data relationship format) files</p></td>
</tr> </tr>
<tr>
<th>path</th>
<td><p>local directory for output</p></td>
</tr>
</table> </table>
......
...@@ -4,12 +4,14 @@ ...@@ -4,12 +4,14 @@
\alias{get.snps.geuvadis} \alias{get.snps.geuvadis}
\title{Get SNP data (Geuvadis)} \title{Get SNP data (Geuvadis)}
\usage{ \usage{
get.snps.geuvadis(chr, path = getwd()) get.snps.geuvadis(chr, data = NULL, path = getwd())
} }
\arguments{ \arguments{
\item{chr}{chromosome: integer \eqn{1-22}} \item{chr}{chromosome: integer \eqn{1-22}}
\item{path}{local directory for VCF (variant call format) and SDRF (sample and data relationship format) files} \item{data}{local directory for VCF (variant call format) and SDRF (sample and data relationship format) files}
\item{path}{local directory for output}
} }
\description{ \description{
This function transforms SNP data (local machine). This function transforms SNP data (local machine).
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment