Commit d45df8d8 authored by Rauschenberger's avatar Rauschenberger
Browse files

automation

parent cbf93511
...@@ -22,7 +22,11 @@ match.samples <- function(...,message=TRUE){ ...@@ -22,7 +22,11 @@ match.samples <- function(...,message=TRUE){
list <- list(...) list <- list(...)
if(length(list)==1 & is.list(list[[1]])){list <- list[[1]]} if(length(list)==1 & is.list(list[[1]])){list <- list[[1]]}
names <- sapply(substitute(list(...))[-1],deparse) if(is.null(names(list))){
names(list) <- sapply(substitute(list(...))[-1],deparse)
}
names <- names(list)
# check input # check input
cond <- sapply(list,function(x) !is.matrix(x)) cond <- sapply(list,function(x) !is.matrix(x))
...@@ -38,7 +42,7 @@ match.samples <- function(...,message=TRUE){ ...@@ -38,7 +42,7 @@ match.samples <- function(...,message=TRUE){
duplic <- lapply(list,function(x) duplicated(x)) duplic <- lapply(list,function(x) duplicated(x))
for(i in seq_along(list)){ for(i in seq_along(list)){
percent <- round(100*mean(duplic[[i]])) percent <- round(100*mean(duplic[[i]]))
if(message){message(percent,"% duplicates in ",names[i])} if(message){message(percent,"% duplicates in \"",names[i],"\"")}
list[[i]] <- list[[i]][!duplic[[i]],] list[[i]] <- list[[i]][!duplic[[i]],]
} }
...@@ -46,7 +50,7 @@ match.samples <- function(...,message=TRUE){ ...@@ -46,7 +50,7 @@ match.samples <- function(...,message=TRUE){
all <- Reduce(f=intersect,x=lapply(list,rownames)) all <- Reduce(f=intersect,x=lapply(list,rownames))
for(i in seq_along(list)){ for(i in seq_along(list)){
percent <- round(100*mean(rownames(list[[i]]) %in% all)) percent <- round(100*mean(rownames(list[[i]]) %in% all))
if(message){message(percent,"% overlap in",names[i])} if(message){message(percent,"% overlap in \"",names[i],"\"")}
list[[i]] <- list[[i]][all,] list[[i]] <- list[[i]][all,]
} }
...@@ -217,8 +221,8 @@ adjust.covariates <- function(x,offset,group){ ...@@ -217,8 +221,8 @@ adjust.covariates <- function(x,offset,group){
#' NA #' NA
#' #'
map.genes <- function(chr,path=getwd(),release="GRCh37",build="71"){ map.genes <- function(chr,path=getwd(),release="GRCh37",build="71"){
file <- paste0("Homo_sampiens.",release,".",build,".gtf") file <- paste0("Homo_sapiens.",release,".",build,".gtf")
if(!file.exists(file)){ if(!file.exists(file.path(path,file))){
url <- paste0("ftp://ftp.ensembl.org/pub/release-",build, url <- paste0("ftp://ftp.ensembl.org/pub/release-",build,
"/gtf/homo_sapiens/",file,".gz") "/gtf/homo_sapiens/",file,".gz")
destfile <- file.path(path,paste0(file,".gz")) destfile <- file.path(path,paste0(file,".gz"))
...@@ -231,6 +235,7 @@ map.genes <- function(chr,path=getwd(),release="GRCh37",build="71"){ ...@@ -231,6 +235,7 @@ map.genes <- function(chr,path=getwd(),release="GRCh37",build="71"){
genes <- refGenome::getGenePositions(object=object,by="gene_id") genes <- refGenome::getGenePositions(object=object,by="gene_id")
genes <- genes[genes$seqid==chr & genes$gene_biotype=="protein_coding",] genes <- genes[genes$seqid==chr & genes$gene_biotype=="protein_coding",]
genes <- genes[,c("gene_id","seqid","start","end")] genes <- genes[,c("gene_id","seqid","start","end")]
rownames(genes) <- NULL
colnames(genes)[colnames(genes)=="seqid"] <- "chr" colnames(genes)[colnames(genes)=="seqid"] <- "chr"
return(genes) return(genes)
} }
...@@ -243,10 +248,11 @@ map.genes <- function(chr,path=getwd(),release="GRCh37",build="71"){ ...@@ -243,10 +248,11 @@ map.genes <- function(chr,path=getwd(),release="GRCh37",build="71"){
#' This function #' This function
#' #'
#' @param gene_id #' @param gene_id
#' gene names\strong{:} vector with one entry per gene #' gene names\strong{:} vector with one entry per gene (gene names!)
#' #'
#' @param exon_id #' @param exon_id
#' exon names\strong{:} vector with one entry per exon #' exon names\strong{:} vector with one entry per exon
#' (also gene names! separated by comma if multiple genes)
#' #'
#' @details #' @details
#' The exon names should contain the gene names. For each gene, this function #' The exon names should contain the gene names. For each gene, this function
...@@ -256,13 +262,12 @@ map.genes <- function(chr,path=getwd(),release="GRCh37",build="71"){ ...@@ -256,13 +262,12 @@ map.genes <- function(chr,path=getwd(),release="GRCh37",build="71"){
#' NA #' NA
#' #'
map.exons <- function(gene_id,exon_id){ map.exons <- function(gene_id,exon_id){
if(length(gene_id)!=length(exon_id)){stop("Invalid.",call.=FALSE)}
p <- length(gene_id) p <- length(gene_id)
exons <- list() exons <- list()
pb <- utils::txtProgressBar(min=0,max=p,style=3) pb <- utils::txtProgressBar(min=0,max=p,style=3)
for(i in seq_len(p)){ for(i in seq_len(p)){
utils::setTxtProgressBar(pb=pb,value=i) utils::setTxtProgressBar(pb=pb,value=i)
which <- as.integer(grep(pattern=gene_id[i],x=exon_id)) # Why not "=="? which <- as.integer(grep(pattern=gene_id[i],x=exon_id))
exons[[i]] <- which exons[[i]] <- which
} }
return(exons) return(exons)
......
...@@ -127,11 +127,12 @@ ...@@ -127,11 +127,12 @@
<colgroup><col class="name" /><col class="desc" /></colgroup> <colgroup><col class="name" /><col class="desc" /></colgroup>
<tr> <tr>
<th>gene_id</th> <th>gene_id</th>
<td><p>gene names<strong>:</strong> vector with one entry per gene</p></td> <td><p>gene names<strong>:</strong> vector with one entry per gene (gene names!)</p></td>
</tr> </tr>
<tr> <tr>
<th>exon_id</th> <th>exon_id</th>
<td><p>exon names<strong>:</strong> vector with one entry per exon</p></td> <td><p>exon names<strong>:</strong> vector with one entry per exon
(also gene names! separated by comma if multiple genes)</p></td>
</tr> </tr>
</table> </table>
......
...@@ -7,9 +7,10 @@ ...@@ -7,9 +7,10 @@
map.exons(gene_id, exon_id) map.exons(gene_id, exon_id)
} }
\arguments{ \arguments{
\item{gene_id}{gene names\strong{:} vector with one entry per gene} \item{gene_id}{gene names\strong{:} vector with one entry per gene (gene names!)}
\item{exon_id}{exon names\strong{:} vector with one entry per exon} \item{exon_id}{exon names\strong{:} vector with one entry per exon
(also gene names! separated by comma if multiple genes)}
} }
\description{ \description{
This function This function
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment