Skip to content
Snippets Groups Projects
Commit 52036d65 authored by Wei Gu's avatar Wei Gu
Browse files

tranSMART GWAS in new version of Galaxy

parent fac951b8
No related branches found
No related tags found
No related merge requests found
This diff is collapsed.
<?xml version='1.0' encoding='utf-8'?>
<toolbox>
<!-- Data-source tools: file upload plus importers for external data sites. -->
<section id="getext" name="Get Data">
  <tool file="data_source/upload.xml"/>
  <tool file="data_source/ucsc_tablebrowser.xml"/>
  <tool file="data_source/ucsc_tablebrowser_archaea.xml"/>
  <tool file="data_source/ebi_sra.xml"/>
  <tool file="data_source/biomart.xml"/>
  <tool file="data_source/gramene_mart.xml"/>
  <tool file="data_source/flymine.xml"/>
  <tool file="data_source/fly_modencode.xml"/>
  <tool file="data_source/modmine.xml"/>
  <tool file="data_source/mousemine.xml"/>
  <tool file="data_source/ratmine.xml"/>
  <tool file="data_source/yeastmine.xml"/>
  <tool file="data_source/worm_modencode.xml"/>
  <tool file="data_source/wormbase.xml"/>
  <tool file="data_source/eupathdb.xml"/>
  <tool file="genomespace/genomespace_file_browser_prod.xml"/>
  <tool file="genomespace/genomespace_importer.xml"/>
</section>
<!-- tranSMART tools: the two wrappers registered from the tranSMART tool folder. -->
<section id="tmscripts" name="tranSMART tools">
  <tool file="tranSMART/tm_plink_input.xml"/>
  <tool file="tranSMART/run_plink.xml"/>
</section>
<!--
<section id="send" name="Send Data">
<tool file="genomespace/genomespace_exporter.xml" />
</section>
<section id="liftOver" name="Lift-Over">
<tool file="extract/liftOver_wrapper.xml" />
</section>
-->
<!-- Generic text-manipulation tools kept enabled alongside the tranSMART tools. -->
<section id="textutil" name="Text Manipulation">
  <tool file="filters/fixedValueColumn.xml"/>
  <tool file="filters/catWrapper.xml"/>
  <tool file="filters/condense_characters.xml"/>
  <tool file="filters/convert_characters.xml"/>
  <tool file="filters/mergeCols.xml"/>
  <tool file="filters/CreateInterval.xml"/>
  <tool file="filters/cutWrapper.xml"/>
  <tool file="filters/changeCase.xml"/>
  <tool file="filters/pasteWrapper.xml"/>
  <tool file="filters/remove_beginning.xml"/>
  <tool file="filters/randomlines.xml"/>
  <tool file="filters/headWrapper.xml"/>
  <tool file="filters/tailWrapper.xml"/>
  <tool file="filters/trimmer.xml"/>
  <tool file="filters/wc_gnu.xml"/>
  <tool file="filters/secure_hash_message_digest.xml"/>
</section>
<!--
<section id="convert" name="Convert Formats">
<tool file="filters/bed2gff.xml" />
<tool file="filters/gff2bed.xml" />
<tool file="maf/maf_to_bed.xml" />
<tool file="maf/maf_to_interval.xml" />
<tool file="maf/maf_to_fasta.xml" />
<tool file="filters/sff_extractor.xml" />
<tool file="filters/wig_to_bigwig.xml" />
<tool file="filters/bed_to_bigbed.xml" />
</section>
<section id="filter" name="Filter and Sort">
<tool file="stats/filtering.xml" />
<tool file="filters/sorter.xml" />
<tool file="filters/grep.xml" />
<label id="gff" text="GFF" />
<tool file="filters/gff/extract_GFF_Features.xml" />
<tool file="filters/gff/gff_filter_by_attribute.xml" />
<tool file="filters/gff/gff_filter_by_feature_count.xml" />
<tool file="filters/gff/gtf_filter_by_attribute_values_list.xml" />
</section>
<section id="group" name="Join, Subtract and Group">
<tool file="filters/joiner.xml" />
<tool file="filters/compare.xml" />
<tool file="stats/grouping.xml" />
</section>
<section id="features" name="Extract Features">
<tool file="filters/ucsc_gene_bed_to_exon_bed.xml" />
</section>
<section id="fetchSeq" name="Fetch Sequences">
<tool file="extract/extract_genomic_dna.xml" />
</section>
<section id="fetchAlign" name="Fetch Alignments">
<tool file="maf/interval2maf_pairwise.xml" />
<tool file="maf/interval2maf.xml" />
<tool file="maf/interval_maf_to_merged_fasta.xml" />
<tool file="maf/genebed_maf_to_fasta.xml" />
<tool file="maf/maf_stats.xml" />
<tool file="maf/maf_thread_for_species.xml" />
<tool file="maf/maf_limit_to_species.xml" />
<tool file="maf/maf_limit_size.xml" />
<tool file="maf/maf_by_block_number.xml" />
<tool file="maf/maf_filter.xml" />
<tool file="maf/maf_reverse_complement.xml" />
</section>
<section id="scores" name="Get Genomic Scores">
<tool file="filters/wiggle_to_simple.xml" />
<tool file="stats/aggregate_binned_scores_in_intervals.xml" />
</section>
<section id="stats" name="Statistics">
<tool file="stats/gsummary.xml" />
<tool file="filters/uniq.xml" />
</section>
<section id="plots" name="Graph/Display Data">
<tool file="plotting/boxplot.xml" />
<tool file="maf/vcf_to_maf_customtrack.xml" />
</section>
<section id="hgv" name="Phenotype Association">
<tool file="evolution/codingSnps.xml" />
<tool file="evolution/add_scores.xml" />
<tool file="phenotype_association/sift.xml" />
<tool file="phenotype_association/linkToGProfile.xml" />
<tool file="phenotype_association/linkToDavid.xml" />
<tool file="phenotype_association/ldtools.xml" />
<tool file="phenotype_association/pass.xml" />
<tool file="phenotype_association/gpass.xml" />
<tool file="phenotype_association/beam.xml" />
<tool file="phenotype_association/lps.xml" />
<tool file="phenotype_association/master2pg.xml" />
</section>
<label id="ngs" text="NGS Toolbox Beta" />
<section id="cshl_library_information" name="NGS: QC and manipulation">
<label id="illumina" text="Illumina data" />
<label id="454" text="Roche-454 data" />
<label id="solid" text="AB-SOLiD data" />
<tool file="next_gen_conversion/solid2fastq.xml" />
<tool file="solid_tools/solid_qual_stats.xml" />
<tool file="solid_tools/solid_qual_boxplot.xml" />
<label id="generic_fastq" text="Generic FASTQ manipulation" />
<label id="fastx_toolkit_fastq" text="FASTX-Toolkit for FASTQ data" />
</section>
<section id="ngs_mapping" name="NGS: Mapping">
<label id="illumina" text="Illumina" />
<label id="roche_454" text="Roche-454" />
<label id="ab_solid" text="AB-SOLiD" />
</section>
<section id="samtools" name="NGS: SAM Tools">
</section>
-->
</toolbox>
#!/usr/bin/Rscript --vanilla
##############################################################
## Name : prepare_plink.R ##
## Description: Creates plink input file from tranSMART ##
## cohort selection export (clinical data) ##
## Usage : ##
## prepare_plink.R jobInfo_FILE subset1_clinical_FILE ##
## subset2_clinical_FILE ##
## ##
## Author : eTRIKS WP4 UL team ##
## Contact : wei.gu@uni.lu ##
##############################################################
## ------
## ------ command line args ------ ##
## ------
# Positional arguments, in order:
#   1. job information file
#   2. clinical data export of subset 1
#   3. clinical data export of subset 2
args <- commandArgs(trailingOnly = TRUE)
# Fail early with the usage line instead of passing NA paths to file()/read.table().
if (length(args) < 3) {
  stop("Usage: prepare_plink.R jobInfo_FILE subset1_clinical_FILE subset2_clinical_FILE",
       call. = FALSE)
}
jobFile <- args[1]
subset1_file <- args[2]
subset2_file <- args[3]
## ------
## ------ deal with input files ------ ##
## ------
#
# read jobFile to get job info: studyAccessions -> study_id
#
# Scan the job info file for lines of the form "<key> -> <value>" and take the
# value of the "\tstudyAccessions" key, with any square brackets removed.
# If the key occurs more than once, the last occurrence wins.
study_id <- NULL
for (oneLine in readLines(jobFile, warn = FALSE)) {
  fields <- strsplit(oneLine, " -> ")[[1]]
  # Blank lines split into character(0); skip anything without a key and a value
  # so the comparison below never sees NA.
  if (length(fields) < 2) {
    next
  }
  if (fields[1] == "\tstudyAccessions") {
    study_id <- gsub("]", "", gsub("[", "", fields[2], fixed = TRUE), fixed = TRUE)
  }
}
# Without a study id the GridFS file names cannot be built, so stop here with a
# clear message rather than failing later on an undefined variable.
if (is.null(study_id)) {
  stop("No 'studyAccessions' entry found in job information file: ", jobFile,
       call. = FALSE)
}
#
# read PATIENT ID in both subsets
#
# Reads a tab-separated clinical export (with header) and returns its
# PATIENT.ID column as a character vector. Stops if the column is absent.
readPatientIds <- function(path) {
  cohort <- read.table(path, header = TRUE, sep = "\t")
  ids <- cohort$PATIENT.ID
  if (is.null(ids)) {
    stop("Column 'PATIENT.ID' not found in clinical export: ", path, call. = FALSE)
  }
  as.character(ids)
}

# Builds the three phenotype columns (id, id, code) for one subset.
# rep() keeps the result a 0 x 3 matrix when the subset has no patients, so the
# rbind() below still works.
phenoRows <- function(ids, code) {
  cbind(ids, ids, rep(code, length(ids)))
}

idS1 <- readPatientIds(subset1_file)
idS2 <- readPatientIds(subset2_file)
#
# generate phenotype file
#
# Subset 1 patients get phenotype "1", subset 2 patients get phenotype "2".
pheno <- rbind(phenoRows(idS1, "1"), phenoRows(idS2, "2"))
write.table(pheno, file = "transmart.pheno", quote = FALSE, sep = " ",
            row.names = FALSE, col.names = FALSE)
## ------
## ------ now the database part ------ ##
## ------
#
# set up mongodb connection details
#
#options(warn=-1)
suppressMessages(library(rmongodb))
# Connection settings. Empty username/password are the defaults shown in the
# mongo.create signature below.
host <- "127.0.0.1:27017"
username <- ""
password <- ""
db <- "tm_hd_data"
#
# connect to mongo
#
# the create function has the following signature
#mongo.create(host="127.0.0.1", name="", username="", password="", db="admin", timeout=0L)
# Pass the credentials explicitly so that editing the settings above actually
# takes effect; with the empty defaults this is the same call as before.
mongo <- mongo.create(host = host, username = username, password = password, db = db)
#
# query DB (GridFS) to get the bed, bim and fam file
#
# Copies one file from GridFS to a local file.
#   gridfsCon : GridFS handle used for the lookup
#   inFile    : name of the file to look up in GridFS
#   outFile   : path of the local file to write
# Does nothing when mongo.gridfs.find() returns NULL for inFile.
getFile <- function (gridfsCon, inFile, outFile){
  gridFile <- mongo.gridfs.find(gridfsCon, inFile)
  if (!is.null(gridFile)) {
    outCon <- file(outFile)
    # The connection was previously never closed. Release it when the function
    # exits; try() keeps this harmless if the connection is already gone.
    on.exit(try(close(outCon), silent = TRUE))
    mongo.gridfile.pipe(gridFile, outCon)
    mongo.gridfile.destroy(gridFile)
  }
}
# Fetch <study_id>.bed/.bim/.fam from GridFS into transmart.bed/.bim/.fam.
# Failures are reported instead of silently leaving the output files missing.
if (!mongo.is.connected(mongo)) {
  mongo.destroy(mongo)
  stop("Could not connect to MongoDB; transmart.bed/.bim/.fam were not created",
       call. = FALSE)
}
mgrids <- mongo.gridfs.create(mongo, db)
if (is.null(mgrids)) {
  mongo.destroy(mongo)
  stop("Could not open GridFS; transmart.bed/.bim/.fam were not created",
       call. = FALSE)
}
for (ext in c(".bed", ".bim", ".fam")) {
  getFile(mgrids, paste(study_id, ext, sep = ""), paste("transmart", ext, sep = ""))
}
# Only destroy the GridFS handle once it is known to be non-NULL.
mongo.gridfs.destroy(mgrids)
#
# disconnect from mongo
#
mongo.destroy(mongo)
<!--
  Galaxy wrapper that runs a PLINK association analysis.

  Inputs : the bed, bim and fam files, a phenotype file, and a p-value cutoff
           that is handed to PLINK's pfilter option.
  Outputs: collected from the job working directory via from_work_dir:
             log_File      from plink.log
             assoc_File    from plink.assoc
             adjusted_File from plink.assoc.adjusted
           log_File previously pointed at plink.assoc, duplicating assoc_File.
-->
<tool id="run_plink" name="Run GWAS with PLINK" version="0.1">
  <command>
    /usr/local/bin/plink --noweb --bed $bedfile --bim $bimfile --fam $famfile --pheno $phenofile --assoc --adjust --pfilter $p_cutoff
  </command>
  <inputs>
    <param format="txt" name="bedfile" type="data" label="binary genotype file"/>
    <param format="txt" name="bimfile" type="data" label="SNP mapping file"/>
    <param format="txt" name="famfile" type="data" label="Sample info file"/>
    <param format="txt" name="phenofile" type="data" label="phenotype file"/>
    <param name="p_cutoff" type="float" value="0.05" min="0" max="1" label="P-value cutoff for unadjusted p-values"/>
  </inputs>
  <outputs>
    <data format="txt" name="log_File" label="log file" from_work_dir="plink.log"></data>
    <data format="txt" name="assoc_File" label="association result file" from_work_dir="plink.assoc"></data>
    <data format="txt" name="adjusted_File" label="adjusted result file" from_work_dir="plink.assoc.adjusted"></data>
  </outputs>
  <help>
Perform GWAS analysis using PLINK.
  </help>
</tool>
<?xml version="1.0"?>
<!--
  Galaxy wrapper around prepare_plink.R.

  The script is invoked from the tranSMART folder under GALAXY_DATA_INDEX_DIR
  with three arguments: the job information file and the two subset exports.
  The four outputs are collected from the job working directory
  (transmart.bed, transmart.bim, transmart.fam, transmart.pheno).
-->
<tool id="tm_plink_input" name="Create PLINK input from TM Export files" version="0.1">
  <command>
    ${GALAXY_DATA_INDEX_DIR}/tranSMART/prepare_plink.R $jobInfo $subset_1 $subset_2
  </command>
  <inputs>
    <param name="jobInfo" type="data" format="txt" label="job information"/>
    <param name="subset_1" type="data" format="tabular" label="Subset 1"/>
    <param name="subset_2" type="data" format="tabular" label="Subset 2"/>
  </inputs>
  <outputs>
    <data name="bed_File" format="txt" label="binary genotype file" from_work_dir="transmart.bed"/>
    <data name="bim_File" format="txt" label="SNP mapping file" from_work_dir="transmart.bim"/>
    <data name="fam_File" format="txt" label="Sample info file" from_work_dir="transmart.fam"/>
    <data name="pheno_File" format="txt" label="phenotype file" from_work_dir="transmart.pheno"/>
  </outputs>
  <help>
This tool reads exported clinical data as well as job information to build inputs for PLINK analysis.
  </help>
</tool>
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment