first commit

ba6a4983 · Wei Gu · ba6a4983 · ba6a4983 · ba6a4983 · ba6a4983
Commit ba6a4983 authored 10 years ago by Wei Gu
--- a/README
+++ b/README
--- a/tool-data/Rscripts/Create_MarkerSelection_inputfile.pl
+++ b/tool-data/Rscripts/Create_MarkerSelection_inputfile.pl
+#!/usr/bin/env perl
+
+##############################################################
+## Name: Create_MarkerSelection_inputfile.pl                ##
+## Description: Merges the two mrna.tsv subset files        ##
+## and puts the data into the correct format to be used by  ##
+## MarkerSelection.R                                        ##
+## Usage: Create_MarkerSelection_inputfile.pl               ##
+##        <subset1_input_file> <subset2_input_file>         ##
+##        <output_file>                                     ##
+## Author: serge.eifes@uni.lu                               ##
+##############################################################
+
+
+###Modules loaded
+use strict;
+use warnings;
+
+
+
+#Argument variables (positional):
+#  1. mrna.tsv file of subset 1
+#  2. mrna.tsv file of subset 2
+#  3. output file
+die "Usage: $0 <subset1_input_file> <subset2_input_file> <output_file>\n"
+	unless(@ARGV >= 3);
+
+my $file_1=$ARGV[0];
+my $file_2=$ARGV[1];
+my $O_file=$ARGV[2];
+
+#Input files in subset order: lines of the first are labelled "S1",
+#lines of the second "S2".
+my @A_Subset_files = ($file_1, $file_2);
+
+#Zero-based positions of the fields taken from every input line, in the
+#order patient id, value, probe id, gene symbol.
+my @A_wanted_cols = (0, 9, 11, 13);
+
+#Three-argument open with lexical handles, so that special characters in a
+#file name can never be interpreted as part of the open mode.
+open(my $WH, '>', $O_file) or die "Cannot open $O_file: $!";
+
+#The header is written exactly once, before any data. Printing it once per
+#input file would leave a second header line in the middle of the data.
+print $WH "PATIENT.ID\tVALUE\tPROBE.ID\tGENE_SYMBOL\tSUBSET\n";
+
+my $counter_subset=1;
+
+foreach my $file (@A_Subset_files){
+	
+	open(my $FH, '<', $file) or die "Cannot open $file: $!";
+	
+	#Subset label shared by all lines of this file
+	my $Subset = "S" . $counter_subset;
+	
+	while(my $Line = <$FH>){
+		
+		chomp($Line);
+	
+		#Skip the header line of the input file and empty lines
+		next if($Line =~ /^PATIENT/);
+		next if($Line =~ /^\s*$/);
+	
+		my (@A_Line) = split /\t/, $Line;
+		
+		#Fields missing on a short line are written as empty strings instead
+		#of triggering "uninitialized value" warnings.
+		my ($Patient_ID, $Value, $Probe_ID, $Gene_Symbol)
+			= map { defined($_) ? $_ : "" } @A_Line[@A_wanted_cols];
+		
+		#"null" gene symbols are written as NA
+		$Gene_Symbol="NA" if($Gene_Symbol eq "null");
+		
+		#The patient id is prefixed with the subset label
+		print $WH $Subset . "_" . $Patient_ID . "\t" . $Value 
+					 . "\t" . $Probe_ID . "\t" . $Gene_Symbol 
+					 . "\t" . $Subset . "\n";
+					 
+	}
+	close $FH;
+	
+	$counter_subset++;
+}
+
+#Buffered write errors only surface when the handle is closed
+close $WH or die "Cannot close $O_file: $!";
--- a/tool-data/Rscripts/Create_VCF_Allele_matrix.pl
+++ b/tool-data/Rscripts/Create_VCF_Allele_matrix.pl
+#!/usr/bin/env perl
+
+##############################################################
+## Name: Create_VCF_Allele_matrix.pl                        ##
+## Description: Creates input file for subset to            ##
+## perform the Fisher exact test to detect significant      ##
+## deviations in the proportions for minor and major        ##
+## alleles between two subsets. To perform the analysis,    ##
+## two subset files need to be generated by this script     ##
+## Usage: Create_VCF_Allele_matrix.pl <input_file>          ##
+##        <output_file>                                     ##
+## Author: serge.eifes@uni.lu                               ##
+##############################################################
+
+
+
+###Modules loaded
+use strict;
+use warnings;
+
+#Argument variables (positional): input file and output file
+die "Usage: $0 <input_file> <output_file>\n" unless(@ARGV >= 2);
+
+my $file   = $ARGV[0];
+my $O_file = $ARGV[1];
+
+
+#To store the corresponding column num for given col
+my %H_col2colNum;
+
+#Hash to store data extracted from input file:
+#marker ("<chromosome>_<position>") => patient => variant
+my %HoH_MarkerAndPatient2Variant;
+
+#Names of the columns we need:
+my @A_colnames = ("PATIENT ID", "VARIANT", "CHROMOSOME", "POSITION");
+
+## START Input file parser ##
+#Three-argument open with a lexical handle, so that special characters in
+#the file name can never be interpreted as part of the open mode.
+open( my $FH, '<', $file ) or die "Cannot open $file: $!";
+
+while (my $Line = <$FH>) {
+
+	chomp($Line);
+	
+	#Empty lines carry no data
+	next if($Line =~ /^\s*$/);
+	
+	my (@A_Line) = split /\t/, $Line;
+	
+	#The header line is the one naming all required columns; it tells us
+	#at which position each of them is found.
+	if(($Line=~/PATIENT ID/) && ($Line=~/VARIANT/) && ($Line=~/CHROMOSOME/) && ($Line=~/POSITION/)){
+		foreach my $colname (@A_colnames){
+			$H_col2colNum{$colname} = get_col_num($colname, \@A_Line);
+		}
+		next;
+	}
+	
+	#Without a header the column positions are unknown
+	die "No header line with the columns " . join(", ", @A_colnames)
+		. " found before the first data line of $file\n"
+		unless(%H_col2colNum);
+	
+	my $Chromosome = $A_Line[$H_col2colNum{"CHROMOSOME"}];
+	my $Position   = $A_Line[$H_col2colNum{"POSITION"}];
+	my $Patient    = $A_Line[$H_col2colNum{"PATIENT ID"}];
+	my $Variant    = $A_Line[$H_col2colNum{"VARIANT"}];
+	
+	#A line too short to identify marker and patient cannot be placed in
+	#the matrix.
+	unless(defined($Chromosome) && defined($Position) && defined($Patient)){
+		warn "Skipping incomplete line $. of $file\n";
+		next;
+	}
+	
+	my $Marker = $Chromosome . "_" . $Position;
+	$HoH_MarkerAndPatient2Variant{$Marker}{$Patient}=$Variant;
+	
+}
+close $FH;
+## END Input file parser ##
+
+
+## START Storing all Markers and patients in array ##
+#Markers in sorted order; patients in the order in which they are first met
+#when walking the sorted markers and, per marker, the sorted patients.
+my @A_uniq_markers = sort keys %HoH_MarkerAndPatient2Variant;
+
+my @A_patients;
+foreach my $Marker (@A_uniq_markers){
+	push(@A_patients, sort keys %{$HoH_MarkerAndPatient2Variant{$Marker}});
+}
+my @A_uniq_patients = uniq(@A_patients);
+## END Storing all Markers and patients in array ##
+
+
+## START Printing out the results/matrix
+open(my $WH, '>', $O_file) or die "Cannot create $O_file: $!";
+
+#First line: corner cell followed by the patients (column names)
+print $WH join("\t", "//", @A_uniq_patients) . "\n";
+
+#One line per marker: marker name followed by the variant of each patient.
+#A patient without data for the marker gets an empty cell.
+foreach my $Marker (@A_uniq_markers){
+	my @A_row;
+	foreach my $Patient (@A_uniq_patients){
+		my $Variant = $HoH_MarkerAndPatient2Variant{$Marker}{$Patient};
+		push(@A_row, defined($Variant) ? $Variant : "");
+	}
+	print $WH join("\t", $Marker, @A_row) . "\n";
+}
+
+#Buffered write errors only surface when the handle is closed
+close $WH or die "Cannot close $O_file: $!";
+## END Printing out the results/matrix
+
+
+
+### FUNCTIONS ###
+sub get_col_num{
+	#Returns the zero-based position of the first header field that is
+	#exactly equal to the requested column name. Dies if no field matches.
+	#  1st argument: column name to look for
+	#  2nd argument: reference to the array holding the header fields
+	my ($wanted, $header_ref) = @_;
+	my @fields = @{$header_ref};
+	
+	for my $position (0 .. $#fields){
+		return($position) if($fields[$position] eq $wanted);
+	}
+	
+	die "Column $wanted not found. Please check the input file header!!!";
+}
+
+##Eliminate duplicate items in array:
+sub uniq {
+	#Returns the arguments in their original order, keeping only the first
+	#occurrence of each value. Values are compared as hash keys (strings).
+	my %already_seen;
+	my @kept;
+	foreach my $item (@_){
+		next if($already_seen{$item}++);
+		push(@kept, $item);
+	}
+	return @kept;
+}
\ No newline at end of file
--- a/tool-data/Rscripts/MarkerSelection.R
+++ b/tool-data/Rscripts/MarkerSelection.R
+#!/usr/bin/env Rscript        
+
+###########################################################################
+# Copyright 2008-2012 Janssen Research & Development, LLC.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+###########################################################################
+
+###########################################################################
+#### Code rewritten by Wei and Serge
+###########################################################################
+
+
+###########################################################################
+#Comparative Marker Selection
+##########################################################################
+
+#Function to obtain the aligned positions all items in the vectorToAlign
+#in the vectorRef
+get.reordered_index = function(vectorToAlign, vectorRef){
+	# For each of the first length(vectorToAlign) items of vectorRef, returns
+	# the position of that item in vectorToAlign, so that vectorToAlign[result]
+	# follows the order of vectorRef.
+	#
+	# Every position of vectorToAlign is handed out at most once: items that
+	# occur several times in both vectors are paired in order of appearance
+	# instead of all pointing at the first occurrence.
+	#
+	# Stops with an error when an item of vectorRef has no (remaining) match.
+	
+	vectorToAlign = as.vector(vectorToAlign)
+	vectorRef = as.vector(vectorRef)
+
+	n = length(vectorToAlign)
+	res = vector(mode="integer", length=n)
+	used = logical(n) # positions of vectorToAlign already handed out
+	
+	# seq_len() yields no iteration for empty input (1:0 would yield c(1, 0))
+	for(i in seq_len(n)){
+		idx = which(!used & vectorToAlign == vectorRef[i])
+		if(length(idx) == 0){
+			stop(paste("Item [", vectorRef[i], "] of the reference vector has no unused match in the vector to align.", sep=""))
+		}
+		res[i] = idx[1]
+		used[idx[1]] = TRUE
+	}
+	return(res)
+}
+
+
+## Main function to perform DEG analysis:
+MS.loader <- function(
+		input.filename="outputfile.txt",
+		output.file ="CMS.TXT",
+		numberOfMarkers = 100,
+		out.heatmap ="heatmapdata",
+		mhcCorrection = 2
+)
+{
+	# Comparative marker selection between two subsets with limma.
+	#
+	# Arguments:
+	#   input.filename  - tab-delimited file with the columns PATIENT.ID, VALUE,
+	#                     PROBE.ID and GENE_SYMBOL. PATIENT.ID values starting
+	#                     with "S1" / "S2" define the two subsets.
+	#   output.file     - file receiving the statistics table of the top probes.
+	#   numberOfMarkers - number of top-ranked probes (by limma B statistic) to
+	#                     report; a number or a numeric string.
+	#   out.heatmap     - file receiving the expression values of those probes.
+	#   mhcCorrection   - multiple testing correction of the p-values:
+	#                     0 = none, 1 = BH, 2 = BY, 3 = Holm.
+	#
+	# The function is called for its side effect of writing both files.
+	##########################################
+	
+	#We need this to do the ddply below.
+	suppressMessages(library(plyr))
+	suppressMessages(library(reshape2))
+	
+	suppressMessages(library(limma))
+	
+	#---------------------
+	
+	#CHECK ARGUMENTS
+	
+	#The marker count arrives as a string when it comes from the command line.
+	numberOfMarkers = suppressWarnings(as.integer(numberOfMarkers))
+	if(length(numberOfMarkers) != 1 || is.na(numberOfMarkers) || numberOfMarkers < 1){
+		stop("numberOfMarkers must be a positive whole number.")
+	}
+	
+	#---------------------
+	
+	#PREPARE RAW DATA
+	
+	#Pull the GEX data from the file.
+	mRNAData <- data.frame(read.delim(input.filename))
+	
+	#Trim the probe.id field.
+	mRNAData$PROBE.ID 		<- gsub("^\\s+|\\s+$", "",mRNAData$PROBE.ID)
+	mRNAData$GENE_SYMBOL 	<- gsub("^\\s+|\\s+$", "",mRNAData$GENE_SYMBOL)
+	mRNAData$PATIENT.ID   	<- gsub("^\\s+|\\s+$", "",mRNAData$PATIENT.ID)
+
+	#Getting rid of the probesets w/o associated gene symbol:
+	idx_wo_symbol = which(is.na(mRNAData$GENE_SYMBOL))
+	if(length(idx_wo_symbol)>0){	
+		mRNAData=mRNAData[-idx_wo_symbol,]
+	}
+	
+	#Create a data.frame with unique probe/gene ids.
+	geneStatsData <- data.frame(mRNAData$PROBE.ID,mRNAData$GENE_SYMBOL);
+	
+	#Add a column name to our data.frame.
+	colnames(geneStatsData) <- c('PROBE.ID','GENE_SYMBOL')	
+	
+	geneStatsData <- unique(geneStatsData[,c("PROBE.ID","GENE_SYMBOL")]);
+	
+	#---------------------
+	
+	#---------------------
+	#Prepare the casted raw data.
+	#Get a copy of the raw data.
+	coercedData <- mRNAData
+	
+	#Grab only the columns we need for doing the melt/cast.
+	coercedData <- coercedData[c('PATIENT.ID','VALUE','PROBE.ID','GENE_SYMBOL')]
+
+	#Melt the data, leaving 2 columns as the grouping fields.
+	meltedData <- melt(coercedData, id=c("PROBE.ID","GENE_SYMBOL","PATIENT.ID"))
+	
+	#Cast the data into a format that puts the PATIENT.ID in a column.
+	coercedData <- data.frame(dcast(meltedData, PROBE.ID + GENE_SYMBOL ~ PATIENT.ID))
+	
+	#The PATIENT.ID column needs to be removed if exists in the matrix!!!!!!!
+	idx.patient_id = which(colnames(coercedData)=="PATIENT.ID")
+	
+	if(length(idx.patient_id)==1){
+		coercedData = coercedData[,-idx.patient_id]
+	}
+	
+	#When we convert to a data frame the numeric columns get an x in front of them. Remove them here.
+	colnames(coercedData) <- sub("^X","",colnames(coercedData))
+	
+	#Get a gene list that we can use later to preserve the list of the genes.
+	geneList <- as.vector(coercedData$GENE_SYMBOL)
+	probeList <- as.vector(coercedData$PROBE.ID)
+	#---------------------
+	
+	#---------------------
+	#Fitting linear model with limma
+	
+	#Remove the gene_symbol and probe.id columns.
+	coercedDataWithoutGroup <- data.matrix(subset(coercedData, select=-c(GENE_SYMBOL,PROBE.ID)))
+	
+	rownames(coercedDataWithoutGroup)=coercedData$PROBE.ID # Rownames have to be added
+	
+	#Creating a named vector for mapping PROBE.ID to GENE.SYMBOL.
+	#Plain character vectors, so that the lookup below is by name and never by
+	#the integer codes of a factor.
+	gene.symbols=as.character(coercedData$GENE_SYMBOL)
+	names(gene.symbols)=as.character(coercedData$PROBE.ID)
+	
+	
+	#Get a vector representing our subsets.
+	classVector <- colnames(coercedDataWithoutGroup)
+	classVector <- gsub("^S1.*","0",classVector)
+	classVector <- gsub("^S2.*","1",classVector)
+	classVector <- as.numeric(classVector)
+	
+	
+	#Check the class vector to verify we have two subsets.
+	if(length(unique(classVector)) < 2) stop("||FRIENDLY||There is only one subset selected, please select two in order to run the comparative analysis.")
+	
+	
+	#Translate the correction code into a method name of p.adjust().
+	#p.adjust() only knows the lower case spelling "holm".
+	mhcMethod = switch(as.character(mhcCorrection),
+			"0"="none",
+			"1"="BH",
+			"2"="BY",
+			"3"="holm",
+			stop(paste("Unsupported mhcCorrection value [", mhcCorrection, "]. Use 0 (none), 1 (BH), 2 (BY) or 3 (Holm).", sep=""))
+	)
+	
+	
+	#LINEAR MODEL FITTING
+	
+	#Creating Design matrix
+	S1 = integer(length(classVector))
+	S1[which(classVector==0)]=1
+	S2 = classVector
+	
+	design <- cbind(S1=S1,S2=S2)
+	
+	#... and contrast matrix
+	contrast.matrix = makeContrasts(S1-S2, levels=design)
+	
+	# Linear model fitting
+	fit <- lmFit(coercedDataWithoutGroup, design)
+	fit <- contrasts.fit(fit, contrast.matrix)
+	fit <- eBayes(fit)
+	
+	#The data structure of topTable for newer versions of limma has been modified.
+	#This causes errors during execution of the script. As a workaround, a data
+	#structure identical to the topTable output expected by tranSMART is created.
+	
+	contr=1 #The contrast in the fitting to be used for the results table
+	
+	#Results data frame which provides all required statistics for output data
+	top.fit = data.frame(
+			ID=rownames(fit$coefficients), #Depending on limma version fit$genes doesn't exist anymore
+			logFC=fit$coefficients[,contr],
+			t=fit$t[,contr],
+			P.Value=fit$p.value[,contr],
+			adj.P.val=p.adjust(p=fit$p.value[,contr], method=mhcMethod),
+			B=fit$lods[,contr],
+			stringsAsFactors=FALSE
+	)
+	
+	top.fit.ranked.decr = top.fit[ order(top.fit$B, decreasing=T), ] #Ordering the data according to limma B statistic
+	rownames(top.fit.ranked.decr) = NULL
+	
+	#Filtering down the results table to the number of genes selected by user.
+	#Never ask for more rows than there are probes: that would add rows of NA.
+	n_keep = min(numberOfMarkers, nrow(top.fit.ranked.decr))
+	top.fit.ranked.decr.filt = top.fit.ranked.decr[seq_len(n_keep),]
+	topgenes = cbind(gene.symbols[as.character(top.fit.ranked.decr.filt$ID)], top.fit.ranked.decr.filt)
+	colnames(topgenes) = c("GENE_SYMBOL", "PROBE.ID", "logFC", "t", "P.value", "adj.P.val", "B")
+	
+	rownames(topgenes) = NULL #This is the final results data object for Marker Selection workflow containing all data displayed in output table
+	
+	# End Linear model fitting
+	#---------------------
+	
+	#---------------------
+	#HEATMAP
+	
+	## Generating heatmap output data:
+	heatmapData = coercedData[which(coercedData[,"PROBE.ID"] %in% topgenes[,"PROBE.ID"]), ]
+	finalHeatmapData = heatmapData[, -c(1,2)]
+	GROUP = paste(as.vector(heatmapData[, "GENE_SYMBOL"]), as.vector(heatmapData[, "PROBE.ID"]), sep=" ")
+	finalHeatmapData = cbind(GROUP, finalHeatmapData)
+	
+	#Here we bring the lines of finalHeatmapData into the order of topgenes.
+	#The probe id is used as key: several probes can share one gene symbol, so
+	#the gene symbol does not identify a line.
+	idx = order(match(as.character(heatmapData[, "PROBE.ID"]), as.character(topgenes[, "PROBE.ID"])))
+	finalHeatmapData = finalHeatmapData[ idx, ]
+
+	#WRITE TO FILE
+	
+	#Write the file with the stats by gene. This will get read into the UI.
+	write.table(topgenes,output.file,sep = "\t",quote=F,row.names=F)
+	
+	#Write the data file we will use for the heatmap.
+	write.table(finalHeatmapData,out.heatmap,sep = "\t",quote=F,row.names=F)
+
+
+	##########################################
+}
+
+
+#### Executing the main job to obtain the DEG and corresponding heatmap file: ####
+
+#Getting command line arguments
+args = commandArgs(trailingOnly = TRUE)
+
+#All four positional arguments are required: input file, output file,
+#number of markers and heatmap output file.
+if(length(args) < 4){
+	stop("Usage: MarkerSelection.R <input_file> <output_file> <number_of_markers> <heatmap_output_file>")
+}
+
+input_file=as.vector(args[1])
+output_file=as.vector(args[2])
+
+#Command line arguments are strings; the marker count is needed as a number.
+numberOfMarkers=suppressWarnings(as.integer(args[3]))
+if(is.na(numberOfMarkers) || numberOfMarkers < 1){
+	stop(paste("The number of markers must be a positive whole number, got [", args[3], "].", sep=""))
+}
+
+out.heatmap=as.vector(args[4])
+
+MS.loader(input_file, output_file,numberOfMarkers, out.heatmap)
+
+
+
--- a/tool-data/Rscripts/VCF_FE_test.R
+++ b/tool-data/Rscripts/VCF_FE_test.R
+#!/usr/bin/env Rscript
+
+##############################################################
+## Name: VCF_FE_test.R                                      ##
+## Description: Performs the Fisher exact test to detect    ##
+## significant deviations in the proportions for minor      ##
+## and major alleles between the two subsets provided as    ##
+## input files.                                             ##
+## Usage: VCF_FE_test.R <input_file_1> <input_file_2>       ##
+##                      <output_file>                       ##
+## Author: serge.eifes@uni.lu                               ##
+##############################################################
+
+#######################################
+#######################################
+### START     FUNCTIONS            ####
+
+get_input_data = function(file){
+  # Reads a tab-delimited matrix file whose first line holds the column names
+  # and whose first column holds the row names (the top left cell is ignored).
+  # Returns the remaining cells as a matrix carrying those row and column names.
+  data = as.matrix(read.delim(file, header=F))
+
+  #Adding column and row names.
+  #drop=FALSE keeps the result a matrix even when the file holds a single
+  #data row or a single data column.
+  data_w_names = data[-1,-1, drop=FALSE]
+  row_labels = as.vector(data[-1,1])
+  col_labels = as.vector(data[1,-1])
+
+  rownames(data_w_names) = row_labels
+  colnames(data_w_names) = col_labels
+  
+  return(data_w_names)
+}
+
+split_alleles = function(row){
+  # Counts the alleles found in one row of genotypes.
+  #
+  # row: vector of genotype strings, one per patient, with the two alleles
+  #      separated by "/".
+  #
+  # Returns a table with the number of occurrences of each allele. Only the
+  # first two alleles of a genotype are used; a missing allele is NA and
+  # therefore not counted by table().
+  row = as.vector(row)
+
+  row.split.list = strsplit(row, "\\/", perl=T)
+
+  #lapply() simply yields nothing for an empty row, where a loop over
+  #1:length(row) would run over c(1, 0) and fail.
+  all_alleles_per_marker = unlist(lapply(row.split.list, function(patient_alleles){
+    patient_alleles[1:2]
+  }))
+
+  #unlist() of an empty list is NULL
+  if(is.null(all_alleles_per_marker)){
+    all_alleles_per_marker = character(0)
+  }
+
+  return(table(all_alleles_per_marker))
+}
+
+
+create_subset_count_list = function(data){
+  # Builds the allele counts of every marker of a subset.
+  #
+  # data: matrix with one row per marker (row names = marker names) holding
+  #       the genotypes passed to split_alleles().
+  #
+  # Returns a list named by marker; each element is the one-column matrix of
+  # allele counts of that marker (row names = alleles).
+  
+  subset = list()
+  markers = rownames(data)
+  
+  #seq_len() gives no iteration for a matrix without rows, where
+  #1:dim(data)[1] would run over c(1, 0) and fail.
+  for(i in seq_len(dim(data)[1])){
+    
+    datarow = data[i,]
+    marker = markers[i]
+    nt_counts = split_alleles(datarow)
+    nt_count_matrix = as.matrix(nt_counts)
+    
+    subset[[marker]] = nt_count_matrix
+  }
+  
+  return(subset)
+}
+
+
+get_sorted_count_vec = function(subset_data){
+  # Turns the first column of a count matrix into a vector named by the row
+  # names of the matrix and returns it sorted by increasing count.
+  
+  counts = as.vector(subset_data[,1])
+  names(counts) = rownames(subset_data)
+  return(sort(counts, method="quick"))
+}
+
+
+#Checks sorted_count_vec if minor allele (with count 0) needs to be added
+check_sorted_count_vec_for_minor = function(sorted_count_vec_a){
+	# A vector holding a single count is expanded to two entries: a count of 0
+	# in first position and the given count in second position (names are not
+	# kept). Any other vector is returned unchanged.
+	if(length(sorted_count_vec_a)!=1){
+		return(sorted_count_vec_a)
+	}
+	
+	padded_count_vec = vector(mode="integer", length=2)
+	padded_count_vec[2] = sorted_count_vec_a[1]
+	padded_count_vec[1] = 0
+	return(padded_count_vec)
+}
+
+### END       FUNCTIONS          ######
+#######################################
+#######################################
+
+
+
+#######################################
+### START        MAIN CODE         ####
+
+
+#Getting command line arguments
+args = commandArgs(trailingOnly = TRUE)
+
+if(length(args) < 3){
+  stop("Usage: VCF_FE_test.R <input_file_1> <input_file_2> <output_file>")
+}
+
+input_file_1=as.vector(args[1])
+input_file_2=as.vector(args[2])
+output_file=as.vector(args[3])
+
+data_1 = get_input_data(input_file_1)
+data_2 = get_input_data(input_file_2)
+
+subset_list_1 = create_subset_count_list(data_1)
+subset_list_2 = create_subset_count_list(data_2)
+ 
+markers_subset_list_1 = names(subset_list_1)
+markers_subset_list_2 = names(subset_list_2)
+
+#A marker can only be tested when it has been observed in both subsets
+common_markers = markers_subset_list_1[markers_subset_list_1 %in% markers_subset_list_2]
+
+#The results of the FE test are collected in these vectors, one entry per
+#tested marker. They are sized for the maximum and cut down after the loop.
+chromosomes = character(length(common_markers))
+positions = character(length(common_markers))
+p_values = numeric(length(common_markers))
+num_results = 0
+
+#Iterating over each marker and performing FE test to compare
+#the allele frequencies in both subsets
+for(marker in common_markers){
+
+  count_vec_1 = get_sorted_count_vec(subset_list_1[[marker]])
+  count_vec_2 = get_sorted_count_vec(subset_list_2[[marker]])
+
+  #A subset without any observed allele for the marker cannot be compared
+  if(sum(count_vec_1) == 0 || sum(count_vec_2) == 0) next
+
+  #Nothing to test when the same single allele is seen in both subsets
+  alleles = union(names(count_vec_1), names(count_vec_2))
+  if(length(alleles) < 2) next
+
+  ##Contingency table: one row per subset, one column per allele.
+  #The counts are placed by allele name, so a column always refers to the
+  #same allele in both subsets. An allele absent from a subset keeps count 0.
+  input.matrix = matrix(0, nrow=2, ncol=length(alleles),
+                        dimnames=list(c("subset_1", "subset_2"), alleles))
+  input.matrix["subset_1", names(count_vec_1)] = count_vec_1
+  input.matrix["subset_2", names(count_vec_2)] = count_vec_2
+
+  #FE test. A failure for one marker must not abort the whole run.
+  p.value = tryCatch(fisher.test(input.matrix)$p.value, error=function(e){
+    warning(paste("Fisher test failed for marker ", marker, ": ", conditionMessage(e), sep=""))
+    NA_real_
+  })
+
+  #Storing the result; the marker name is <chromosome>_<position>, split at
+  #its last underscore
+  num_results = num_results + 1
+  chromosomes[num_results] = sub("_[^_]*$", "", marker)
+  positions[num_results] = sub("^.*_", "", marker)
+  p_values[num_results] = p.value
+}
+
+#Printing results to file ordered by p-value.
+#The p-values are kept numeric so that they are ordered by value and not
+#alphabetically.
+kept = seq_len(num_results)
+results = data.frame(chromosomes[kept], positions[kept], p_values[kept], stringsAsFactors=F)
+colnames(results) = c("Chromosome", "Position", "P-value")
+results = results[order(results[,"P-value"]), , drop=F]
+write.table(results, output_file, quote=F, sep="\t", row.names=F, col.names=T)
+
+### END          MAIN CODE         ####
+#######################################
+
--- a/tool-data/Rscripts/test.R
+++ b/tool-data/Rscripts/test.R
+#!/usr/bin/Rscript --vanilla
+
+# user input (default values; each can be replaced on the command line with
+# an argument of the form key=value)
+fileName     <- "mrna.tsv"
+
+# names of the options that may be set from the command line
+known_options <- c("fileName")
+
+# check command line parameters
+args <- commandArgs(TRUE)
+keys <- vector ("character")
+for (arg in args) {
+  # split at the first '=' only, so that the value itself may contain '='
+  eq_pos <- regexpr("=", arg, fixed=TRUE)
+  if (eq_pos > 0) {
+    key <- substr(arg, 1, eq_pos - 1)
+    value <- substr(arg, eq_pos + 1, nchar(arg))
+  } else {
+    key <- arg
+    value <- NA
+  }
+  keys <- c(keys, key)
+  # only the options listed above are accepted; testing exists(key) would
+  # accept the name of any R object (e.g. "c" or "print")
+  if (key %in% known_options) {
+    if (is.na(value)) {
+      cat("\n")
+      stop(paste("Option [",key,"] requires a value (",key,"=value).\n\n", sep=""))
+    }
+    # replace default value of key with input value
+    assign(key, value)
+  } else {
+    cat("\n")
+    stop(paste("Unrecognized option [",key,"].\n\n", sep=""))
+  }
+}
+
+# copy the first lines of the input table to the output file
+a<-read.table(fileName,sep="\t")
+write.table(head(a),file="outputfile.txt")
--- a/tool_conf.xml
+++ b/tool_conf.xml
+<?xml version='1.0' encoding='utf-8'?>
+<toolbox>
+  <!-- "Get Data" section: the upload tool and the UCSC table browser data source. -->
+  <section id="getext" name="Get Data">
+    <tool file="data_source/upload.xml" />
+    <tool file="data_source/ucsc_tablebrowser.xml" />
+</section>
+   <!-- <tool file="data_source/ucsc_tablebrowser_test.xml" />
+    <tool file="data_source/ucsc_tablebrowser_archaea.xml" />
+    <tool file="data_source/ebi_sra.xml" />
+    <tool file="data_source/microbial_import.xml" />
+    <tool file="data_source/biomart.xml" />
+    <tool file="data_source/biomart_test.xml" />
+    <tool file="data_source/cbi_rice_mart.xml" />
+    <tool file="data_source/gramene_mart.xml" />
+    <tool file="data_source/fly_modencode.xml" />
+    <tool file="data_source/worm_modencode.xml" />
+    <tool file="data_source/wormbase.xml" />
+    <tool file="data_source/wormbase_test.xml" />
+    <tool file="data_source/eupathdb.xml" />
+    <tool file="data_source/hbvar.xml" />
+    <tool file="genomespace/genomespace_file_browser_prod.xml" />
+    <tool file="genomespace/genomespace_importer.xml" />
+    <tool file="validation/fix_errors.xml" /> -->
+<!-- "R scripts" section: the five tool definitions stored under Rscripts/. -->
+<section id="rscripts" name="R scripts">
+    <tool file="Rscripts/test.xml" />
+    <tool file="Rscripts/tm_create_input.xml" />
+    <tool file="Rscripts/marker_selection.xml" />
+    <tool file="Rscripts/tm_VCF_input.xml" />
+    <tool file="Rscripts/VCF_FE_test.xml" />
+</section>
+
+<!-- "IMP pipeline" section: the tool definitions stored under imp/. -->
+<section id="imp-pipeline" name="IMP pipeline">
+	<tool file="imp/imp-annotation.xml" />
+	<tool file="imp/imp-assemble_idba-mt.xml" />
+	<tool file="imp/imp-assemble_idba_ud.xml" />
+	<tool file="imp/imp-cufflink.xml" />
+	<tool file="imp/imp-de_duplicate.xml" />
+	<tool file="imp/imp-concatenate.xml" />
+	<tool file="imp/imp-mapping.xml" />
+	<tool file="imp/imp-extract_mapped.xml" />
+	<tool file="imp/imp-fq2fa.xml" />
+	<tool file="imp/imp-extract_unmapped.xml" />
+	<tool file="imp/imp-merge.xml" />
+	<tool file="imp/imp-trim.xml" />
+	<tool file="imp/imp-variantcalling.xml" />
+</section>
+
+
+  <!-- <section id="send" name="Send Data">
+    <tool file="genomespace/genomespace_exporter.xml" />
+  </section>
+  <section id="liftOver" name="Lift-Over">
+    <tool file="extract/liftOver_wrapper.xml" />
+  </section> -->
+  <!-- <section id="textutil" name="Text Manipulation">
+    <tool file="filters/fixedValueColumn.xml" />
+    <tool file="stats/column_maker.xml" />
+    <tool file="filters/catWrapper.xml" />
+    <tool file="filters/cutWrapper.xml" />
+    <tool file="filters/mergeCols.xml" />
+    <tool file="filters/convert_characters.xml" />
+    <tool file="filters/CreateInterval.xml" />
+    <tool file="filters/cutWrapper.xml" />
+    <tool file="filters/changeCase.xml" />
+    <tool file="filters/pasteWrapper.xml" />
+    <tool file="filters/remove_beginning.xml" />
+    <tool file="filters/randomlines.xml" />
+    <tool file="filters/headWrapper.xml" />
+    <tool file="filters/tailWrapper.xml" />
+    <tool file="filters/trimmer.xml" />
+    <tool file="filters/wc_gnu.xml" />
+    <tool file="filters/secure_hash_message_digest.xml" />
+    <tool file="stats/dna_filtering.xml" />
+    <tool file="fastax_toolkit/seqid_uncollapser.xml" />
+  </section> -->
+  <!-- <section id="filter" name="Filter and Sort">
+    <tool file="stats/filtering.xml" />
+    <tool file="filters/sorter.xml" /> -->
+  <!--  <tool file="filters/grep.xml" />
+    
+    <label id="gff" text="GFF" />
+    <tool file="filters/gff/extract_GFF_Features.xml" />
+    <tool file="filters/gff/gff_filter_by_attribute.xml" />
+    <tool file="filters/gff/gff_filter_by_feature_count.xml" />
+    <tool file="filters/gff/gtf_filter_by_attribute_values_list.xml" /> -->
+ 
+  <!-- <section id="group" name="Join, Subtract and Group">
+    <tool file="filters/joiner.xml" />
+    <tool file="filters/compare.xml" />
+    <tool file="stats/grouping.xml" />
+  </section> -->
+  <!-- <section id="convert" name="Convert Formats">
+    <tool file="filters/axt_to_concat_fasta.xml" />
+    <tool file="filters/axt_to_fasta.xml" />
+    <tool file="filters/axt_to_lav.xml" />
+    <tool file="filters/bed2gff.xml" />
+    <tool file="fasta_tools/fasta_to_tabular.xml" />
+    <tool file="filters/gff2bed.xml" />
+    <tool file="filters/lav_to_bed.xml" />
+    <tool file="maf/maf_to_bed.xml" />
+    <tool file="maf/maf_to_interval.xml" />
+    <tool file="maf/maf_to_fasta.xml" />
+    <tool file="fasta_tools/tabular_to_fasta.xml" />
+    <tool file="filters/wiggle_to_simple.xml" />
+    <tool file="filters/sff_extractor.xml" />
+    <tool file="filters/gtf2bedgraph.xml" />
+    <tool file="filters/wig_to_bigwig.xml" />
+    <tool file="filters/bed_to_bigbed.xml" />
+    <tool file="fastq_tools/fq2fa.xml" />
+  </section> -->
+  <!-- <section id="features" name="Extract Features">
+    <tool file="filters/ucsc_gene_bed_to_exon_bed.xml" />
+  </section>
+  <section id="fetchSeq" name="Fetch Sequences">
+    <tool file="extract/extract_genomic_dna.xml" />
+  </section>
+  <section id="fetchAlign" name="Fetch Alignments">
+    <tool file="maf/interval2maf_pairwise.xml" />
+    <tool file="maf/interval2maf.xml" />
+    <tool file="maf/maf_split_by_species.xml" />
+    <tool file="maf/interval_maf_to_merged_fasta.xml" />
+    <tool file="maf/genebed_maf_to_fasta.xml" />
+    <tool file="maf/maf_stats.xml" />
+    <tool file="maf/maf_thread_for_species.xml" />
+    <tool file="maf/maf_limit_to_species.xml" />
+    <tool file="maf/maf_limit_size.xml" />
+    <tool file="maf/maf_by_block_number.xml" />
+    <tool file="maf/maf_reverse_complement.xml" />
+    <tool file="maf/maf_filter.xml" />
+  </section>
+  <section id="scores" name="Get Genomic Scores">
+    <tool file="filters/wiggle_to_simple.xml" />
+    <tool file="stats/aggregate_binned_scores_in_intervals.xml" />
+  </section>
+  <section id="bxops" name="Operate on Genomic Intervals">
+    <tool file="annotation_profiler/annotation_profiler.xml" />
+  </section> -->
+  <!-- <section id="stats" name="Statistics">
+    <tool file="stats/gsummary.xml" />
+    <tool file="filters/uniq.xml" />
+    <tool file="stats/cor.xml" />
+    <tool file="stats/generate_matrix_for_pca_lda.xml" />
+    <tool file="stats/lda_analy.xml" />
+    <tool file="stats/plot_from_lda.xml" />
+    <tool file="stats/MINE.xml" />
+    
+    <label id="gff" text="GFF" />
+    <tool file="stats/count_gff_features.xml" />
+  </section> -->
+  <!-- <section id="plots" name="Graph/Display Data">
+    <tool file="plotting/histogram2.xml" />
+    <tool file="plotting/scatterplot.xml" />
+    <tool file="plotting/bar_chart.xml" />
+    <tool file="plotting/boxplot.xml" />
+    <tool file="visualization/LAJ.xml" />
+    <tool file="visualization/build_ucsc_custom_track.xml" />
+    <tool file="maf/vcf_to_maf_customtrack.xml" />
+    <tool file="mutation/visualize.xml" />
+  </section> -->
+  <!-- <section id="multVar" name="Multivariate Analysis">
+    <tool file="multivariate_stats/pca.xml" />
+    <tool file="multivariate_stats/cca.xml" />
+    <tool file="multivariate_stats/kpca.xml" />
+    <tool file="multivariate_stats/kcca.xml" />
+  </section> -->
+  <!-- <section id="hyphy" name="Evolution">
+    <tool file="evolution/codingSnps.xml" />
+    <tool file="evolution/add_scores.xml" />
+  </section> -->
+  <!-- <section id="motifs" name="Motif Tools">
+    <tool file="meme/meme.xml" />
+    <tool file="meme/fimo.xml" />
+  </section> -->
+  <!-- <section id="clustal" name="Multiple Alignments">
+  </section>
+  <section id="fasta_manipulation" name="FASTA manipulation">
+    <tool file="fasta_tools/fasta_compute_length.xml" />
+    <tool file="fasta_tools/fasta_filter_by_length.xml" />
+    <tool file="fasta_tools/fasta_concatenate_by_species.xml" />
+    <tool file="fasta_tools/fasta_to_tabular.xml" />
+    <tool file="fasta_tools/tabular_to_fasta.xml" />
+</section> -->
+  <!-- <section id="NGS_QC" name="NGS: QC and manipulation">
+    
+    <label id="fastqcsambam" text="FastQC: fastq/sam/bam" />
+    
+    <label id="illumina" text="Illumina fastq" />
+    
+    <label id="454" text="Roche-454 data" />
+    <tool file="metag_tools/short_reads_figure_score.xml" />
+    <tool file="metag_tools/short_reads_trim_seq.xml" />
+    
+    <label id="solid" text="AB-SOLiD data" />
+    <tool file="next_gen_conversion/solid2fastq.xml" />
+    <tool file="solid_tools/solid_qual_stats.xml" />
+    <tool file="solid_tools/solid_qual_boxplot.xml" />
+  
+  </section> -->
+<!-- <section name="FASTA/Q Information" id="cshl_library_information">
+    <tool file="fastax_toolkit/fastx_quality_statistics.xml" />
+    <tool file="fastax_toolkit/fastq_quality_boxplot.xml" />
+    <tool file="fastax_toolkit/fastx_nucleotides_distribution.xml" />
+    <tool file="fastax_toolkit/fasta_clipping_histogram.xml" />
+  </section> -->
+
+<!-- <section name="FASTA/Q Preprocessing" id="cshl_fastx_manipulation">    
+    <label id="fastax_toolkit" text="Tools for FASTQ data" />
+	<tool file="fastax_toolkit/fastq_to_fasta.xml" />
+        <tool file="fastax_toolkit/fastq_quality_converter.xml" />
+        <tool file="fastax_toolkit/fastx_clipper.xml" />
+        <tool file="fastax_toolkit/fastx_trimmer.xml" />
+        <tool file="fastax_toolkit/fastx_trimmer_from_end.xml" />
+        <tool file="fastax_toolkit/fastq_quality_trimmer.xml" />
+        <tool file="fastax_toolkit/fastx_renamer.xml" />
+        <tool file="fastax_toolkit/fastx_reverse_complement.xml" />
+        <tool file="fastax_toolkit/fasta_formatter.xml" />
+        <tool file="fastax_toolkit/fasta_nucleotide_changer.xml" />
+        <tool file="fastax_toolkit/fastx_artifacts_filter.xml" />
+        <tool file="fastax_toolkit/fastq_quality_filter.xml" />
+        <tool file="fastax_toolkit/fastq_masker.xml" />
+        <tool file="fastax_toolkit/fastx_collapser.xml" />
+        <tool file="fastax_toolkit/fastx_uncollapser.xml" />
+        <tool file="fastax_toolkit/fastx_barcode_splitter.xml" />
+        <tool file="fastq_tools/fastuniq.xml" />
+        <tool file="fastq_tools/trim-fastq.xml" />  
+ </section>-->
+  <!--
+  Keep this section commented until it includes tools that
+  will be hosted on test/main.  The velvet wrappers have been
+  included in the distribution but will not be hosted on our
+  public servers for the current time.
+  <section name="NGS: Assembly" id="ngs_assembly">
+  <label text="Velvet" id="velvet"/>
+  <tool file="sr_assembly/velvetg.xml" />
+  <tool file="sr_assembly/velveth.xml" />
+  </section>
+  -->
+  <!-- <section id="solexa_tools" name="NGS: Mapping">
+    <tool file="sr_mapping/bfast_wrapper.xml" />
+    <tool file="metag_tools/megablast_wrapper.xml" />
+    <tool file="metag_tools/megablast_xml_parser.xml" />
+    <tool file="sr_mapping/PerM.xml" />
+    <tool file="sr_mapping/srma_wrapper.xml" />
+    <tool file="sr_mapping/mosaik.xml" />
+  </section> -->
+  <!-- <section id="ngs-rna-tools" name="NGS: RNA Analysis">
+    
+    <label id="rna_seq" text="RNA-seq" />
+    <label id="filtering" text="Filtering" />
+  </section> -->
+ <!--  <section id="samtools" name="NGS: SAM Tools">
+  </section> -->
+  <!-- <section id="ngs-simulation" name="NGS: Simulation">
+    <tool file="ngs_simulation/ngs_simulation.xml" />
+  </section> -->
+  <!-- <section id="hgv" name="Phenotype Association">
+    <tool file="evolution/codingSnps.xml" />
+    <tool file="evolution/add_scores.xml" />
+    <tool file="phenotype_association/sift.xml" />
+    <tool file="phenotype_association/linkToGProfile.xml" />
+    <tool file="phenotype_association/linkToDavid.xml" />
+    <tool file="phenotype_association/snpFreq.xml" />
+    <tool file="phenotype_association/ldtools.xml" />
+    <tool file="phenotype_association/pass.xml" />
+    <tool file="phenotype_association/gpass.xml" />
+    <tool file="phenotype_association/beam.xml" />
+    <tool file="phenotype_association/lps.xml" />
+    <tool file="phenotype_association/master2pg.xml" />
+    <tool file="phenotype_association/vcf2pgSnp.xml" />
+  </section> -->
+<!-- <section id="imppipeline-previous" name="IMP pipeline previous">
+	<label id="preprocessing" text="Preprocessing" />
+	<tool file="fastax_toolkit/fastx_trimmer.xml" />
+        <tool file="fastq_tools/trim-fastq.xml" />
+	
+	<label id="ass" text="Assembly" />
+	<tool file="fastq_tools/fq2fa.xml" />
+	<tool file="assembly/idba_tran.xml" />
+	<tool file="assembly/idba_ud.xml" />
+	
+	<label id="visualization" text="Visualization" />
+	<tool file="visualization/vizbin.xml" />
+
+	<label id="map" text="Mapping" />
+	<tool id="bwa_index" file="mapping/bwa_index.xml" />
+	<tool id="sam_filter" file="samtools/sam_filter.xml" />
+	<tool id="bam_merge" file="samtools/bam_merge.xml" />
+</section> -->
+<!-- <section id="utilities" name="Utilities">
+	<tool id="checksum" file="utilities/checksum.xml" />
+</section> -->
+</toolbox>
--- a/tools/Rscripts/VCF_FE_test.xml
+++ b/tools/Rscripts/VCF_FE_test.xml
+<tool id="tm_fe_vcf_test" name="Do Fisher test on VCF files" version="0.1">
+    <!--
+        Galaxy wrapper that runs Rscripts/VCF_FE_test.R from the Galaxy data index
+        directory. The script is called with three positional arguments: the two
+        input datasets, then the output dataset.
+        Every dataset path is single-quoted so that a path containing whitespace or
+        shell metacharacters is still passed to the script as one argument.
+    -->
+    <command>
+        ${GALAXY_DATA_INDEX_DIR}/Rscripts/VCF_FE_test.R '$input_s1' '$input_s2' '$output'
+    </command>
+
+    <!--
+        Two input datasets, one per subset.
+        NOTE(review): the labels mention VCF but both parameters are typed as
+        tabular; confirm which format VCF_FE_test.R actually expects.
+    -->
+    <inputs>
+        <param format="tabular" name="input_s1" type="data" label="vcf_subset_1"/>
+        <param format="tabular" name="input_s2" type="data" label="vcf_subset_2"/>
+    </inputs>
+
+    <!-- Single tabular result; its path is handed to the script as the third argument. -->
+    <outputs>
+        <data format="tabular" name="output" />
+    </outputs>
+
+    <help>
+        Do Fisher's test on VCF data
+    </help>
+
+</tool>
--- a/tools/Rscripts/marker_selection.xml
+++ b/tools/Rscripts/marker_selection.xml
+<tool id="tm_marker_selection_mrna" name="Do Marker Selection on TM Export mRNA files" version="0.1">
+    <!--
+        Galaxy wrapper that runs Rscripts/MarkerSelection.R from the Galaxy data index
+        directory. Positional arguments, in order: input dataset, result dataset,
+        number of markers, heatmap dataset.
+        Every argument is single-quoted so that a path containing whitespace or shell
+        metacharacters is still passed to the script as one argument.
+    -->
+    <command>
+        ${GALAXY_DATA_INDEX_DIR}/Rscripts/MarkerSelection.R '$input' '$output' '$nmarker' '$heatmap'
+    </command>
+
+    <!--
+        input:   one tabular dataset; the label indicates it holds both subsets.
+        nmarker: integer, defaults to 50.
+    -->
+    <inputs>
+        <param format="tabular" name="input" type="data" label="subset_1_and_2"/>
+        <param name="nmarker" type="integer" value="50" label="Number of Markers"/>
+    </inputs>
+
+    <!--
+        Two result datasets, both handed to the script as output paths.
+        NOTE(review): "heatmap" is declared as tabular; if MarkerSelection.R writes an
+        image there, the format should be changed accordingly. Confirm against the script.
+    -->
+    <outputs>
+        <data format="tabular" name="output" />
+        <data format="tabular" name="heatmap" />
+    </outputs>
+
+    <help>
+        Do Marker Selection using Limma
+    </help>
+
+</tool>
--- a/tools/Rscripts/test.xml
+++ b/tools/Rscripts/test.xml
+<tool id="testrscript" name="Test R script" version="0.1">
+    <!--
+        Galaxy wrapper that runs Rscripts/test.R from the Galaxy data index directory.
+        The selected dataset is passed as a single "fileName=PATH" argument; the path
+        is single-quoted so whitespace or shell metacharacters cannot split it.
+    -->
+    <command>
+        ${GALAXY_DATA_INDEX_DIR}/Rscripts/test.R fileName='$input_1'
+    </command>
+
+    <inputs>
+        <param format="tabular" name="input_1" type="data" label="Subset 1"/>
+    </inputs>
+
+    <!--
+        No output path is passed on the command line. The result is collected from
+        the file "outputfile.txt" in the job working directory (from_work_dir).
+        NOTE(review): presumably test.R writes that file itself; confirm.
+    -->
+    <outputs>
+        <data format="tabular" name="output" label="label_written_to_the_user" from_work_dir="outputfile.txt" />
+    </outputs>
+
+    <help>
+        Test tool: runs the test.R script on the selected dataset.
+    </help>
+
+</tool>
--- a/tools/Rscripts/tm_VCF_input.xml
+++ b/tools/Rscripts/tm_VCF_input.xml
+<tool id="tm_create_input_VCF" name="Create VCF input from TM Export files" version="0.1">
+    <!--
+        Galaxy wrapper that runs Rscripts/Create_VCF_Allele_matrix.pl from the Galaxy
+        data index directory with two positional arguments: the input dataset and the
+        output dataset.
+        Both paths are single-quoted so that a path containing whitespace or shell
+        metacharacters is still passed to the script as one argument.
+    -->
+    <command>
+        ${GALAXY_DATA_INDEX_DIR}/Rscripts/Create_VCF_Allele_matrix.pl '$input' '$output'
+    </command>
+
+    <inputs>
+        <param format="tabular" name="input" type="data" label="vcf_input"/>
+    </inputs>
+
+    <!--
+        Single tabular result; its path is handed to the script as the second argument.
+        The commented-out element below is an earlier variant that collected the result
+        from the job working directory instead; it is kept for reference only.
+    -->
+    <outputs>
+<!--        <data format="tabular" name="output" label="label_written_to_the_user" from_work_dir="outputfile.txt"></data> -->
+        <data format="tabular" name="output" />
+    </outputs>
+
+    <help>
+        Create VCF input from a TM export file by running Create_VCF_Allele_matrix.pl on the selected dataset.
+    </help>
+
+</tool>
--- a/tools/Rscripts/tm_create_input.xml
+++ b/tools/Rscripts/tm_create_input.xml
+<tool id="tm_create_input" name="Create input from TM Export files" version="0.1">
+    <!--
+        Galaxy wrapper that runs Rscripts/Create_MarkerSelection_inputfile.pl from the
+        Galaxy data index directory. The script reads its arguments positionally:
+        subset 1 file, subset 2 file, output file.
+        Every path is single-quoted so that a path containing whitespace or shell
+        metacharacters is still passed to the script as one argument.
+    -->
+    <command>
+        ${GALAXY_DATA_INDEX_DIR}/Rscripts/Create_MarkerSelection_inputfile.pl '$input_s1' '$input_s2' '$output'
+    </command>
+
+    <!-- One tabular dataset per subset; the order matters because the script tags rows as S1 / S2 by position. -->
+    <inputs>
+        <param format="tabular" name="input_s1" type="data" label="Subset 1"/>
+        <param format="tabular" name="input_s2" type="data" label="Subset 2" />
+    </inputs>
+
+    <!--
+        Single tabular result; its path is handed to the script as the third argument.
+        The commented-out element below is an earlier variant that collected the result
+        from the job working directory instead; it is kept for reference only.
+    -->
+    <outputs>
+<!--        <data format="tabular" name="output" label="label_written_to_the_user" from_work_dir="outputfile.txt"></data> -->
+        <data format="tabular" name="output" />
+    </outputs>
+
+    <help>
+        Merges the two subset mRNA export files into one tab-separated file with the columns
+        PATIENT.ID, VALUE, PROBE.ID, GENE_SYMBOL and SUBSET, the format used by the Marker Selection tool.
+        Rows from the first file are tagged S1 and rows from the second file are tagged S2.
+    </help>
+
+</tool>