first commit

README 0 → 100644
#!/usr/bin/env perl
## Name: ##
## Description: Merges the two mrna.tsv subset files ##
## and puts the data into the correct format to be used by ##
## MarkerSelection.R ##
## Usage: <subset1_input_file> <subset2_input_file> ##
## <output_file> (three positional arguments) ##
## Author: ##
###Modules loaded
use strict;
use warnings;
#Argument variables:
# Positional arguments: subset 1 export, subset 2 export, merged output file.
my $file_1=$ARGV[0];
my $file_2=$ARGV[1];
my $O_file=$ARGV[2];
die "Usage: $0 <subset1_input_file> <subset2_input_file> <output_file>\n"
    unless(defined $file_1 && defined $file_2 && defined $O_file);

# The files are processed in this order; the position decides the S1/S2 label.
my @A_Subset_files = ($file_1, $file_2);
my $counter_subset=1;

open(my $WH, '>', $O_file) or die "Cannot open $O_file: $!";

# MarkerSelection.R reads this file with read.delim() and addresses the
# columns PATIENT.ID, VALUE, PROBE.ID and GENE_SYMBOL by name, so a header row
# is required (R turns the blanks into dots).
# NOTE(review): the name of the fifth column is not used by the R script;
# "SUBSET" is an assumption - confirm against the original export format.
print {$WH} join("\t", "PATIENT ID", "VALUE", "PROBE ID", "GENE_SYMBOL", "SUBSET") . "\n";

foreach my $file (@A_Subset_files){
    # S1 for the first file, S2 for the second one.
    my $Subset = "S" . $counter_subset;

    open(my $FH, '<', $file) or die "Cannot open $file: $!";
    while(my $Line = <$FH>){
        # Strip the line ending so the last column does not carry "\n"/"\r\n".
        $Line =~ s/[\r\n]+\z//;
        # Skip the header line of the export and blank lines.
        next if($Line =~ /^PATIENT/);
        next if($Line =~ /^\s*$/);

        # -1 keeps trailing empty columns so the column positions stay fixed.
        my (@A_Line) = split /\t/, $Line, -1;
        # Columns 0 (patient), 9 (value) and 11 (probe) are mandatory.
        die "Line $. of $file has too few columns (" . scalar(@A_Line) . ")\n"
            if(scalar(@A_Line) < 12);

        my $Patient_ID = $A_Line[0];
        my $Value = $A_Line[9];
        my $Probe_ID = $A_Line[11];
        my $Gene_Symbol = $A_Line[13];
        # A missing gene symbol (absent column or the literal "null") becomes "NA".
        $Gene_Symbol="NA" if(!defined($Gene_Symbol) || $Gene_Symbol eq "null");

        print {$WH} $Subset . "_" . $Patient_ID . "\t" . $Value
            . "\t" . $Probe_ID . "\t" . $Gene_Symbol
            . "\t" . $Subset . "\n";
    }
    close $FH;
    $counter_subset++;
}
# Buffered write errors only surface at close time, so check it.
close($WH) or die "Cannot close $O_file: $!";
#!/usr/bin/env perl
## Name: ##
## Description: Creates input file for subset to ##
## perform the Fisher exact test to detect significant ##
## deviations in the proportions for minor and major ##
## alleles between two subsets. To perform the analysis, ##
##two subset files need to be generated by this script ##
## Usage: <input_file> ##
## <output_file> ##
## Author: ##
###Modules loaded
use strict;
use warnings;
#Argument variables:
# Positional arguments: the export file to read and the matrix file to write.
my $file = $ARGV[0];
my $O_file = $ARGV[1];
die "Usage: $0 <input_file> <output_file>\n"
    unless(defined $file && defined $O_file);
#To store the corresponding column num for given col
my %H_col2colNum;
#Hash to store data extracted from input file: {marker}{patient} = variant
my %HoH_MarkerAndPatient2Variant;
#Names of the columns we need:
my @A_colnames = ("PATIENT ID", "VARIANT", "CHROMOSOME", "POSITION");

## START Input file parser ##
open(my $FH, '<', $file) or die "Cannot open $file: $!";
while (my $Line = <$FH>) {
    # Strip the line ending so the last column can be matched and stored cleanly.
    $Line =~ s/[\r\n]+\z//;
    # -1 keeps trailing empty columns so the column positions stay fixed.
    my (@A_Line) = split /\t/, $Line, -1;

    # The header line is recognised by containing all four required names.
    if(($Line=~/PATIENT ID/) && ($Line=~/VARIANT/) && ($Line=~/CHROMOSOME/) && ($Line=~/POSITION/)){
        foreach my $colname (@A_colnames){
            $H_col2colNum{$colname} = get_col_num($colname, \@A_Line);
        }
        next;
    }

    # Nothing can be interpreted before the header has been seen.
    next unless(%H_col2colNum);
    next if($Line =~ /^\s*$/);

    # Refuse rows that are too short to contain every required column.
    if(grep { $_ > $#A_Line } values %H_col2colNum){
        die "Line $. of $file has too few columns\n";
    }

    # A marker is identified as <chromosome>_<position>.
    my $Marker = $A_Line[$H_col2colNum{"CHROMOSOME"}] . "_" . $A_Line[$H_col2colNum{"POSITION"}];
    my $Patient = $A_Line[$H_col2colNum{"PATIENT ID"}];
    my $Variant = $A_Line[$H_col2colNum{"VARIANT"}];
    $HoH_MarkerAndPatient2Variant{$Marker}{$Patient} = $Variant;
}
close $FH;
die "No header line with the columns " . join(", ", @A_colnames) . " found in $file\n"
    unless(%H_col2colNum);
## END Input file parser ##

## START Storing all Markers and patients in array ##
my @A_markers;
my @A_patients;
for my $Marker (sort keys %HoH_MarkerAndPatient2Variant ) {
    for my $Patient (sort keys %{$HoH_MarkerAndPatient2Variant{$Marker}}){
        push(@A_markers, $Marker);
        push(@A_patients, $Patient);
    }
}
my @A_uniq_markers = uniq(@A_markers);
my @A_uniq_patients = uniq(@A_patients);

# Two-dimensional matrix: row 0 holds the patient IDs, column 0 the markers.
# VCF_FE_test.R reads it without a header and takes row 1 / column 1 as names.
# NOTE(review): the corner cell is left empty; the R reader discards it.
my @AoA;
$AoA[0][0] = "";
my $j=1;
foreach my $Patient (@A_uniq_patients){
    $AoA[0][$j] = $Patient;
    $j++;
}

##Filling the "matrix" with the variant data
my $i=1;
foreach my $Marker (@A_uniq_markers){
    $AoA[$i][0] = $Marker;
    my $col = 1;
    foreach my $Patient (@A_uniq_patients){
        my $Variant = $HoH_MarkerAndPatient2Variant{$Marker}{$Patient};
        # A patient without a call for this marker gets an empty cell.
        $AoA[$i][$col] = defined($Variant) ? $Variant : "";
        $col++;
    }
    $i++;
}
## END Storing all Markers and patients in array ##

## START Printing out the results/matrix
open(my $WH, '>', $O_file) or die "Cannot create $O_file: $!";
foreach my $row_ref (@AoA) {
    print {$WH} join("\t", @{$row_ref}) . "\n";
}
# Buffered write errors only surface at close time, so check it.
close($WH) or die "Cannot close $O_file: $!";
## END Printing out the results/matrix
# Returns the zero-based position of a column in a header line.
#   $_[0] - exact column name to look for
#   $_[1] - reference to the array of header cells
# Dies when no header cell equals the requested name. A trailing line ending
# on a header cell (unchomped last column) is ignored for the comparison.
sub get_col_num{
    my ($colname2match, $header_ref) = @_;
    my $col_num=0;
    foreach my $colname (@{$header_ref}){
        if(defined $colname){
            (my $clean_colname = $colname) =~ s/[\r\n]+\z//;
            return $col_num if($colname2match eq $clean_colname);
        }
        $col_num++;
    }
    # Only reached after every header cell has been compared.
    die "Column $colname2match not found. Please check the input file header!!!";
}
##Eliminate duplicate items in array:
# Returns the arguments with duplicates removed, keeping the first occurrence
# of every value and the original order.
sub uniq {
    my %seen;
    # $seen{$_}++ is 0 (false) only the first time a value is met.
    return grep { !$seen{$_}++ } @_;
}
#!/usr/bin/env Rscript
# Copyright 2008-2012 Janssen Research & Development, LLC.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# See the License for the specific language governing permissions and
# limitations under the License.
#### Code rewritten by Wei and Serge
#Comparative Marker Selection
#Function to obtain the aligned positions all items in the vectorToAlign
#in the vectorRef
# Returns, for every element of vectorRef, the position of the matching element
# in vectorToAlign, so that vectorToAlign[result] follows the order of vectorRef.
#   vectorToAlign - vector whose elements should be reordered
#   vectorRef     - vector giving the wanted order
# Repeated values are paired up in order of appearance (the first unused match
# is taken), so each position is returned at most once. A reference value
# without a remaining match yields NA.
get.reordered_index = function(vectorToAlign, vectorRef){
  vectorToAlign = as.vector(vectorToAlign)
  vectorRef = as.vector(vectorRef)
  res = rep(NA_integer_, length(vectorRef))
  # Marks positions of vectorToAlign that have already been handed out.
  used = logical(length(vectorToAlign))
  for(i in seq_along(vectorRef)){
    idx = which(!used & !is.na(vectorToAlign) & vectorToAlign == vectorRef[i])
    if(length(idx) > 0){
      res[i] = idx[1]
      used[idx[1]] = TRUE
    }
  }
  return(res)
}
## Main function to perform DEG analysis:
# MS.loader: comparative marker selection between two subsets (S1 vs S2).
# As far as the visible code shows it reads a tab-delimited expression table
# (columns PATIENT.ID, VALUE, PROBE.ID, GENE_SYMBOL), pivots it into a
# probe x patient matrix, fits a limma linear model and writes two
# tab-separated files: the ranked statistics (output.file) and the matching
# heatmap rows (out.heatmap).
#
# Parameters visible in the signature:
#   output.file     - path of the statistics table (default "CMS.TXT")
#   numberOfMarkers - number of top-ranked rows kept (default 100)
#   out.heatmap     - path of the heatmap data file (default "heatmapdata")
#   mhcCorrection   - selects one of the correction branches below (default 2)
#
# NOTE(review): several statements of this function are missing or cut off in
# this copy (signature end, library loads, branch bodies, parts of the limma
# section, closing braces). They are flagged individually below and must be
# restored from version control before the function can run.
MS.loader <- function(
# NOTE(review): input.filename is read below but not declared here - the first
# parameter line appears to be missing.
output.file ="CMS.TXT",
numberOfMarkers = 100,
out.heatmap ="heatmapdata",
mhcCorrection = 2
# NOTE(review): the closing ")" / opening "{" of the function are not present.
#We need this to do the ddply below.
# NOTE(review): the library() calls announced by the comment above are missing;
# melt/dcast and lmFit/eBayes are used further down.
#Pull the GEX data from the file.
mRNAData <- data.frame(read.delim(input.filename))
#Trim the field.
# Removes leading and trailing whitespace from the three identifier columns.
mRNAData$PROBE.ID <- gsub("^\\s+|\\s+$", "",mRNAData$PROBE.ID)
mRNAData$GENE_SYMBOL <- gsub("^\\s+|\\s+$", "",mRNAData$GENE_SYMBOL)
mRNAData$PATIENT.ID <- gsub("^\\s+|\\s+$", "",mRNAData$PATIENT.ID)
#Getting rid of the probesets w/o associated gene symbol:
# NOTE(review): the next line is not valid R as written (the expression in
# front of $GENE_SYMBOL is missing and the parentheses are unbalanced).
idx_wo_symbol = which($GENE_SYMBOL))
#Create a data.frame with unique probe/gene ids.
geneStatsData <- data.frame(mRNAData$PROBE.ID,mRNAData$GENE_SYMBOL);
#Add a column name to our data.frame.
colnames(geneStatsData) <- c('PROBE.ID','GENE_SYMBOL')
geneStatsData <- unique(geneStatsData[,c("PROBE.ID","GENE_SYMBOL")]);
#Prepare the casted raw data.
#Get a copy of the raw data.
coercedData <- mRNAData
#Grab only the columns we need for doing the melt/cast.
coercedData <- coercedData[c('PATIENT.ID','VALUE','PROBE.ID','GENE_SYMBOL')]
#Melt the data, leaving 2 columns as the grouping fields.
meltedData <- melt(coercedData, id=c("PROBE.ID","GENE_SYMBOL","PATIENT.ID"))
#Cast the data into a format that puts the PATIENT.ID in a column.
coercedData <- data.frame(dcast(meltedData, PROBE.ID + GENE_SYMBOL ~ PATIENT.ID))
#The PATIENT.ID column needs to be removed if exists in the matrix!!!!!!!
# NOTE(review): if no column is called PATIENT.ID, idx.patient_id is empty and
# the negative index on the next line selects zero columns - a guard seems to
# be missing here.
idx.patient_id = which(colnames(coercedData)=="PATIENT.ID")
coercedData = coercedData[,-idx.patient_id]
#When we convert to a data frame the numeric columns get an x in front of them. Remove them here.
colnames(coercedData) <- sub("^X","",colnames(coercedData))
#Get a gene list that we can use later to preserve the list of the genes.
geneList <- as.vector(coercedData$GENE_SYMBOL)
probeList <- as.vector(coercedData$PROBE.ID)
#Fitting linear model with limma
#Remove the gene_symbol and columns.
coercedDataWithoutGroup <- data.matrix(subset(coercedData, select=-c(GENE_SYMBOL,PROBE.ID)))
rownames(coercedDataWithoutGroup)=coercedData$PROBE.ID # Rownames have to be added
#Creating a named vector for mapping PROBE.ID to GENE.SYMBOL
# NOTE(review): the statement that builds this mapping is missing; gene.symbols
# is used further down but never assigned in the visible code.
#Get a vector representing our subsets.
# Column names starting with S1 become class 0, those starting with S2 class 1.
classVector <- colnames(coercedDataWithoutGroup)
classVector <- gsub("^S1.*","0",classVector)
classVector <- gsub("^S2.*","1",classVector)
classVector <- as.numeric(classVector)
#Check the class vector to verify we have two subsets.
if(length(unique(classVector)) < 2) stop("||FRIENDLY||There is only one subset selected, please select two in order to run the comparative analysis.")
# NOTE(review): the opening "if" of the chain below and the bodies of all its
# branches are missing; presumably they set mhcMethod, which is passed to
# p.adjust() further down - confirm against the original.
}else if(mhcCorrection==1){
}else if(mhcCorrection==2){
}else if(mhcCorrection==3){
#Creating Design matrix
# S1 is an all-zero column, S2 is the 0/1 class vector.
S1 = integer(length(classVector))
S2 = classVector
design <- cbind(S1=S1,S2=S2)
#... and contrast matrix
contrast.matrix = makeContrasts(S1-S2, levels=design)
# Linear model fitting
fit <- lmFit(coercedDataWithoutGroup, design)
# NOTE(review): the next line is not valid R as written - the function call in
# front of ", contrast.matrix)" is missing.
fit <-, contrast.matrix)
fit <- eBayes(fit)
#The data structure of topTable for newer versions of limma has been modified.
#This causes errors during execution of the script. A workaround by creating a data structure identical to topTable compatible with tranSMART is created
contr=1 #The contrast in the fitting to be used for the results table
#Results data frame which provides all required statistics for output data = data.frame(
# NOTE(review): the assignment that opens the data.frame( call has been merged
# into the comment above, and the object name is missing from the following
# lines; the block from here to "topgenes = cbind(" cannot be parsed as is.
ID=rownames(fit$coefficients), #Depending on limma version fit$genes doesn't exist anymore
adj.P.val=p.adjust(p=fit$p.value[,contr], method=mhcMethod),
) =[ order($B, decreasing=T), ] #Ordering the data according to limma B statistic
rownames( = NULL =[1:numberOfMarkers,] #Filtering down the results table to the number of genes selected by user
topgenes = cbind(gene.symbols[$ID],
colnames(topgenes) = c("GENE_SYMBOL", "PROBE.ID", "logFC", "t", "P.value", "adj.P.val", "B")
rownames(topgenes) = NULL #This is the final results data object for Marker Selection workflow containing all data displayed in output table
# End Linear model fitting
## Generating heatmap output data:
# Keeps only the rows of the pivoted data whose probe is among the top genes.
heatmapData = coercedData[which(coercedData[,"PROBE.ID"] %in% topgenes[,"PROBE.ID"]), ]
# Drops the first two columns and replaces them by one "GENE_SYMBOL PROBE.ID"
# label column called GROUP.
finalHeatmapData = heatmapData[, -c(1,2)]
GROUP = paste(as.vector(heatmapData[, "GENE_SYMBOL"]), as.vector(heatmapData[, "PROBE.ID"]), sep=" ")
finalHeatmapData = cbind(GROUP, finalHeatmapData)
#Here we align the lines in finalHeatmapData to topgenes according to
#the gene symbols in both matrices
# d receives the part of each GROUP label in front of the first blank.
s = strsplit(as.character(finalHeatmapData$GROUP), " ")
d = vector(mode="character", length=length(s))
for(i in 1:length(s)){
d[i] = s[[i]][1]
# NOTE(review): the closing brace of the loop above is missing.
idx = get.reordered_index(d, topgenes$GENE_SYMBOL)
finalHeatmapData = finalHeatmapData[ idx, ]
#Write the file with the stats by gene. This will get read into the UI.
write.table(topgenes,output.file,sep = "\t",quote=F,row.names=F)
#Write the data file we will use for the heatmap.
write.table(finalHeatmapData,out.heatmap,sep = "\t",quote=F,row.names=F)
# NOTE(review): the closing brace of MS.loader is missing.
#### Executing the main job to obtain the DEG and corresponding heatmap file: ####
#Getting command line arguments
# Expected order (as passed by the Galaxy wrapper):
#   <input_file> <output_file> <number_of_markers> <heatmap_output_file>
args = commandArgs(trailingOnly = TRUE)
if(length(args) < 4){
  stop("Usage: MarkerSelection.R <input_file> <output_file> <number_of_markers> <heatmap_output_file>")
}
input_file = args[1]
output_file = args[2]
# Command line arguments arrive as strings; the marker count must be a number.
numberOfMarkers = suppressWarnings(as.integer(args[3]))
if(is.na(numberOfMarkers) || numberOfMarkers < 1){
  stop(paste("Number of markers must be a positive integer, got [", args[3], "]", sep=""))
}
out.heatmap = args[4]
MS.loader(input_file, output_file,numberOfMarkers, out.heatmap)
#!/usr/bin/env Rscript
## Name: VCF_FE_test.R ##
## Description: Performs the Fisher exact test to detect ##
## significant deviations in the proportions for minor ##
## and major alleles between the two subsets provided as ##
## input files.
## Usage: VCF_FE_test.R <input_file_1> <input_file_2> ##
## <output_file> ##
## Author: ##
# Reads a tab-delimited genotype matrix and returns it as a character matrix.
#   file - path of a file whose first row holds the patient IDs and whose
#          first column holds the marker names (the corner cell is ignored)
# Returns a matrix with markers as row names and patients as column names.
get_input_data = function(file){
  # colClasses="character" keeps every cell as text, so as.matrix() never has
  # to format numeric columns.
  data = as.matrix(read.delim(file, header=F, colClasses="character"))
  #Adding column and row names
  # drop=FALSE keeps the matrix shape for a single marker or a single patient.
  data_w_names = data[-1,-1, drop=FALSE]
  rownames = unname(as.vector(data[-1,1]))
  colnames = unname(as.vector(data[1,-1]))
  rownames(data_w_names) = rownames
  colnames(data_w_names) = colnames
  return(data_w_names)
}
# Counts the alleles found in one marker row.
#   row - character vector with one genotype per patient, the two alleles
#         separated by "/" (e.g. "A/G")
# Returns a table with one entry per allele and its number of occurrences.
# Genotypes that are missing or empty contribute NA alleles, which table()
# leaves out of the counts.
split_alleles = function(row){
  row = as.vector(row)
  num_patients = length(row)
  row.split.list = strsplit(row, "\\/", perl=T)
  # Two allele slots per patient.
  all_alleles_per_marker = vector(mode="character", length=num_patients*2)
  counter = 1
  for(i in seq_len(num_patients)){
    patient_alleles = row.split.list[[i]]
    for(j in 1:2){
      allele = patient_alleles[j]
      all_alleles_per_marker[counter] = allele
      counter = counter + 1
    }
  }
  return(table(all_alleles_per_marker))
}
# Builds the per-marker allele counts of one subset.
#   data - character matrix with markers as rows and patients as columns,
#          row names holding the marker names
# Returns a list named by marker; each element is a one-column matrix whose
# row names are the alleles and whose values are the allele counts.
create_subset_count_list = function(data){
  subset = list()
  markers = rownames(data)
  # seq_len() yields an empty loop for a matrix without rows.
  for(i in seq_len(nrow(data))){
    datarow = data[i,]
    marker = markers[i]
    nt_counts = split_alleles(datarow)
    nt_count_matrix = as.matrix(nt_counts)
    subset[[marker]] = nt_count_matrix
  }
  return(subset)
}
# Turns the one-column count matrix of a marker into a named count vector
# sorted by increasing count (the rarest allele comes first).
#   subset_data - matrix with alleles as row names and counts in column 1, or
#                 NULL when the marker is not present in the subset
# Returns a named vector; it is empty when subset_data is NULL.
get_sorted_count_vec = function(subset_data){
  if(is.null(subset_data)){
    return(integer(0))
  }
  alleles_subset = rownames(subset_data)
  subset_vec = as.vector(subset_data[,1])
  names(subset_vec) = alleles_subset
  subset_vec_sorted = sort(subset_vec, method="quick")
  return(subset_vec_sorted)
}
#Checks sorted_count_vec if minor allele (with count 0) needs to be added
# Pads a sorted count vector with a zero count in front of its first entry.
#   sorted_count_vec_a - count vector as produced by get_sorted_count_vec
# The visible statements build a two-element vector (0, first count) and
# assign it back to sorted_count_vec_a.
# NOTE(review): the condition that decides when the padding applies, the
# return statement and the closing brace are not present in this copy, and
# the allele names of the input are not carried over. The only calls to this
# function in the main code are commented out.
check_sorted_count_vec_for_minor = function(sorted_count_vec_a){
#Need to add minor allele with count 0 if it exists in other subset
sorted_count_vec = vector(mode="integer", length=2)
# Slot 2 keeps the existing count, slot 1 becomes the zero-count entry.
sorted_count_vec[2] = sorted_count_vec_a[1]
sorted_count_vec[1] = 0
sorted_count_vec_a = sorted_count_vec
### END FUNCTIONS ######
#Getting command line arguments
# Expected order: <input_file_1> <input_file_2> <output_file>
args = commandArgs(trailingOnly = TRUE)
if(length(args) < 3){
  stop("Usage: VCF_FE_test.R <input_file_1> <input_file_2> <output_file>")
}
input_file_1 = args[1]
input_file_2 = args[2]
output_file = args[3]

data_1 = get_input_data(input_file_1)
data_2 = get_input_data(input_file_2)

subset_list_1 = create_subset_count_list(data_1)
subset_list_2 = create_subset_count_list(data_2)
markers_subset_list_1 = names(subset_list_1)
markers_subset_list_2 = names(subset_list_2)

# The results of the FE test for each marker are collected in these vectors.
# The p-values stay numeric so that the final ordering is numeric as well.
chromosomes = character(0)
positions = character(0)
p.values = numeric(0)

#Iterating over each marker and performing FE test to compare
#allele frequencies in both subsets. Only markers present in both subsets
#can be compared.
for(marker in intersect(markers_subset_list_1, markers_subset_list_2)){
  sorted_count_vec_1 = get_sorted_count_vec(subset_list_1[[marker]])
  sorted_count_vec_2 = get_sorted_count_vec(subset_list_2[[marker]])

  # A subset without any called allele cannot be tested.
  if((length(sorted_count_vec_1)==0) || (length(sorted_count_vec_2)==0)) next

  # Alleles are matched by name; an allele seen in only one subset gets a
  # count of 0 in the other one.
  # NOTE(review): a marker where each subset is monomorphic for a different
  # allele is tested here; the earlier length-based condition skipped it.
  alleles = union(names(sorted_count_vec_1), names(sorted_count_vec_2))
  # With a single allele overall there is nothing to compare.
  if(length(alleles) < 2) next

  ##Contingency table: one row per subset, one column per allele
  input.matrix = matrix(0, nrow=2, ncol=length(alleles),
                        dimnames=list(c("subset_1", "subset_2"), alleles))
  input.matrix["subset_1", names(sorted_count_vec_1)] = sorted_count_vec_1
  input.matrix["subset_2", names(sorted_count_vec_2)] = sorted_count_vec_2

  #FE test
  p.value = fisher.test(input.matrix)$p.value

  #Reformating and storing results (marker names are <chromosome>_<position>)
  marker_split = unlist(strsplit(marker, "_"))
  chromosomes = c(chromosomes, marker_split[1])
  positions = c(positions, marker_split[2])
  p.values = c(p.values, p.value)
}

#Printing results to file ordered by p-value
results = data.frame(chromosomes, positions, p.values, stringsAsFactors=FALSE)
colnames(results) = c("Chromosome", "Position", "P-value")
# drop=FALSE keeps the table shape when only one marker was tested.
results = results[order(results[,"P-value"]), , drop=FALSE]
write.table(results, output_file, quote=F, sep="\t", row.names=F, col.names=T)
### END MAIN CODE ####
#!/usr/bin/Rscript --vanilla
# user input
# Default value; it can be replaced from the command line with fileName=<path>.
fileName <- "mrna.tsv"
# check command line parameters
# Every argument is expected in key=value form; strsplit() yields one
# (key, value) pair per argument.
args <- strsplit(commandArgs(TRUE), split='=')
# Collects the keys that were passed on the command line.
keys <- vector ("character")
if (length(args) > 0) {
for (i in 1:length(args)) {
key <- args[[i]][1]
# value is NA when the argument contains no "=".
value <- args[[i]][2]
keys <- c(keys, key)
# Only names that already exist may be overridden; anything else is rejected.
if (exists(key)) {
# replace default value of key with input value
assign(key, value)
}else {
stop(paste("Unrecognized option [",key,"].\n\n", sep=""))
# NOTE(review): the closing braces of the else branch, the loop and the outer
# if, as well as the rest of the script, are not present in this copy.
<?xml version='1.0' encoding='utf-8'?>
<section id="getext" name="Get Data">
<tool file="data_source/upload.xml" />
<tool file="data_source/ucsc_tablebrowser.xml" />
<!-- <tool file="data_source/ucsc_tablebrowser_test.xml" />
<tool file="data_source/ucsc_tablebrowser_archaea.xml" />
<tool file="data_source/ebi_sra.xml" />
<tool file="data_source/microbial_import.xml" />
<tool file="data_source/biomart.xml" />
<tool file="data_source/biomart_test.xml" />
<tool file="data_source/cbi_rice_mart.xml" />
<tool file="data_source/gramene_mart.xml" />
<tool file="data_source/fly_modencode.xml" />
<tool file="data_source/worm_modencode.xml" />
<tool file="data_source/wormbase.xml" />
<tool file="data_source/wormbase_test.xml" />
<tool file="data_source/eupathdb.xml" />
<tool file="data_source/hbvar.xml" />
<tool file="genomespace/genomespace_file_browser_prod.xml" />
<tool file="genomespace/genomespace_importer.xml" />
<tool file="validation/fix_errors.xml" /> -->
</section>
<section id="rscripts" name="R scripts">
<tool file="Rscripts/test.xml" />
<tool file="Rscripts/tm_create_input.xml" />
<tool file="Rscripts/marker_selection.xml" />
<tool file="Rscripts/tm_VCF_input.xml" />
<tool file="Rscripts/VCF_FE_test.xml" />
</section>
<section id="imp-pipeline" name="IMP pipeline">
<tool file="imp/imp-annotation.xml" />
<tool file="imp/imp-assemble_idba-mt.xml" />
<tool file="imp/imp-assemble_idba_ud.xml" />
<tool file="imp/imp-cufflink.xml" />
<tool file="imp/imp-de_duplicate.xml" />
<tool file="imp/imp-concatenate.xml" />
<tool file="imp/imp-mapping.xml" />
<tool file="imp/imp-extract_mapped.xml" />
<tool file="imp/imp-fq2fa.xml" />
<tool file="imp/imp-extract_unmapped.xml" />
<tool file="imp/imp-merge.xml" />
<tool file="imp/imp-trim.xml" />
<tool file="imp/imp-variantcalling.xml" />
</section>
<!-- <section id="send" name="Send Data">
<tool file="genomespace/genomespace_exporter.xml" />
<section id="liftOver" name="Lift-Over">
<tool file="extract/liftOver_wrapper.xml" />
</section> -->
<!-- <section id="textutil" name="Text Manipulation">
<tool file="filters/fixedValueColumn.xml" />
<tool file="stats/column_maker.xml" />
<tool file="filters/catWrapper.xml" />
<tool file="filters/cutWrapper.xml" />
<tool file="filters/mergeCols.xml" />
<tool file="filters/convert_characters.xml" />
<tool file="filters/CreateInterval.xml" />
<tool file="filters/cutWrapper.xml" />
<tool file="filters/changeCase.xml" />
<tool file="filters/pasteWrapper.xml" />
<tool file="filters/remove_beginning.xml" />
<tool file="filters/randomlines.xml" />
<tool file="filters/headWrapper.xml" />
<tool file="filters/tailWrapper.xml" />
<tool file="filters/trimmer.xml" />
<tool file="filters/wc_gnu.xml" />
<tool file="filters/secure_hash_message_digest.xml" />
<tool file="stats/dna_filtering.xml" />
<tool file="fastax_toolkit/seqid_uncollapser.xml" />
</section> -->
<!-- <section id="filter" name="Filter and Sort">
<tool file="stats/filtering.xml" />
<tool file="filters/sorter.xml" /> -->
<!-- <tool file="filters/grep.xml" />
<label id="gff" text="GFF" />
<tool file="filters/gff/extract_GFF_Features.xml" />
<tool file="filters/gff/gff_filter_by_attribute.xml" />
<tool file="filters/gff/gff_filter_by_feature_count.xml" />
<tool file="filters/gff/gtf_filter_by_attribute_values_list.xml" /> -->
<!-- <section id="group" name="Join, Subtract and Group">
<tool file="filters/joiner.xml" />
<tool file="filters/compare.xml" />
<tool file="stats/grouping.xml" />
</section> -->
<!-- <section id="convert" name="Convert Formats">
<tool file="filters/axt_to_concat_fasta.xml" />
<tool file="filters/axt_to_fasta.xml" />
<tool file="filters/axt_to_lav.xml" />
<tool file="filters/bed2gff.xml" />
<tool file="fasta_tools/fasta_to_tabular.xml" />
<tool file="filters/gff2bed.xml" />
<tool file="filters/lav_to_bed.xml" />
<tool file="maf/maf_to_bed.xml" />
<tool file="maf/maf_to_interval.xml" />
<tool file="maf/maf_to_fasta.xml" />
<tool file="fasta_tools/tabular_to_fasta.xml" />
<tool file="filters/wiggle_to_simple.xml" />
<tool file="filters/sff_extractor.xml" />
<tool file="filters/gtf2bedgraph.xml" />
<tool file="filters/wig_to_bigwig.xml" />
<tool file="filters/bed_to_bigbed.xml" />
<tool file="fastq_tools/fq2fa.xml" />
</section> -->
<!-- <section id="features" name="Extract Features">
<tool file="filters/ucsc_gene_bed_to_exon_bed.xml" />
<section id="fetchSeq" name="Fetch Sequences">
<tool file="extract/extract_genomic_dna.xml" />
<section id="fetchAlign" name="Fetch Alignments">
<tool file="maf/interval2maf_pairwise.xml" />
<tool file="maf/interval2maf.xml" />
<tool file="maf/maf_split_by_species.xml" />
<tool file="maf/interval_maf_to_merged_fasta.xml" />
<tool file="maf/genebed_maf_to_fasta.xml" />
<tool file="maf/maf_stats.xml" />
<tool file="maf/maf_thread_for_species.xml" />
<tool file="maf/maf_limit_to_species.xml" />
<tool file="maf/maf_limit_size.xml" />
<tool file="maf/maf_by_block_number.xml" />
<tool file="maf/maf_reverse_complement.xml" />
<tool file="maf/maf_filter.xml" />
<section id="scores" name="Get Genomic Scores">
<tool file="filters/wiggle_to_simple.xml" />
<tool file="stats/aggregate_binned_scores_in_intervals.xml" />
<section id="bxops" name="Operate on Genomic Intervals">
<tool file="annotation_profiler/annotation_profiler.xml" />
</section> -->
<!-- <section id="stats" name="Statistics">
<tool file="stats/gsummary.xml" />
<tool file="filters/uniq.xml" />
<tool file="stats/cor.xml" />
<tool file="stats/generate_matrix_for_pca_lda.xml" />
<tool file="stats/lda_analy.xml" />
<tool file="stats/plot_from_lda.xml" />
<tool file="stats/MINE.xml" />
<label id="gff" text="GFF" />
<tool file="stats/count_gff_features.xml" />
</section> -->
<!-- <section id="plots" name="Graph/Display Data">
<tool file="plotting/histogram2.xml" />
<tool file="plotting/scatterplot.xml" />
<tool file="plotting/bar_chart.xml" />
<tool file="plotting/boxplot.xml" />
<tool file="visualization/LAJ.xml" />
<tool file="visualization/build_ucsc_custom_track.xml" />
<tool file="maf/vcf_to_maf_customtrack.xml" />
<tool file="mutation/visualize.xml" />
</section> -->
<!-- <section id="multVar" name="Multivariate Analysis">
<tool file="multivariate_stats/pca.xml" />
<tool file="multivariate_stats/cca.xml" />
<tool file="multivariate_stats/kpca.xml" />
<tool file="multivariate_stats/kcca.xml" />
</section> -->
<!-- <section id="hyphy" name="Evolution">
<tool file="evolution/codingSnps.xml" />
<tool file="evolution/add_scores.xml" />
</section> -->
<!-- <section id="motifs" name="Motif Tools">
<tool file="meme/meme.xml" />
<tool file="meme/fimo.xml" />
</section> -->
<!-- <section id="clustal" name="Multiple Alignments">
<section id="fasta_manipulation" name="FASTA manipulation">
<tool file="fasta_tools/fasta_compute_length.xml" />
<tool file="fasta_tools/fasta_filter_by_length.xml" />
<tool file="fasta_tools/fasta_concatenate_by_species.xml" />
<tool file="fasta_tools/fasta_to_tabular.xml" />
<tool file="fasta_tools/tabular_to_fasta.xml" />
</section> -->
<!-- <section id="NGS_QC" name="NGS: QC and manipulation">
<label id="fastqcsambam" text="FastQC: fastq/sam/bam" />
<label id="illumina" text="Illumina fastq" />
<label id="454" text="Roche-454 data" />
<tool file="metag_tools/short_reads_figure_score.xml" />
<tool file="metag_tools/short_reads_trim_seq.xml" />
<label id="solid" text="AB-SOLiD data" />
<tool file="next_gen_conversion/solid2fastq.xml" />
<tool file="solid_tools/solid_qual_stats.xml" />
<tool file="solid_tools/solid_qual_boxplot.xml" />
</section> -->
<!-- <section name="FASTA/Q Information" id="cshl_library_information">
<tool file="fastax_toolkit/fastx_quality_statistics.xml" />
<tool file="fastax_toolkit/fastq_quality_boxplot.xml" />
<tool file="fastax_toolkit/fastx_nucleotides_distribution.xml" />
<tool file="fastax_toolkit/fasta_clipping_histogram.xml" />
</section> -->
<!-- <section name="FASTA/Q Preprocessing" id="cshl_fastx_manipulation">
<label id="fastax_toolkit" text="Tools for FASTQ data" />
<tool file="fastax_toolkit/fastq_to_fasta.xml" />
<tool file="fastax_toolkit/fastq_quality_converter.xml" />
<tool file="fastax_toolkit/fastx_clipper.xml" />
<tool file="fastax_toolkit/fastx_trimmer.xml" />
<tool file="fastax_toolkit/fastx_trimmer_from_end.xml" />
<tool file="fastax_toolkit/fastq_quality_trimmer.xml" />
<tool file="fastax_toolkit/fastx_renamer.xml" />
<tool file="fastax_toolkit/fastx_reverse_complement.xml" />
<tool file="fastax_toolkit/fasta_formatter.xml" />
<tool file="fastax_toolkit/fasta_nucleotide_changer.xml" />
<tool file="fastax_toolkit/fastx_artifacts_filter.xml" />
<tool file="fastax_toolkit/fastq_quality_filter.xml" />
<tool file="fastax_toolkit/fastq_masker.xml" />
<tool file="fastax_toolkit/fastx_collapser.xml" />
<tool file="fastax_toolkit/fastx_uncollapser.xml" />
<tool file="fastax_toolkit/fastx_barcode_splitter.xml" />
<tool file="fastq_tools/fastuniq.xml" />
<tool file="fastq_tools/trim-fastq.xml" />
Keep this section commented until it includes tools that
will be hosted on test/main. The velvet wrappers have been
included in the distribution but will not be hosted on our
public servers for the current time.
<section name="NGS: Assembly" id="ngs_assembly">
<label text="Velvet" id="velvet"/>
<tool file="sr_assembly/velvetg.xml" />
<tool file="sr_assembly/velveth.xml" />
<!-- <section id="solexa_tools" name="NGS: Mapping">
<tool file="sr_mapping/bfast_wrapper.xml" />
<tool file="metag_tools/megablast_wrapper.xml" />
<tool file="metag_tools/megablast_xml_parser.xml" />
<tool file="sr_mapping/PerM.xml" />
<tool file="sr_mapping/srma_wrapper.xml" />
<tool file="sr_mapping/mosaik.xml" />
</section> -->
<!-- <section id="ngs-rna-tools" name="NGS: RNA Analysis">
<label id="rna_seq" text="RNA-seq" />
<label id="filtering" text="Filtering" />
</section> -->
<!-- <section id="samtools" name="NGS: SAM Tools">
</section> -->
<!-- <section id="ngs-simulation" name="NGS: Simulation">
<tool file="ngs_simulation/ngs_simulation.xml" />
</section> -->
<!-- <section id="hgv" name="Phenotype Association">
<tool file="evolution/codingSnps.xml" />
<tool file="evolution/add_scores.xml" />
<tool file="phenotype_association/sift.xml" />
<tool file="phenotype_association/linkToGProfile.xml" />
<tool file="phenotype_association/linkToDavid.xml" />
<tool file="phenotype_association/snpFreq.xml" />
<tool file="phenotype_association/ldtools.xml" />
<tool file="phenotype_association/pass.xml" />
<tool file="phenotype_association/gpass.xml" />
<tool file="phenotype_association/beam.xml" />
<tool file="phenotype_association/lps.xml" />
<tool file="phenotype_association/master2pg.xml" />
<tool file="phenotype_association/vcf2pgSnp.xml" />
</section> -->
<!-- <section id="imppipeline-previous" name="IMP pipeline previous">
<label id="preprocessing" text="Preprocessing" />
<tool file="fastax_toolkit/fastx_trimmer.xml" />
<tool file="fastq_tools/trim-fastq.xml" />
<label id="ass" text="Assembly" />
<tool file="fastq_tools/fq2fa.xml" />
<tool file="assembly/idba_tran.xml" />
<tool file="assembly/idba_ud.xml" />
<label id="visualization" text="Visualization" />
<tool file="visualization/vizbin.xml" />
<label id="map" text="Mapping" />
<tool id="bwa_index" file="mapping/bwa_index.xml" />
<tool id="sam_filter" file="samtools/sam_filter.xml" />
<tool id="bam_merge" file="samtools/bam_merge.xml" />
</section> -->
<!-- <section id="utilities" name="Utilities">
<tool id="checksum" file="utilities/checksum.xml" />
</section> -->
<tool id="tm_fe_vcf_test" name="Do Fisher test on VCF files" version="0.1">
${GALAXY_DATA_INDEX_DIR}/Rscripts/VCF_FE_test.R $input_s1 $input_s2 $output
<param format="tabular" name="input_s1" type="data" label="vcf_subset_1"/>
<param format="tabular" name="input_s2" type="data" label="vcf_subset_2"/>
<!-- option q selected -->
<data format="tabular" name="output" ></data>
Do Fisher's test on VCF data
<tool id="tm_marker_selection_mrna" name="Do Marker Selection on TM Export mRNA files" version="0.1">
${GALAXY_DATA_INDEX_DIR}/Rscripts/MarkerSelection.R $input $output $nmarker $heatmap
<param format="tabular" name="input" type="data" label="subset_1_and_2"/>
<param name="nmarker" type="integer" value="50" label="Number of Markers"/>
<!-- option q selected -->
<data format="tabular" name="output" ></data>
<data format="tabular" name="heatmap" ></data>
Do Marker Selection using Limma
<tool id="testrscript" name="Test R script" version="0.1">
${GALAXY_DATA_INDEX_DIR}/Rscripts/test.R fileName=$input_1
<param format="tabular" name="input_1" type="data" label="Subset 1"/>
<!-- option q selected -->
<data format="tabular" name="output" label="label_written_to_the_user" from_work_dir="outputfile.txt"></data>
Some tool description
<tool id="tm_create_input_VCF" name="Create VCF input from TM Export files" version="0.1">
${GALAXY_DATA_INDEX_DIR}/Rscripts/ $input $output
<param format="tabular" name="input" type="data" label="vcf_input"/>
<!-- option q selected -->
<!-- <data format="tabular" name="output" label="label_written_to_the_user" from_work_dir="outputfile.txt"></data> -->
<data format="tabular" name="output" ></data>
Some tool description
<tool id="tm_create_input" name="Create input from TM Export files" version="0.1">
${GALAXY_DATA_INDEX_DIR}/Rscripts/ $input_s1 $input_s2 $output
<param format="tabular" name="input_s1" type="data" label="Subset 1"/>
<param format="tabular" name="input_s2" type="data" label="Subset 2" />
<!-- option q selected -->
<!-- <data format="tabular" name="output" label="label_written_to_the_user" from_work_dir="outputfile.txt"></data> -->
<data format="tabular" name="output" ></data>
Some tool description
