Skip to content
Snippets Groups Projects
Commit ba6a4983 authored by Wei Gu's avatar Wei Gu
Browse files

first commit

parents
No related branches found
No related tags found
No related merge requests found
README 0 → 100644
#!/usr/bin/env perl
##############################################################
## Name: Create_GEX_outputfile.pl ##
## Description: Merges the two subset mrna.tsv subset files ##
## and puts the data into the correct format to be used by ##
## MarkerSelection.R ##
## Usage: Create_GEX_outputfile.pl <subset1_input_file> ##
## <subset2_input_file> <output_file> ##
## Author: serge.eifes@uni.lu ##
##############################################################
###Modules loaded
use strict;
use warnings;
#Argument variables (positional):
#  1) subset 1 input file, 2) subset 2 input file, 3) output file
die "Usage: $0 <subset1_input_file> <subset2_input_file> <output_file>\n"
    unless @ARGV >= 3;
my $file_1 = $ARGV[0];
my $file_2 = $ARGV[1];
my $O_file = $ARGV[2];

#Input files in subset order: the first is labelled S1, the second S2.
my @A_Subset_files = ($file_1, $file_2);

open(my $out_fh, '>', $O_file) or die "Cannot open $O_file: $!";

#The header is written exactly once, before any data. It used to be
#written once per input file, which put a second header row in the
#middle of the merged table.
print {$out_fh} "PATIENT.ID\tVALUE\tPROBE.ID\tGENE_SYMBOL\tSUBSET\n";

my $counter_subset = 1;
foreach my $file (@A_Subset_files) {
    my $Subset = "S" . $counter_subset;

    open(my $in_fh, '<', $file) or die "Cannot open $file: $!";
    while (my $Line = <$in_fh>) {
        chomp($Line);

        #Skip the input header row and blank lines (a blank line would
        #otherwise be emitted as a row of empty fields).
        next if ($Line =~ /^PATIENT/);
        next if ($Line =~ /^\s*$/);

        #Fields are taken by fixed zero-based position in the tab-separated row.
        my @A_Line      = split /\t/, $Line;
        my $Patient_ID  = $A_Line[0];
        my $Value       = $A_Line[9];
        my $Probe_ID    = $A_Line[11];
        my $Gene_Symbol = $A_Line[13];

        #The literal string "null" marks a missing gene symbol in the input.
        $Gene_Symbol = "NA" if ($Gene_Symbol eq "null");

        #The patient id is prefixed with the subset label so that ids stay
        #distinct between the two subsets.
        print {$out_fh} join("\t",
            $Subset . "_" . $Patient_ID,
            $Value, $Probe_ID, $Gene_Symbol, $Subset) . "\n";
    }
    close($in_fh);
    $counter_subset++;
}

#Buffered write errors only surface when the handle is closed.
close($out_fh) or die "Cannot close $O_file: $!";
#!/usr/bin/env perl
##############################################################
## Name: Create_VCF_Allele_matrix.pl ##
## Description: Creates input file for subset to ##
## perform the Fisher exact test to detect significant ##
## deviations in the proportions for minor and major ##
## alleles between two subsets. To perform the analysis, ##
##two subset files need to be generated by this script ##
## Usage: Create_VCF_Allele_matrix.pl <input_file> ##
## <output_file> ##
## Author: serge.eifes@uni.lu ##
##############################################################
###Modules loaded
use strict;
use warnings;
#Argument variables (positional): 1) input file, 2) output file
my $file = $ARGV[0];
my $O_file = $ARGV[1];
die "Usage: $0 <input_file> <output_file>\n"
    unless defined $file && defined $O_file;

#To store the corresponding column num for given col
my %H_col2colNum;
#Hash to store data extracted from input file:
#  {"<chromosome>_<position>"}{<patient id>} = <variant>
my %HoH_MarkerAndPatient2Variant;
#Names of the columns we need:
my @A_colnames = ("PATIENT ID", "VARIANT", "CHROMOSOME", "POSITION");

## START Input file parser ##
my $header_seen = 0;
open(my $in_fh, '<', $file) or die "Cannot open $file: $!";
while (my $Line = <$in_fh>) {
    chomp($Line);
    next if ($Line =~ /^\s*$/);    #blank lines carry no data
    my @A_Line = split /\t/, $Line;

    #The header is the line mentioning all four required column names;
    #it tells us at which position each of them is stored.
    if(($Line=~/PATIENT ID/) && ($Line=~/VARIANT/) && ($Line=~/CHROMOSOME/) && ($Line=~/POSITION/)){
        foreach my $colname (@A_colnames){
            $H_col2colNum{$colname} = get_col_num($colname, \@A_Line);
        }
        $header_seen = 1;
        next;
    }

    #Without the header the column positions are unknown, so such a line
    #cannot be interpreted and is reported instead of being mis-parsed.
    unless ($header_seen) {
        warn "Skipping line $. of $file: found before the header line\n";
        next;
    }

    my $Marker = $A_Line[$H_col2colNum{"CHROMOSOME"}] . "_" . $A_Line[$H_col2colNum{"POSITION"}];
    my $Patient = $A_Line[$H_col2colNum{"PATIENT ID"}];
    my $Variant = $A_Line[$H_col2colNum{"VARIANT"}];
    $HoH_MarkerAndPatient2Variant{$Marker}{$Patient}=$Variant;
}
close($in_fh);
die "No header line with columns '" . join("', '", @A_colnames) . "' found in $file\n"
    unless $header_seen;
## END Input file parser ##

## START Storing all Markers and patients in array ##
#Markers in sorted order; patients in order of first appearance when
#walking the sorted markers and, within each marker, the sorted patients.
my @A_uniq_markers = sort keys %HoH_MarkerAndPatient2Variant;
my @A_uniq_patients = uniq(
    map { sort keys %{ $HoH_MarkerAndPatient2Variant{$_} } } @A_uniq_markers
);

###Storing data in two dim array:
#First row: "//" in the corner cell followed by the patient ids (colnames).
my @AoA = ( [ "//", @A_uniq_patients ] );
#One row per marker: marker name (rowname) followed by one variant per patient.
foreach my $Marker (@A_uniq_markers){
    my @A_row = ($Marker);
    foreach my $Patient (@A_uniq_patients){
        my $Variant = $HoH_MarkerAndPatient2Variant{$Marker}{$Patient};
        #A patient without a call for this marker gets an empty cell.
        push(@A_row, defined($Variant) ? $Variant : "");
    }
    push(@AoA, \@A_row);
}
## END Storing all Markers and patients in array ##

## START Printing out the results/matrix
open(my $out_fh, '>', $O_file) or die "Cannot create $O_file: $!";
foreach my $row_ref (@AoA) {
    print {$out_fh} join("\t", @{$row_ref}) . "\n";
}
#Buffered write errors only surface when the handle is closed.
close($out_fh) or die "Cannot close $O_file: $!";
## END Printing out the results/matrix
### FUNCTIONS ###
sub get_col_num{
    #Returns the zero-based position of the first header field that is
    #exactly equal to the requested column name.
    #  $_[0]: column name to look for
    #  $_[1]: reference to the array of header fields
    #Dies if no field matches.
    my ($colname2match, $header_ref) = @_;
    my @A_fields = @{$header_ref};
    for my $position (0 .. $#A_fields) {
        return($position) if($A_fields[$position] eq $colname2match);
    }
    die "Column $colname2match not found. Please check the input file header!!!";
}
##Eliminate duplicate items in array:
##keeps the first occurrence of every item and preserves the input order.
sub uniq {
    my %already_seen;
    my @distinct;
    foreach my $item (@_) {
        next if $already_seen{$item}++;
        push(@distinct, $item);
    }
    return @distinct;
}
\ No newline at end of file
#!/usr/bin/env Rscript
###########################################################################
# Copyright 2008-2012 Janssen Research & Development, LLC.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
###########################################################################
###########################################################################
#### Code rewritten by Wei and Serge
###########################################################################
###########################################################################
#Comparative Marker Selection
##########################################################################
#Function to obtain, for every item of vectorRef, the position of that
#item in vectorToAlign. Indexing vectorToAlign (or a table whose rows
#follow it) with the result puts it in the order of vectorRef.
#  vectorToAlign: vector (or factor) to be reordered
#  vectorRef:     vector (or factor) giving the wanted order
#Returns an integer vector with one position per item of vectorRef; if an
#item occurs several times in vectorToAlign the first position is used.
#Stops if an item of vectorRef does not occur in vectorToAlign.
get.reordered_index = function(vectorToAlign, vectorRef){
  vectorToAlign = as.vector(vectorToAlign)
  vectorRef = as.vector(vectorRef)
  #match() handles empty input, repeated items and missing items, all of
  #which broke the former element-wise which() loop.
  res = match(vectorRef, vectorToAlign)
  if(any(is.na(res))){
    stop("get.reordered_index: some items of vectorRef are not present in vectorToAlign")
  }
  return(res)
}
## Main function to perform DEG analysis:
## reads the merged expression table, fits a limma linear model contrasting
## the two subsets and writes the top-ranked probes plus the matching
## heatmap data.
##   input.filename:  tab-separated file with at least the columns
##                    PATIENT.ID, VALUE, PROBE.ID and GENE_SYMBOL; patient
##                    ids starting with "S1" / "S2" define the two subsets
##   output.file:     file receiving the statistics table (one row per probe)
##   numberOfMarkers: number of top-ranked probes to report (number or
##                    numeric string); capped at the number of probes
##   out.heatmap:     file receiving the expression values of the reported
##                    probes, rows in the same order as the statistics table
##   mhcCorrection:   p-value adjustment: 0 = none, 1 = BH, 2 = BY, 3 = holm
MS.loader <- function(
  input.filename="outputfile.txt",
  output.file ="CMS.TXT",
  numberOfMarkers = 100,
  out.heatmap ="heatmapdata",
  mhcCorrection = 2
)
{
  ##########################################
  #We need this to do the ddply below.
  suppressMessages(library(plyr))
  suppressMessages(library(reshape2))
  suppressMessages(library(limma))
  #---------------------
  #PREPARE RAW DATA
  #Pull the GEX data from the file.
  mRNAData <- data.frame(read.delim(input.filename))
  #Trim the probe.id field.
  mRNAData$PROBE.ID <- gsub("^\\s+|\\s+$", "",mRNAData$PROBE.ID)
  mRNAData$GENE_SYMBOL <- gsub("^\\s+|\\s+$", "",mRNAData$GENE_SYMBOL)
  mRNAData$PATIENT.ID <- gsub("^\\s+|\\s+$", "",mRNAData$PATIENT.ID)
  #Getting rid of the probesets w/o associated gene symbol:
  idx_wo_symbol = which(is.na(mRNAData$GENE_SYMBOL))
  if(length(idx_wo_symbol)>0){
    mRNAData=mRNAData[-idx_wo_symbol,]
  }
  #Create a data.frame with unique probe/gene ids.
  geneStatsData <- data.frame(mRNAData$PROBE.ID,mRNAData$GENE_SYMBOL);
  #Add a column name to our data.frame.
  colnames(geneStatsData) <- c('PROBE.ID','GENE_SYMBOL')
  geneStatsData <- unique(geneStatsData[,c("PROBE.ID","GENE_SYMBOL")]);
  #---------------------
  #---------------------
  #Prepare the casted raw data.
  #Get a copy of the raw data.
  coercedData <- mRNAData
  #Grab only the columns we need for doing the melt/cast.
  coercedData <- coercedData[c('PATIENT.ID','VALUE','PROBE.ID','GENE_SYMBOL')]
  #Melt the data, leaving 2 columns as the grouping fields.
  meltedData <- melt(coercedData, id=c("PROBE.ID","GENE_SYMBOL","PATIENT.ID"))
  #Cast the data into a format that puts the PATIENT.ID in a column.
  coercedData <- data.frame(dcast(meltedData, PROBE.ID + GENE_SYMBOL ~ PATIENT.ID))
  #The PATIENT.ID column needs to be removed if exists in the matrix!!!!!!!
  idx.patient_id = which(colnames(coercedData)=="PATIENT.ID")
  if(length(idx.patient_id)==1){
    coercedData = coercedData[,-idx.patient_id]
  }
  #When we convert to a data frame the numeric columns get an x in front of them. Remove them here.
  colnames(coercedData) <- sub("^X","",colnames(coercedData))
  #Get a gene list that we can use later to preserve the list of the genes.
  geneList <- as.vector(coercedData$GENE_SYMBOL)
  probeList <- as.vector(coercedData$PROBE.ID)
  #---------------------
  #---------------------
  #Fitting linear model with limma
  #Remove the gene_symbol and probe.id columns.
  coercedDataWithoutGroup <- data.matrix(subset(coercedData, select=-c(GENE_SYMBOL,PROBE.ID)))
  rownames(coercedDataWithoutGroup)=coercedData$PROBE.ID # Rownames have to be added
  #Creating a named vector for mapping PROBE.ID to GENE.SYMBOL
  gene.symbols=coercedData$GENE_SYMBOL
  names(gene.symbols)=coercedData$PROBE.ID
  #Get a vector representing our subsets.
  classVector <- colnames(coercedDataWithoutGroup)
  classVector <- gsub("^S1.*","0",classVector)
  classVector <- gsub("^S2.*","1",classVector)
  classVector <- as.numeric(classVector)
  #Check the class vector to verify we have two subsets.
  if(length(unique(classVector)) < 2) stop("||FRIENDLY||There is only one subset selected, please select two in order to run the comparative analysis.")
  #Translate the correction code into a p.adjust method name. The names
  #are case-sensitive ("holm", not "Holm"); an unknown code is rejected
  #here instead of leaving the method undefined.
  mhcMethod = switch(as.character(mhcCorrection),
    "0"="none",
    "1"="BH",
    "2"="BY",
    "3"="holm",
    stop(paste("Unknown mhcCorrection value [", mhcCorrection, "]; expected 0, 1, 2 or 3.", sep=""))
  )
  #LINEAR MODEL FITTING
  #Creating Design matrix
  S1 = integer(length(classVector))
  S1[which(classVector==0)]=1
  S2 = classVector
  design <- cbind(S1=S1,S2=S2)
  #... and contrast matrix
  contrast.matrix = makeContrasts(S1-S2, levels=design)
  # Linear model fitting
  fit <- lmFit(coercedDataWithoutGroup, design)
  fit <- contrasts.fit(fit, contrast.matrix)
  fit <- eBayes(fit)
  #The data structure of topTable for newer versions of limma has been modified.
  #This causes errors during execution of the script. A workarround by creating a data structure identical to topTable compatible with tranSMART is created
  contr=1 #The contrast in the fitting to be used for the results table
  #Results data frame which provides all required statistics for output data
  top.fit = data.frame(
    ID=rownames(fit$coefficients), #Depending on limma version fit$genes doesn't exist anymore
    logFC=fit$coefficients[,contr],
    t=fit$t[,contr],
    P.Value=fit$p.value[,contr],
    adj.P.val=p.adjust(p=fit$p.value[,contr], method=mhcMethod),
    B=fit$lods[,contr],
    stringsAsFactors=FALSE #Keep the probe ids as plain strings
  )
  top.fit.ranked.decr = top.fit[ order(top.fit$B, decreasing=T), ] #Ordering the data according to limma B statistic
  rownames(top.fit.ranked.decr) = NULL
  #Filtering down the results table to the number of genes selected by user.
  #The number may arrive as a string from the command line; it is capped at
  #the number of available probes so that no all-NA rows are produced.
  n.markers = suppressWarnings(as.integer(numberOfMarkers))
  if(is.na(n.markers) || n.markers < 1) stop("numberOfMarkers must be a positive integer.")
  n.markers = min(n.markers, nrow(top.fit.ranked.decr))
  top.fit.ranked.decr.filt = top.fit.ranked.decr[seq_len(n.markers),]
  #Look the gene symbols up by probe id NAME (as.character guards against
  #a factor, which would index by integer code instead).
  topgenes = cbind(gene.symbols[as.character(top.fit.ranked.decr.filt$ID)], top.fit.ranked.decr.filt)
  colnames(topgenes) = c("GENE_SYMBOL", "PROBE.ID", "logFC", "t", "P.value", "adj.P.val", "B")
  rownames(topgenes) = NULL #This is the final results data object for Marker Selection workflow containing all data displayed in output table
  # End Linear model fitting
  #---------------------
  #---------------------
  #HEATMAP
  ## Generating heatmap output data:
  heatmapData = coercedData[which(coercedData[,"PROBE.ID"] %in% topgenes[,"PROBE.ID"]), ]
  #Drop the first two columns (PROBE.ID and GENE_SYMBOL) and replace them
  #by a single "<gene symbol> <probe id>" label column.
  finalHeatmapData = heatmapData[, -c(1,2)]
  GROUP = paste(as.vector(heatmapData[, "GENE_SYMBOL"]), as.vector(heatmapData[, "PROBE.ID"]), sep=" ")
  finalHeatmapData = cbind(GROUP, finalHeatmapData)
  #Here we align the lines in finalHeatmapData to topgenes. The probe id is
  #used as key because several probes can share one gene symbol, which made
  #an alignment by gene symbol repeat some rows and lose others.
  idx = match(as.character(topgenes[, "PROBE.ID"]), as.character(heatmapData[, "PROBE.ID"]))
  finalHeatmapData = finalHeatmapData[ idx, ]
  #WRITE TO FILE
  #Write the file with the stats by gene. This will get read into the UI.
  write.table(topgenes,output.file,sep = "\t",quote=F,row.names=F)
  #Write the data file we will use for the heatmap.
  write.table(finalHeatmapData,out.heatmap,sep = "\t",quote=F,row.names=F)
  ##########################################
}
#### Executing the main job to obtain the DEG and corresponding heatmap file: ####
#Getting command line arguments:
#  <input_file> <output_file> <number_of_markers> <heatmap_output_file>
args = commandArgs(trailingOnly = TRUE)
if(length(args) < 4){
  stop("Usage: MarkerSelection.R <input_file> <output_file> <number_of_markers> <heatmap_output_file>")
}
input_file=as.vector(args[1])
output_file=as.vector(args[2])
#Command line arguments are strings; the marker count is converted and
#checked here so that a typo is reported before any analysis is started.
numberOfMarkers=suppressWarnings(as.integer(args[3]))
if(is.na(numberOfMarkers) || numberOfMarkers < 1){
  stop(paste("The number of markers must be a positive integer, got [", args[3], "].", sep=""))
}
out.heatmap=as.vector(args[4])
MS.loader(input_file, output_file,numberOfMarkers, out.heatmap)
#!/usr/bin/env Rscript
##############################################################
## Name: VCF_FE_test.R ##
## Description: Performs the Fisher exact test to detect ##
## significant deviations in the proportions for minor ##
## and major alleles between the two subsets provided as ##
## input files. ##
## Usage: VCF_FE_test.R <input_file_1> <input_file_2> ##
## <output_file> ##
## Author: serge.eifes@uni.lu ##
##############################################################
#######################################
#######################################
### START FUNCTIONS ####
#Reads a tab-separated allele matrix and returns it as a character matrix.
#The first line of the file holds the column names and the first field of
#every other line the row name; the top-left cell is ignored.
#  file: path of the matrix file
#Returns a character matrix with one row per data line, also when the file
#holds a single data line or a single data column.
get_input_data = function(file){
  #All fields are read as text so that no value is re-typed on the way in.
  data = as.matrix(read.delim(file, header=F, colClasses="character"))
  #Adding column and row names
  #drop=FALSE keeps the matrix shape for a single row or column
  data_w_names = data[-1,-1, drop=FALSE]
  rownames(data_w_names) = unname(data[-1,1])
  colnames(data_w_names) = unname(data[1,-1])
  return(data_w_names)
}
#Counts the alleles found in one row of the allele matrix.
#  row: vector with one "<allele>/<allele>" string per patient
#Returns a table with the number of occurrences of every allele. Only the
#first two alleles of a patient are counted; a patient with fewer than two
#alleles contributes NA, which table() leaves out.
split_alleles = function(row){
  row = as.vector(row)
  row.split.list = strsplit(row, "\\/", perl=T)
  #Exactly two entries per patient; lapply copes with an empty row, on
  #which the former 1:num_patients loop failed.
  all_alleles_per_marker = unlist(lapply(row.split.list, function(patient_alleles) patient_alleles[1:2]))
  if(is.null(all_alleles_per_marker)){
    all_alleles_per_marker = character(0)
  }
  return(table(all_alleles_per_marker))
}
#Builds the allele counts of every marker of one subset.
#  data: matrix with one row per marker (row names = marker names) and one
#        column per patient
#Returns a list named by marker; each element is the one-column matrix of
#allele counts obtained from split_alleles() for that marker.
create_subset_count_list = function(data){
  subset = list()
  markers = rownames(data)
  #seq_len() yields no iteration for a matrix without rows (1:0 would run twice)
  for(i in seq_len(nrow(data))){
    nt_counts = split_alleles(data[i,])
    subset[[markers[i]]] = as.matrix(nt_counts)
  }
  return(subset)
}
#Turns the one-column count matrix of a marker into a vector of counts
#named by the matrix row names and sorted by increasing count.
get_sorted_count_vec = function(subset_data){
  counts_by_allele = setNames(as.vector(subset_data[,1]), rownames(subset_data))
  return(sort(counts_by_allele, method="quick"))
}
#Pads a count vector holding a single allele with a leading count of 0
#for the absent minor allele; any other vector is returned untouched.
check_sorted_count_vec_for_minor = function(sorted_count_vec_a){
  if(length(sorted_count_vec_a)!=1){
    return(sorted_count_vec_a)
  }
  #The only allele present takes the second slot, the first slot is the
  #zero count of the minor allele; the padded vector carries no names.
  return(c(0, unname(sorted_count_vec_a[1])))
}
### END FUNCTIONS ######
#######################################
#######################################
#######################################
### START MAIN CODE ####
#Getting command line arguments: <input_file_1> <input_file_2> <output_file>
args = commandArgs(trailingOnly = TRUE)
if(length(args) < 3){
  stop("Usage: VCF_FE_test.R <input_file_1> <input_file_2> <output_file>")
}
input_file_1=as.vector(args[1])
input_file_2=as.vector(args[2])
output_file=as.vector(args[3])
data_1 = get_input_data(input_file_1)
data_2 = get_input_data(input_file_2)
subset_list_1 = create_subset_count_list(data_1)
subset_list_2 = create_subset_count_list(data_2)
markers_subset_list_1 = names(subset_list_1)
# In this matrix the results of the FE test for each marker
#will be stored
results = matrix(c("Chromosome", "Position", "P-value"), ncol=3, nrow=1)
#Iterating over each marker and performing FE test to compare
#the allele frequencies in both subsets
for(marker in markers_subset_list_1){
  subset_1_data = subset_list_1[[marker]]
  subset_2_data = subset_list_2[[marker]]
  #A marker that is absent from the second subset cannot be compared
  if(is.null(subset_2_data)){
    next
  }
  sorted_count_vec_1 = get_sorted_count_vec(subset_1_data)
  sorted_count_vec_2 = get_sorted_count_vec(subset_2_data)
  #Nothing to compare if one subset has no called allele at this marker
  if((sum(sorted_count_vec_1)==0) || (sum(sorted_count_vec_2)==0)){
    next
  }
  #The columns of the contingency table are the alleles seen in either
  #subset, so the counts of both subsets always refer to the same allele.
  #(Pairing the counts by rank compared different alleles whenever the
  #minor allele of one subset was the major allele of the other.)
  alleles = union(names(sorted_count_vec_1), names(sorted_count_vec_2))
  #A single allele shared by both subsets leaves nothing to test
  if(length(alleles) < 2){
    next
  }
  ##Contingency table: one row per subset, one column per allele
  input.matrix = rbind(
    subset_1 = sorted_count_vec_1[alleles],
    subset_2 = sorted_count_vec_2[alleles]
  )
  colnames(input.matrix) = alleles
  #An allele that is absent from a subset has count 0 there
  input.matrix[is.na(input.matrix)] = 0
  #FE test
  p.value = fisher.test(input.matrix)$p.value
  #Reformating and storing results in a matrix
  marker_split = unlist(strsplit(marker, "_"))
  results = rbind(results, c(marker_split[1], marker_split[2], p.value))
}
#Printing results to file ordered by p-value
results_header = as.vector(results[1,])
#drop=FALSE keeps the matrix shape when one or no marker was tested
results = results[-1, , drop=FALSE]
colnames(results) = results_header
#The matrix holds text, so the p-values are converted back to numbers for
#ordering (as text "1e-05" would sort after "0.5")
results = results[order(as.numeric(results[,"P-value"])), , drop=FALSE]
write.table(results, output_file, quote=F, sep="\t", row.names=F, col.names=T)
### END MAIN CODE ####
#######################################
#!/usr/bin/Rscript --vanilla
# user input
fileName <- "mrna.tsv"
# Options that may be overridden on the command line as key=value.
# Only these names are accepted: testing exists(key) also accepted the name
# of any other existing R object and silently overwrote it.
allowedKeys <- c("fileName")
# check command line parameters
args <- strsplit(commandArgs(TRUE), split='=')
for (arg in args) {
  key <- arg[1]
  # everything after the first '=' is the value, so a value may contain '='
  value <- if (length(arg) > 1) paste(arg[-1], collapse='=') else NA
  if (is.na(key) || !(key %in% allowedKeys)) {
    cat("\n")
    stop(paste("Unrecognized option [",key,"].\n\n", sep=""))
  }
  if (is.na(value)) {
    cat("\n")
    stop(paste("Option [",key,"] requires a value (",key,"=<value>).\n\n", sep=""))
  }
  # replace default value of key with input value
  assign(key, value)
}
# Read the tab-separated input and write its first rows to outputfile.txt
a<-read.table(fileName,sep="\t")
write.table(head(a),file="outputfile.txt")
<?xml version='1.0' encoding='utf-8'?>
<toolbox>
<!-- "Get Data" section: lists the upload and ucsc_tablebrowser tool
     definitions from data_source/. -->
<section id="getext" name="Get Data">
<tool file="data_source/upload.xml" />
<tool file="data_source/ucsc_tablebrowser.xml" />
</section>
<!-- <tool file="data_source/ucsc_tablebrowser_test.xml" />
<tool file="data_source/ucsc_tablebrowser_archaea.xml" />
<tool file="data_source/ebi_sra.xml" />
<tool file="data_source/microbial_import.xml" />
<tool file="data_source/biomart.xml" />
<tool file="data_source/biomart_test.xml" />
<tool file="data_source/cbi_rice_mart.xml" />
<tool file="data_source/gramene_mart.xml" />
<tool file="data_source/fly_modencode.xml" />
<tool file="data_source/worm_modencode.xml" />
<tool file="data_source/wormbase.xml" />
<tool file="data_source/wormbase_test.xml" />
<tool file="data_source/eupathdb.xml" />
<tool file="data_source/hbvar.xml" />
<tool file="genomespace/genomespace_file_browser_prod.xml" />
<tool file="genomespace/genomespace_importer.xml" />
<tool file="validation/fix_errors.xml" /> -->
<!-- "R scripts" section: lists the five tool definitions stored under
     Rscripts/ (test, tm_create_input, marker_selection, tm_VCF_input and
     VCF_FE_test). -->
<section id="rscripts" name="R scripts">
<tool file="Rscripts/test.xml" />
<tool file="Rscripts/tm_create_input.xml" />
<tool file="Rscripts/marker_selection.xml" />
<tool file="Rscripts/tm_VCF_input.xml" />
<tool file="Rscripts/VCF_FE_test.xml" />
</section>
<!-- "IMP pipeline" section: lists the thirteen tool definitions stored
     under imp/. -->
<section id="imp-pipeline" name="IMP pipeline">
<tool file="imp/imp-annotation.xml" />
<tool file="imp/imp-assemble_idba-mt.xml" />
<tool file="imp/imp-assemble_idba_ud.xml" />
<tool file="imp/imp-cufflink.xml" />
<tool file="imp/imp-de_duplicate.xml" />
<tool file="imp/imp-concatenate.xml" />
<tool file="imp/imp-mapping.xml" />
<tool file="imp/imp-extract_mapped.xml" />
<tool file="imp/imp-fq2fa.xml" />
<tool file="imp/imp-extract_unmapped.xml" />
<tool file="imp/imp-merge.xml" />
<tool file="imp/imp-trim.xml" />
<tool file="imp/imp-variantcalling.xml" />
</section>
<!-- <section id="send" name="Send Data">
<tool file="genomespace/genomespace_exporter.xml" />
</section>
<section id="liftOver" name="Lift-Over">
<tool file="extract/liftOver_wrapper.xml" />
</section> -->
<!-- <section id="textutil" name="Text Manipulation">
<tool file="filters/fixedValueColumn.xml" />
<tool file="stats/column_maker.xml" />
<tool file="filters/catWrapper.xml" />
<tool file="filters/cutWrapper.xml" />
<tool file="filters/mergeCols.xml" />
<tool file="filters/convert_characters.xml" />
<tool file="filters/CreateInterval.xml" />
<tool file="filters/cutWrapper.xml" />
<tool file="filters/changeCase.xml" />
<tool file="filters/pasteWrapper.xml" />
<tool file="filters/remove_beginning.xml" />
<tool file="filters/randomlines.xml" />
<tool file="filters/headWrapper.xml" />
<tool file="filters/tailWrapper.xml" />
<tool file="filters/trimmer.xml" />
<tool file="filters/wc_gnu.xml" />
<tool file="filters/secure_hash_message_digest.xml" />
<tool file="stats/dna_filtering.xml" />
<tool file="fastax_toolkit/seqid_uncollapser.xml" />
</section> -->
<!-- <section id="filter" name="Filter and Sort">
<tool file="stats/filtering.xml" />
<tool file="filters/sorter.xml" /> -->
<!-- <tool file="filters/grep.xml" />
<label id="gff" text="GFF" />
<tool file="filters/gff/extract_GFF_Features.xml" />
<tool file="filters/gff/gff_filter_by_attribute.xml" />
<tool file="filters/gff/gff_filter_by_feature_count.xml" />
<tool file="filters/gff/gtf_filter_by_attribute_values_list.xml" /> -->
<!-- <section id="group" name="Join, Subtract and Group">
<tool file="filters/joiner.xml" />
<tool file="filters/compare.xml" />
<tool file="stats/grouping.xml" />
</section> -->
<!-- <section id="convert" name="Convert Formats">
<tool file="filters/axt_to_concat_fasta.xml" />
<tool file="filters/axt_to_fasta.xml" />
<tool file="filters/axt_to_lav.xml" />
<tool file="filters/bed2gff.xml" />
<tool file="fasta_tools/fasta_to_tabular.xml" />
<tool file="filters/gff2bed.xml" />
<tool file="filters/lav_to_bed.xml" />
<tool file="maf/maf_to_bed.xml" />
<tool file="maf/maf_to_interval.xml" />
<tool file="maf/maf_to_fasta.xml" />
<tool file="fasta_tools/tabular_to_fasta.xml" />
<tool file="filters/wiggle_to_simple.xml" />
<tool file="filters/sff_extractor.xml" />
<tool file="filters/gtf2bedgraph.xml" />
<tool file="filters/wig_to_bigwig.xml" />
<tool file="filters/bed_to_bigbed.xml" />
<tool file="fastq_tools/fq2fa.xml" />
</section> -->
<!-- <section id="features" name="Extract Features">
<tool file="filters/ucsc_gene_bed_to_exon_bed.xml" />
</section>
<section id="fetchSeq" name="Fetch Sequences">
<tool file="extract/extract_genomic_dna.xml" />
</section>
<section id="fetchAlign" name="Fetch Alignments">
<tool file="maf/interval2maf_pairwise.xml" />
<tool file="maf/interval2maf.xml" />
<tool file="maf/maf_split_by_species.xml" />
<tool file="maf/interval_maf_to_merged_fasta.xml" />
<tool file="maf/genebed_maf_to_fasta.xml" />
<tool file="maf/maf_stats.xml" />
<tool file="maf/maf_thread_for_species.xml" />
<tool file="maf/maf_limit_to_species.xml" />
<tool file="maf/maf_limit_size.xml" />
<tool file="maf/maf_by_block_number.xml" />
<tool file="maf/maf_reverse_complement.xml" />
<tool file="maf/maf_filter.xml" />
</section>
<section id="scores" name="Get Genomic Scores">
<tool file="filters/wiggle_to_simple.xml" />
<tool file="stats/aggregate_binned_scores_in_intervals.xml" />
</section>
<section id="bxops" name="Operate on Genomic Intervals">
<tool file="annotation_profiler/annotation_profiler.xml" />
</section> -->
<!-- <section id="stats" name="Statistics">
<tool file="stats/gsummary.xml" />
<tool file="filters/uniq.xml" />
<tool file="stats/cor.xml" />
<tool file="stats/generate_matrix_for_pca_lda.xml" />
<tool file="stats/lda_analy.xml" />
<tool file="stats/plot_from_lda.xml" />
<tool file="stats/MINE.xml" />
<label id="gff" text="GFF" />
<tool file="stats/count_gff_features.xml" />
</section> -->
<!-- <section id="plots" name="Graph/Display Data">
<tool file="plotting/histogram2.xml" />
<tool file="plotting/scatterplot.xml" />
<tool file="plotting/bar_chart.xml" />
<tool file="plotting/boxplot.xml" />
<tool file="visualization/LAJ.xml" />
<tool file="visualization/build_ucsc_custom_track.xml" />
<tool file="maf/vcf_to_maf_customtrack.xml" />
<tool file="mutation/visualize.xml" />
</section> -->
<!-- <section id="multVar" name="Multivariate Analysis">
<tool file="multivariate_stats/pca.xml" />
<tool file="multivariate_stats/cca.xml" />
<tool file="multivariate_stats/kpca.xml" />
<tool file="multivariate_stats/kcca.xml" />
</section> -->
<!-- <section id="hyphy" name="Evolution">
<tool file="evolution/codingSnps.xml" />
<tool file="evolution/add_scores.xml" />
</section> -->
<!-- <section id="motifs" name="Motif Tools">
<tool file="meme/meme.xml" />
<tool file="meme/fimo.xml" />
</section> -->
<!-- <section id="clustal" name="Multiple Alignments">
</section>
<section id="fasta_manipulation" name="FASTA manipulation">
<tool file="fasta_tools/fasta_compute_length.xml" />
<tool file="fasta_tools/fasta_filter_by_length.xml" />
<tool file="fasta_tools/fasta_concatenate_by_species.xml" />
<tool file="fasta_tools/fasta_to_tabular.xml" />
<tool file="fasta_tools/tabular_to_fasta.xml" />
</section> -->
<!-- <section id="NGS_QC" name="NGS: QC and manipulation">
<label id="fastqcsambam" text="FastQC: fastq/sam/bam" />
<label id="illumina" text="Illumina fastq" />
<label id="454" text="Roche-454 data" />
<tool file="metag_tools/short_reads_figure_score.xml" />
<tool file="metag_tools/short_reads_trim_seq.xml" />
<label id="solid" text="AB-SOLiD data" />
<tool file="next_gen_conversion/solid2fastq.xml" />
<tool file="solid_tools/solid_qual_stats.xml" />
<tool file="solid_tools/solid_qual_boxplot.xml" />
</section> -->
<!-- <section name="FASTA/Q Information" id="cshl_library_information">
<tool file="fastax_toolkit/fastx_quality_statistics.xml" />
<tool file="fastax_toolkit/fastq_quality_boxplot.xml" />
<tool file="fastax_toolkit/fastx_nucleotides_distribution.xml" />
<tool file="fastax_toolkit/fasta_clipping_histogram.xml" />
</section> -->
<!-- <section name="FASTA/Q Preprocessing" id="cshl_fastx_manipulation">
<label id="fastax_toolkit" text="Tools for FASTQ data" />
<tool file="fastax_toolkit/fastq_to_fasta.xml" />
<tool file="fastax_toolkit/fastq_quality_converter.xml" />
<tool file="fastax_toolkit/fastx_clipper.xml" />
<tool file="fastax_toolkit/fastx_trimmer.xml" />
<tool file="fastax_toolkit/fastx_trimmer_from_end.xml" />
<tool file="fastax_toolkit/fastq_quality_trimmer.xml" />
<tool file="fastax_toolkit/fastx_renamer.xml" />
<tool file="fastax_toolkit/fastx_reverse_complement.xml" />
<tool file="fastax_toolkit/fasta_formatter.xml" />
<tool file="fastax_toolkit/fasta_nucleotide_changer.xml" />
<tool file="fastax_toolkit/fastx_artifacts_filter.xml" />
<tool file="fastax_toolkit/fastq_quality_filter.xml" />
<tool file="fastax_toolkit/fastq_masker.xml" />
<tool file="fastax_toolkit/fastx_collapser.xml" />
<tool file="fastax_toolkit/fastx_uncollapser.xml" />
<tool file="fastax_toolkit/fastx_barcode_splitter.xml" />
<tool file="fastq_tools/fastuniq.xml" />
<tool file="fastq_tools/trim-fastq.xml" />
</section>-->
<!--
Keep this section commented until it includes tools that
will be hosted on test/main. The velvet wrappers have been
included in the distribution but will not be hosted on our
public servers for the current time.
<section name="NGS: Assembly" id="ngs_assembly">
<label text="Velvet" id="velvet"/>
<tool file="sr_assembly/velvetg.xml" />
<tool file="sr_assembly/velveth.xml" />
</section>
-->
<!-- <section id="solexa_tools" name="NGS: Mapping">
<tool file="sr_mapping/bfast_wrapper.xml" />
<tool file="metag_tools/megablast_wrapper.xml" />
<tool file="metag_tools/megablast_xml_parser.xml" />
<tool file="sr_mapping/PerM.xml" />
<tool file="sr_mapping/srma_wrapper.xml" />
<tool file="sr_mapping/mosaik.xml" />
</section> -->
<!-- <section id="ngs-rna-tools" name="NGS: RNA Analysis">
<label id="rna_seq" text="RNA-seq" />
<label id="filtering" text="Filtering" />
</section> -->
<!-- <section id="samtools" name="NGS: SAM Tools">
</section> -->
<!-- <section id="ngs-simulation" name="NGS: Simulation">
<tool file="ngs_simulation/ngs_simulation.xml" />
</section> -->
<!-- <section id="hgv" name="Phenotype Association">
<tool file="evolution/codingSnps.xml" />
<tool file="evolution/add_scores.xml" />
<tool file="phenotype_association/sift.xml" />
<tool file="phenotype_association/linkToGProfile.xml" />
<tool file="phenotype_association/linkToDavid.xml" />
<tool file="phenotype_association/snpFreq.xml" />
<tool file="phenotype_association/ldtools.xml" />
<tool file="phenotype_association/pass.xml" />
<tool file="phenotype_association/gpass.xml" />
<tool file="phenotype_association/beam.xml" />
<tool file="phenotype_association/lps.xml" />
<tool file="phenotype_association/master2pg.xml" />
<tool file="phenotype_association/vcf2pgSnp.xml" />
</section> -->
<!-- <section id="imppipeline-previous" name="IMP pipeline previous">
<label id="preprocessing" text="Preprocessing" />
<tool file="fastax_toolkit/fastx_trimmer.xml" />
<tool file="fastq_tools/trim-fastq.xml" />
<label id="ass" text="Assembly" />
<tool file="fastq_tools/fq2fa.xml" />
<tool file="assembly/idba_tran.xml" />
<tool file="assembly/idba_ud.xml" />
<label id="visualization" text="Visualization" />
<tool file="visualization/vizbin.xml" />
<label id="map" text="Mapping" />
<tool id="bwa_index" file="mapping/bwa_index.xml" />
<tool id="sam_filter" file="samtools/sam_filter.xml" />
<tool id="bam_merge" file="samtools/bam_merge.xml" />
</section> -->
<!-- <section id="utilities" name="Utilities">
<tool id="checksum" file="utilities/checksum.xml" />
</section> -->
</toolbox>
<!-- Tool wrapper for Rscripts/VCF_FE_test.R.
     The script is called with three positional arguments: the two tabular
     input datasets (input_s1, input_s2) and the tabular output dataset. -->
<tool id="tm_fe_vcf_test" name="Do Fisher test on VCF files" version="0.1">
<command>
${GALAXY_DATA_INDEX_DIR}/Rscripts/VCF_FE_test.R $input_s1 $input_s2 $output
</command>
<inputs>
<param format="tabular" name="input_s1" type="data" label="vcf_subset_1"/>
<param format="tabular" name="input_s2" type="data" label="vcf_subset_2"/>
</inputs>
<outputs>
<!-- option q selected -->
<data format="tabular" name="output" ></data>
</outputs>
<help>
Do Fisher's test on VCF data
</help>
</tool>
<!-- Tool wrapper for Rscripts/MarkerSelection.R.
     The script is called with four positional arguments: the tabular input
     dataset, the statistics output dataset, the number of markers (integer
     parameter, default 50) and the heatmap output dataset. -->
<tool id="tm_marker_selection_mrna" name="Do Marker Selection on TM Export mRNA files" version="0.1">
<command>
${GALAXY_DATA_INDEX_DIR}/Rscripts/MarkerSelection.R $input $output $nmarker $heatmap
</command>
<inputs>
<param format="tabular" name="input" type="data" label="subset_1_and_2"/>
<param name="nmarker" type="integer" value="50" label="Number of Markers"/>
</inputs>
<outputs>
<!-- option q selected -->
<data format="tabular" name="output" ></data>
<data format="tabular" name="heatmap" ></data>
</outputs>
<help>
Do Marker Selection using Limma
</help>
</tool>
<!-- Tool wrapper for Rscripts/test.R.
     The input dataset is handed over as a key=value argument
     (fileName=...); the output dataset is taken from the file
     outputfile.txt in the working directory (from_work_dir). -->
<tool id="testrscript" name="Test R script" version="0.1">
<command>
${GALAXY_DATA_INDEX_DIR}/Rscripts/test.R fileName=$input_1
</command>
<inputs>
<param format="tabular" name="input_1" type="data" label="Subset 1"/>
</inputs>
<outputs>
<!-- option q selected -->
<data format="tabular" name="output" label="label_written_to_the_user" from_work_dir="outputfile.txt"></data>
</outputs>
<help>
Some tool description
</help>
</tool>
<!-- Tool wrapper for Rscripts/Create_VCF_Allele_matrix.pl.
     The script is called with two positional arguments: the tabular input
     dataset and the tabular output dataset.
     NOTE(review): the help text below is still a placeholder. -->
<tool id="tm_create_input_VCF" name="Create VCF input from TM Export files" version="0.1">
<command>
${GALAXY_DATA_INDEX_DIR}/Rscripts/Create_VCF_Allele_matrix.pl $input $output
</command>
<inputs>
<param format="tabular" name="input" type="data" label="vcf_input"/>
</inputs>
<outputs>
<!-- option q selected -->
<!-- <data format="tabular" name="output" label="label_written_to_the_user" from_work_dir="outputfile.txt"></data> -->
<data format="tabular" name="output" ></data>
</outputs>
<help>
Some tool description
</help>
</tool>
<!-- Tool wrapper for Rscripts/Create_MarkerSelection_inputfile.pl.
     The script is called with three positional arguments: the two tabular
     subset datasets (input_s1, input_s2) and the tabular output dataset.
     NOTE(review): the help text below is still a placeholder. -->
<tool id="tm_create_input" name="Create input from TM Export files" version="0.1">
<command>
${GALAXY_DATA_INDEX_DIR}/Rscripts/Create_MarkerSelection_inputfile.pl $input_s1 $input_s2 $output
</command>
<inputs>
<param format="tabular" name="input_s1" type="data" label="Subset 1"/>
<param format="tabular" name="input_s2" type="data" label="Subset 2" />
</inputs>
<outputs>
<!-- option q selected -->
<!-- <data format="tabular" name="output" label="label_written_to_the_user" from_work_dir="outputfile.txt"></data> -->
<data format="tabular" name="output" ></data>
</outputs>
<help>
Some tool description
</help>
</tool>
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment