Skip to content
Snippets Groups Projects
Commit 0aa8c28e authored by Wei Gu
Browse files

marker selection with RNAseq data

parent f03e2d48
No related branches found
No related tags found
No related merge requests found
......@@ -3,11 +3,16 @@
##############################################################
## Name: Create_GEX_outputfile.pl ##
## Description: Merges the two mrna.tsv subset files ##
## and puts the data into the correct format to be used by ##
## MarkerSelection.R ##
## and puts the data into the correct format to be used ##
## as input by MarkerSelection.R ##
## Usage: Create_GEX_outputfile.pl -s1 <subset1_input_file> ##
## -s2 <subset2_input_file> -dir <in/output dir> ##
## Author: serge.eifes@uni.lu ##
## Email: serge.eifes@uni.lu ##
## ##
## This work is licensed under the Creative Commons ##
## Attribution-NonCommercial-ShareAlike 4.0 ##
## International License. To view a copy of this license, ##
## visit http://creativecommons.org/licenses/by-nc-sa/4.0/. ##
##############################################################
......@@ -36,6 +41,12 @@ open(WH, ">$O_file") or die "Cannot open $O_file: $!";
foreach my $file (@A_Subset_files){
#Idx numbers for data columns we want to extract
my $Patient_ID_idx;
my $Value_idx;
my $Probe_ID_idx;
my $Gene_Symbol_idx;
open(FH, "$file") or die "Cannot open $file: $!";
......@@ -47,14 +58,30 @@ foreach my $file (@A_Subset_files){
my $Line = $_;
chomp($Line);
next if($Line =~ /^PATIENT/);
my (@A_Line) = split /\t/, $Line;
my $Patient_ID = $A_Line[0];
my $Value = $A_Line[9];
my $Probe_ID = $A_Line[11];
my $Gene_Symbol = $A_Line[13];
#Retrieving col indices from header for the data we need
if($Line=~/PATIENT/){
for(my $i=0; $i<=$#A_Line; $i++){
if($A_Line[$i] eq "PATIENT ID"){
$Patient_ID_idx = $i;
} elsif($A_Line[$i] eq "LOG2E"){
$Value_idx = $i;
} elsif(($A_Line[$i] eq "PROBE") || ($A_Line[$i] eq "ANNOTATIONID")){
$Probe_ID_idx = $i;
} elsif($A_Line[$i] eq "GENE SYMBOL"){
$Gene_Symbol_idx = $i;
}
}
next;
}
my $Patient_ID = $A_Line[$Patient_ID_idx];
my $Value = $A_Line[$Value_idx];
my $Probe_ID = $A_Line[$Probe_ID_idx];
my $Gene_Symbol = $A_Line[$Gene_Symbol_idx];
$Gene_Symbol="NA" if($Gene_Symbol eq "null");
......
......@@ -16,22 +16,21 @@
# limitations under the License.
###########################################################################
###########################################################################
#### Code rewritten by Wei and Serge
###########################################################################
#Suppress printing of warnings
options(warn=-1)
###########################################################################
#Comparative Marker Selection
##########################################################################
#Function to obtain the aligned positions all items in the vectorToAlign
#in the vectorRef
#Function to obtain, for each item in vectorToAlign, its aligned position
#relative to vectorRef
get.reordered_index = function(vectorToAlign, vectorRef){
vectorToAlign = as.vector(vectorToAlign)
vectorRef = as.vector(vectorRef)
res = vector(mode="integer", length=length(vectorToAlign))
for(i in 1:length(res)){
......@@ -73,10 +72,18 @@ MS.loader <- function(
#Getting rid of the probesets w/o associated gene symbol:
idx_wo_symbol = which(is.na(mRNAData$GENE_SYMBOL))
#print(length(idx_wo_symbol))
if(length(idx_wo_symbol)>0){
mRNAData=mRNAData[-idx_wo_symbol,]
}
idx_wo_symbol = which(mRNAData$GENE_SYMBOL=="null")
#print(length(idx_wo_symbol))
if(length(idx_wo_symbol)>0){
mRNAData=mRNAData[-idx_wo_symbol,]
}
#Create a data.frame with unique probe/gene ids.
geneStatsData <- data.frame(mRNAData$PROBE.ID,mRNAData$GENE_SYMBOL);
......@@ -212,9 +219,9 @@ MS.loader <- function(
s = strsplit(as.character(finalHeatmapData$GROUP), " ")
d = vector(mode="character", length=length(s))
for(i in 1:length(s)){
d[i] = s[[i]][1]
d[i] = s[[i]][2]
}
idx = get.reordered_index(d, topgenes$GENE_SYMBOL)
idx = get.reordered_index(d, topgenes$PROBE.ID)
finalHeatmapData = finalHeatmapData[ idx, ]
#WRITE TO FILE
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment