stringsAsFactors=F)%>%.[!is.na(.$entrez),]## identify weird codes which do not have entrez ID
# Keep only the rows whose row name appears in identifiers_df$id. Per the
# original note, this removes pseudogenes and rows without an Entrez ID, which
# Hipathia will not use.
# drop = FALSE preserves the two-dimensional shape (and the row names) even
# when only one row or one column is left, so the later rownames()/apply()
# steps still receive a table rather than a bare vector.
expreset_raw <- expreset_raw[
  rownames(expreset_raw) %in% identifiers_df$id, ,
  drop = FALSE
]
print("read and clean from non entrez codes ...done")
# Quick look at how the filtered data are distributed.
# Second row only: histogram of its values (100 breaks), then its variance.
hist(as.numeric(expreset_raw[2, ]), breaks = 100)
var(as.numeric(expreset_raw[2, ]))
# Variance of every row, followed by a histogram of those variances.
getVari <- apply(expreset_raw, MARGIN = 1, FUN = var)
hist(getVari, breaks = 100)
# TMM normalisation with the "edgeR" package:
#   1. wrap the count table in a DGEList,
#   2. compute the normalisation factors with method "TMM",
#   3. convert to log-scale counts per million using a prior count of 3.
# A progress line is printed after each step.
dge <- DGEList(counts = expreset_raw)
print("dge...done")
tmm <- calcNormFactors(dge, method = "TMM")
print("tmm...done")
logcpm <- cpm(tmm, log = TRUE, prior.count = 3)
print("logcpm...done")
### Quantile-normalise the log-CPM matrix before handing it to Hipathia
# NOTE(review): normalize.quantiles() is presumably preprocessCore's; confirm
# against the library() calls at the top of the script.
# The column and row names are copied back from logcpm after the call.
gExp <- normalize.quantiles(logcpm)
colnames(gExp) <- colnames(logcpm)
rownames(gExp) <- rownames(logcpm)
### Construct the metadata table with the info from GEO GSE152075 (Virus vs Control)
# One row per column of gExp: fileName is the column name, type is the group.
# NOTE(review): the labels assume the columns of gExp are ordered as 430 virus
# samples followed by 54 controls; confirm against the GEO sample sheet.
# Fail early if the column count is not 430 + 54. Without this check,
# data.frame() silently recycles the shorter argument whenever one length is an
# exact multiple of the other, which would mislabel samples.
stopifnot(ncol(gExp) == 430 + 54)
metadata <- data.frame(
  fileName = colnames(gExp),
  type = rep(c("Virus", "Control"), times = c(430, 54))
)
### Translate the normalised matrix with Hipathia's translate_data()
# "hsa" is the species code passed to the translation step.
trans_data <- translate_data(gExp, species = "hsa")
#### Export results table for the Cov-Hipathia Disease Maps differential signaling example.