Skip to content
GitLab
Projects
Groups
Snippets
/
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Sign in
Toggle navigation
Menu
Open sidebar
BDS
GeneDER
GeneDER_core
Commits
fe85f14c
Commit
fe85f14c
authored
Nov 13, 2020
by
Leon-Charles Tranchevent
Browse files
Refactoring step 06 to process all datasets at once.
parent
96c62eee
Changes
54
Hide whitespace changes
Inline
Side-by-side
04-Prepare_datasets/Makefile
View file @
fe85f14c
...
...
@@ -9,10 +9,6 @@ data:
@
sbatch
${CODE_FOLDER}
clean_datasets.sh
check
:
@
sbatch
${CODE_FOLDER}
check.sh
names
:
@
sbatch
${CODE_FOLDER}
get_names.sh
ps
:
@
sbatch
${CODE_FOLDER}
create_probelists.sh
match
:
@
sbatch
${CODE_FOLDER}
match_probes.sh
doc
:
...
...
04-Prepare_datasets/create_probelists.sh
deleted
100755 → 0
View file @
96c62eee
#!/bin/bash -l
#SBATCH -J geneder:04:probelists
#SBATCH --mail-type=all
#SBATCH --mail-user=leon-charles.tranchevent@uni.lu
#SBATCH -N 1
#SBATCH -n 4
#SBATCH --time=0-00:05:00
#SBATCH -p batch
#SBATCH --qos=normal
echo
"== Starting run at
$(
date
)
"
echo
"== Job ID:
${
SLURM_JOBID
}
"
echo
"== Node list:
${
SLURM_NODELIST
}
"
echo
"== Submit dir. :
${
SLURM_SUBMIT_DIR
}
"
echo
""
# Defining global parameters.
OUTPUT_FOLDER
=
/home/users/ltranchevent/Data/GeneDER/Analysis/04/
CODE_FOLDER
=
/home/users/ltranchevent/Projects/GeneDER/Analysis/04-Prepare_datasets/
# Loading modules.
module load lang/R/3.6.0-foss-2019a-bare
# Actual job.
Rscript
--vanilla
${
CODE_FOLDER
}
create_probelists.R
>
${
OUTPUT_FOLDER
}
create_ps_log.out 2>
${
OUTPUT_FOLDER
}
create_ps_log.err
# Moving the slurm log file to data
mv
${
CODE_FOLDER
}
slurm-
*
out
${
OUTPUT_FOLDER
}
04-Prepare_datasets/get_names.sh
deleted
100755 → 0
View file @
96c62eee
#!/bin/bash -l
#SBATCH -J geneder:04:getnames
#SBATCH --mail-type=all
#SBATCH --mail-user=leon-charles.tranchevent@uni.lu
#SBATCH -N 1
#SBATCH -n 2
#SBATCH --time=0-00:10:00
#SBATCH -p batch
#SBATCH --qos=normal
echo
"== Starting run at
$(
date
)
"
echo
"== Job ID:
${
SLURM_JOBID
}
"
echo
"== Node list:
${
SLURM_NODELIST
}
"
echo
"== Submit dir. :
${
SLURM_SUBMIT_DIR
}
"
echo
""
# Defining global parameters.
GEO_PLAFORM_FOLDER
=
/home/users/ltranchevent/Data/GeneDER/Original/Platforms/
OUTPUT_FOLDER
=
/home/users/ltranchevent/Data/GeneDER/Analysis/04/
CODE_FOLDER
=
/home/users/ltranchevent/Projects/GeneDER/Analysis/04-Prepare_datasets/
# Loading modules.
module load lang/R/3.6.0-foss-2019a-bare
# Load configuration
source
../libs/conf/confSH.sh
create_variables ../Confs/datasets_config.yml
create_variables ../Confs/platforms_config.yml
# Get the biomart data.
nbPlatforms
=
${#
platforms__platform_name
[@]
}
for
((
i
=
0
;
i<
$nbPlatforms
;
i++
))
do
platformName
=
${
platforms__platform_name
[
$i
]
}
platformBiomartName
=
${
platforms__biomart_name
[
$i
]
}
platformGEOName
=
${
platforms__geo_name
[
$i
]
}
if
[
"
${
platformBiomartName
}
"
!=
"NA"
]
then
# Get the official gene names.
wget
-O
${
OUTPUT_FOLDER
}${
platformName
}
_genenames_raw.tsv
'http://www.ensembl.org/biomart/martservice?query=<?xml version="1.0" encoding="UTF-8"?><!DOCTYPE Query><Query virtualSchemaName = "default" formatter = "TSV" header = "0" uniqueRows = "1" count = "" datasetConfigVersion = "0.6"><Dataset name = "hsapiens_gene_ensembl" interface = "default"><Attribute name = "'
${
platformBiomartName
}
'"/><Attribute name = "external_gene_name"/></Dataset></Query>'
awk
'BEGIN{FS=OFS="\t"}{if ($1 != "") {print $0}}'
${
OUTPUT_FOLDER
}${
platformName
}
_genenames_raw.tsv |
awk
'{if (t[$1]) {t[$1]=t[$1]"|"$2} else {t[$1]=$2}} END{for (i in t) {if (i != "") {print i"\t"t[i]}}}'
|
sort
-u
>
${
OUTPUT_FOLDER
}${
platformName
}
_genenames.tsv
rm
${
OUTPUT_FOLDER
}${
platformName
}
_genenames_raw.tsv
sleep
2s
else
if
[
"
${
platformGEOName
}
"
!=
"NA"
]
then
# We use the GEO data
cut
-f
-2
${
GEO_PLAFORM_FOLDER
}${
platformGEOName
}
_gene_official.tsv |
grep
-v
OFFICIAL |
sort
-u
>
${
OUTPUT_FOLDER
}${
platformName
}
_genenames.tsv
else
# We use manually curated data.
cut
-f
-2
${
GEO_PLAFORM_FOLDER
}${
platformName
}
_gene_official.tsv |
grep
-v
OFFICIAL |
sort
-u
>
${
OUTPUT_FOLDER
}${
platformName
}
_genenames.tsv
fi
fi
done
# Moving the slurm log file to data
mv
${
CODE_FOLDER
}
slurm-
*
out
${
OUTPUT_FOLDER
}
04-Prepare_datasets/match_probes.sh
View file @
fe85f14c
...
...
@@ -4,7 +4,7 @@
#SBATCH --mail-user=leon-charles.tranchevent@uni.lu
#SBATCH -N 1
#SBATCH -n 4
#SBATCH --time=0-0:
20
:00
#SBATCH --time=0-0:
35
:00
#SBATCH -p batch
#SBATCH --qos=normal
...
...
@@ -15,13 +15,46 @@ echo "== Submit dir. : ${SLURM_SUBMIT_DIR}"
echo
""
# Defining global parameters.
GEO_PLAFORM_FOLDER
=
/home/users/ltranchevent/Data/GeneDER/Original/Platforms/
OUTPUT_FOLDER
=
/home/users/ltranchevent/Data/GeneDER/Analysis/04/
CODE_FOLDER
=
/home/users/ltranchevent/Projects/GeneDER/Analysis/04-Prepare_datasets/
# Loading modules.
module load lang/R/3.6.0-foss-2019a-bare
# Actual job
# Load configuration
source
../libs/conf/confSH.sh
create_variables ../Confs/datasets_config.yml
create_variables ../Confs/platforms_config.yml
# Get the biomart data.
nbPlatforms
=
${#
platforms__platform_name
[@]
}
for
((
i
=
0
;
i<
$nbPlatforms
;
i++
))
do
platformName
=
${
platforms__platform_name
[
$i
]
}
platformBiomartName
=
${
platforms__biomart_name
[
$i
]
}
platformGEOName
=
${
platforms__geo_name
[
$i
]
}
if
[
"
${
platformBiomartName
}
"
!=
"NA"
]
then
# Get the official gene names.
wget
-O
${
OUTPUT_FOLDER
}${
platformName
}
_genenames_raw.tsv
'http://www.ensembl.org/biomart/martservice?query=<?xml version="1.0" encoding="UTF-8"?><!DOCTYPE Query><Query virtualSchemaName = "default" formatter = "TSV" header = "0" uniqueRows = "1" count = "" datasetConfigVersion = "0.6"><Dataset name = "hsapiens_gene_ensembl" interface = "default"><Attribute name = "'
${
platformBiomartName
}
'"/><Attribute name = "external_gene_name"/></Dataset></Query>'
awk
'BEGIN{FS=OFS="\t"}{if ($1 != "") {print $0}}'
${
OUTPUT_FOLDER
}${
platformName
}
_genenames_raw.tsv |
awk
'{if (t[$1]) {t[$1]=t[$1]"|"$2} else {t[$1]=$2}} END{for (i in t) {if (i != "") {print i"\t"t[i]}}}'
|
sort
-u
>
${
OUTPUT_FOLDER
}${
platformName
}
_genenames.tsv
rm
${
OUTPUT_FOLDER
}${
platformName
}
_genenames_raw.tsv
sleep
2s
else
if
[
"
${
platformGEOName
}
"
!=
"NA"
]
then
# We use the GEO data
cut
-f
-2
${
GEO_PLAFORM_FOLDER
}${
platformGEOName
}
_gene_official.tsv |
grep
-v
OFFICIAL |
sort
-u
>
${
OUTPUT_FOLDER
}${
platformName
}
_genenames.tsv
else
# We use manually curated data.
cut
-f
-2
${
GEO_PLAFORM_FOLDER
}${
platformName
}
_gene_official.tsv |
grep
-v
OFFICIAL |
sort
-u
>
${
OUTPUT_FOLDER
}${
platformName
}
_genenames.tsv
fi
fi
done
# Refining the biomart matchings.
Rscript
--vanilla
${
CODE_FOLDER
}
create_probelists.R
>
${
OUTPUT_FOLDER
}
create_ps_log.out 2>
${
OUTPUT_FOLDER
}
create_ps_log.err
Rscript
--vanilla
${
CODE_FOLDER
}
match_probes.R
>
${
OUTPUT_FOLDER
}
match_log.out 2>
${
OUTPUT_FOLDER
}
match_log.err
# Moving the slurm log file to data
...
...
05-Get_DEGs/plot_biomarkers.sh
View file @
fe85f14c
#!/bin/bash -l
#SBATCH -J geneder:0
4
:plotbm
#SBATCH -J geneder:0
5
:plotbm
#SBATCH --mail-type=all
#SBATCH --mail-user=leon-charles.tranchevent@uni.lu
#SBATCH -N 1
...
...
@@ -15,8 +15,8 @@ echo "== Submit dir. : ${SLURM_SUBMIT_DIR}"
echo
""
# Defining global parameters.
OUTPUT_FOLDER
=
/home/users/ltranchevent/Data/GeneDER/Analysis/0
4
/
CODE_FOLDER
=
/home/users/ltranchevent/Projects/GeneDER/Analysis/0
4-Prepare_dataset
s/
OUTPUT_FOLDER
=
/home/users/ltranchevent/Data/GeneDER/Analysis/0
5
/
CODE_FOLDER
=
/home/users/ltranchevent/Projects/GeneDER/Analysis/0
5-Get_DEG
s/
# Loading modules.
module load lang/R/3.6.0-foss-2019a-bare
...
...
1
6-Data_integration
_all
/Makefile
→
0
6-Data_integration/Makefile
View file @
fe85f14c
INPUT_FOLDER
=
/home/users/ltranchevent/Data/GeneDER/Analysis/04/
RS_INPUT_FOLDER
=
/home/users/ltranchevent/Data/GeneDER/Analysis/15/
OUTPUT_FOLDER
=
/home/users/ltranchevent/Data/GeneDER/Analysis/16/
CODE_FOLDER
=
/home/users/ltranchevent/Projects/GeneDER/Analysis/16-Data_integration_all/
OUTPUT_FOLDER
=
/home/users/ltranchevent/Data/GeneDER/Analysis/06/
CODE_FOLDER
=
/home/users/ltranchevent/Projects/GeneDER/Analysis/06-Data_integration/
SHELL
=
/bin/bash
clean
:
@
rm
-rf
*
~
clean_outputs
:
@
rm
-rf
${OUTPUT_FOLDER}
*
@
paste
<
(
cut
-f
-2
${RS_INPUT_FOLDER}
NBB_matchingdata_formatted.tsv |
head
-n
1
)
<
(
head
-n
1
${INPUT_FOLDER}
Combined_probe_matching.tsv
)
<
(
echo
"RNAseq_EG"
)
|
cut
-f
2,5-
>
${OUTPUT_FOLDER}
Combined_probe_matching.tsv
@
grep
-wv
genes
${INPUT_FOLDER}
Combined_probe_matching.tsv |
sort
-t
$$
'\t'
-k2
,2
>
${OUTPUT_FOLDER}
MA_combined_probe_matching.tsv
@
cut
-f
-2
${RS_INPUT_FOLDER}
NBB_matchingdata_formatted.tsv |
grep
-wv
SYMBOL |
awk
'{if (t[$$2]) {t[$$2]=t[$$2]"|"$$1} else {t[$$2]=$$1}} END{for (i in t) {if (i != "") {print i"\t"t[i]}}}'
|
sort
-t
$$
'\t'
-k1
,1
>
${OUTPUT_FOLDER}
RS_combined_probe_matching.tsv
@
join
-a
1
-a
2
-e
"NA"
-o
auto
-1
2
-2
1
-t
$$
'\t'
${OUTPUT_FOLDER}
MA_combined_probe_matching.tsv
${OUTPUT_FOLDER}
RS_combined_probe_matching.tsv |
cut
-f
1,3-
>>
${OUTPUT_FOLDER}
Combined_probe_matching.tsv
@
cp
-rf
${INPUT_FOLDER}
Combined_probe_matching.tsv
${OUTPUT_FOLDER}
Combined_probe_matching.tsv
summarize
:
@
sbatch
${CODE_FOLDER}
summarize.sh
integrate
:
...
...
1
6-Data_integration
_all
/README.md
→
0
6-Data_integration/README.md
View file @
fe85f14c
File moved
1
6-Data_integration
_all
/analyse.sh
→
0
6-Data_integration/analyse.sh
View file @
fe85f14c
#!/bin/bash -l
#SBATCH -J geneder:
1
6:analyse
#SBATCH -J geneder:
0
6:analyse
#SBATCH --mail-type=all
#SBATCH --mail-user=leon-charles.tranchevent@uni.lu
#SBATCH -N 1
#SBATCH -n 4
#SBATCH --time=0-2:
30
:00
#SBATCH --time=0-2:
45
:00
#SBATCH -p batch
#SBATCH --qos=
qos-batch
#SBATCH --qos=
normal
echo
"== Starting run at
$(
date
)
"
echo
"== Job ID:
${
SLURM_JOBID
}
"
...
...
@@ -15,8 +15,8 @@ echo "== Submit dir. : ${SLURM_SUBMIT_DIR}"
echo
""
# Defining global parameters.
OUTPUT_FOLDER
=
/home/users/ltranchevent/Data/GeneDER/Analysis/
1
6/
CODE_FOLDER
=
/home/users/ltranchevent/Projects/GeneDER/Analysis/
1
6-Data_integration
_all
/
OUTPUT_FOLDER
=
/home/users/ltranchevent/Data/GeneDER/Analysis/
0
6/
CODE_FOLDER
=
/home/users/ltranchevent/Projects/GeneDER/Analysis/
0
6-Data_integration/
# Loading modules.
module load lang/R/3.6.0-foss-2019a-bare
...
...
1
6-Data_integration
_all
/analyse_integration_results.R
→
0
6-Data_integration/analyse_integration_results.R
View file @
fe85f14c
File moved
1
6-Data_integration
_all
/analyse_integration_results_figures.R
→
0
6-Data_integration/analyse_integration_results_figures.R
View file @
fe85f14c
File moved
1
6-Data_integration
_all
/check.sh
→
0
6-Data_integration/check.sh
View file @
fe85f14c
#!/bin/bash -l
#SBATCH -J geneder:
1
6:check
#SBATCH -J geneder:
0
6:check
#SBATCH --mail-type=all
#SBATCH --mail-user=leon-charles.tranchevent@uni.lu
#SBATCH -N 1
#SBATCH -n 4
#SBATCH --time=0-0:0
3
:00
#SBATCH --time=0-0:0
5
:00
#SBATCH -p batch
#SBATCH --qos=
qos-batch
#SBATCH --qos=
normal
echo
"== Starting run at
$(
date
)
"
echo
"== Job ID:
${
SLURM_JOBID
}
"
...
...
@@ -15,8 +15,8 @@ echo "== Submit dir. : ${SLURM_SUBMIT_DIR}"
echo
""
# Defining global parameters.
OUTPUT_FOLDER
=
/home/users/ltranchevent/Data/GeneDER/Analysis/
1
6/
CODE_FOLDER
=
/home/users/ltranchevent/Projects/GeneDER/Analysis/
1
6-Data_integration
_all
/
OUTPUT_FOLDER
=
/home/users/ltranchevent/Data/GeneDER/Analysis/
0
6/
CODE_FOLDER
=
/home/users/ltranchevent/Projects/GeneDER/Analysis/
0
6-Data_integration/
# Loading modules.
module load lang/R/3.6.0-foss-2019a-bare
...
...
1
6-Data_integration
_all
/check_PD_enrichment.R
→
0
6-Data_integration/check_PD_enrichment.R
View file @
fe85f14c
File moved
1
6-Data_integration
_all
/compare_integrations.R
→
0
6-Data_integration/compare_integrations.R
View file @
fe85f14c
File moved
1
6-Data_integration
_all
/compute_dataset_overlap.R
→
0
6-Data_integration/compute_dataset_overlap.R
View file @
fe85f14c
File moved
1
6-Data_integration
_all
/compute_gender_specificity.R
→
0
6-Data_integration/compute_gender_specificity.R
View file @
fe85f14c
File moved
1
6-Data_integration
_all
/create_gene_expr_matrices.R
→
0
6-Data_integration/create_gene_expr_matrices.R
View file @
fe85f14c
File moved
1
6-Data_integration
_all
/create_gene_expr_matrices.sh
→
0
6-Data_integration/create_gene_expr_matrices.sh
View file @
fe85f14c
#!/bin/bash -l
#SBATCH -J geneder:
1
6:gexpr
#SBATCH -J geneder:
0
6:gexpr
#SBATCH --mail-type=all
#SBATCH --mail-user=leon-charles.tranchevent@uni.lu
#SBATCH -N 1
#SBATCH -n 5
#SBATCH --time=0-00:
2
5:00
#SBATCH --time=0-00:
3
5:00
#SBATCH -p batch
#SBATCH --qos=
qos-batch
#SBATCH --qos=
normal
echo
"== Starting run at
$(
date
)
"
echo
"== Job ID:
${
SLURM_JOBID
}
"
...
...
@@ -15,8 +15,8 @@ echo "== Submit dir. : ${SLURM_SUBMIT_DIR}"
echo
""
# Defining global parameters.
OUTPUT_FOLDER
=
/home/users/ltranchevent/Data/GeneDER/Analysis/
1
6/
CODE_FOLDER
=
/home/users/ltranchevent/Projects/GeneDER/Analysis/
1
6-Data_integration
_all
/
OUTPUT_FOLDER
=
/home/users/ltranchevent/Data/GeneDER/Analysis/
0
6/
CODE_FOLDER
=
/home/users/ltranchevent/Projects/GeneDER/Analysis/
0
6-Data_integration/
# Loading modules.
module load lang/R/3.6.0-foss-2019a-bare
...
...
1
6-Data_integration
_all
/create_heatmaps.R
→
0
6-Data_integration/create_heatmaps.R
View file @
fe85f14c
File moved
1
6-Data_integration
_all
/create_heatmaps.sh
→
0
6-Data_integration/create_heatmaps.sh
View file @
fe85f14c
#!/bin/bash -l
#SBATCH -J geneder:
1
6:heatmaps
#SBATCH -J geneder:
0
6:heatmaps
#SBATCH --mail-type=all
#SBATCH --mail-user=leon-charles.tranchevent@uni.lu
#SBATCH -N 1
#SBATCH -n 1
#SBATCH --time=0-0:0
5
:00
#SBATCH --time=0-0:
1
0:00
#SBATCH -p batch
#SBATCH --qos=
qos-batch
#SBATCH --qos=
normal
echo
"== Starting run at
$(
date
)
"
echo
"== Job ID:
${
SLURM_JOBID
}
"
...
...
@@ -15,8 +15,8 @@ echo "== Submit dir. : ${SLURM_SUBMIT_DIR}"
echo
""
# Defining global parameters.
OUTPUT_FOLDER
=
/home/users/ltranchevent/Data/GeneDER/Analysis/
1
6/
CODE_FOLDER
=
/home/users/ltranchevent/Projects/GeneDER/Analysis/
1
6-Data_integration
_all
/
OUTPUT_FOLDER
=
/home/users/ltranchevent/Data/GeneDER/Analysis/
0
6/
CODE_FOLDER
=
/home/users/ltranchevent/Projects/GeneDER/Analysis/
0
6-Data_integration/
# Loading modules.
module load lang/R/3.6.0-foss-2019a-bare
...
...
1
6-Data_integration
_all
/create_individual_heatmaps.R
→
0
6-Data_integration/create_individual_heatmaps.R
View file @
fe85f14c
File moved
Prev
1
2
3
Next
Write
Preview
Supports
Markdown
0%
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment