... | ... | @@ -205,6 +205,70 @@ cd /scratch/users/sbusi/ONT/cedric_ont_basecalling |
|
|
snakemake -np -s [updated_SNAKEFILE](https://git-r3lab.uni.lu/susheel.busi/ont_pilot_gitlab/-/blob/checkpoint_snakefile/updated_SNAKEFILEurl)
|
|
|
```
|
|
|
|
|
|
## Chapter VII - Validating the analyses and workflows on a different machine
|
|
|
- In the wetlab, one repeats experiments, especially a protocol which is tested across several labs
|
|
|
- So too, in the computational world, it is ideal to validate the working nature of the workflow on a different machine
|
|
|
- In light of this, we call this part of the story "The Terminal Transfer"
|
|
|
|
|
|
##### Transfer to ESB #####
|
|
|
- Transferred the files to ESB to work there due to the 'batch' and 'bigmem' limitations
|
|
|
```
|
|
|
# Transferring the files from IRIS to esb-compute-01
|
|
|
cd /scratch/users/sbusi/ONT/cedric_ont_basecalling/
|
|
|
si10 # interactive session on IRIS for 10 hours, set in the bash_profile
|
|
|
rsync --rsh='ssh -p 8022' -avp MODULAR_SNAKEFILE susheel.busi@lcsb-cdc-esb-compute-01.uni.lux:/mnt/data/sbusi/
|
|
|
rsync --rsh='ssh -p 8022' -avp results susheel.busi@lcsb-cdc-esb-compute-01.uni.lux:/mnt/data/sbusi/MODULAR_SNAKEFILE/
|
|
|
rsync --rsh='ssh -p 8022' -avp /scratch/users/claczny/fecal_pilot_testing/data \
|
|
|
susheel.busi@lcsb-cdc-esb-compute-01.uni.lux:/mnt/data/sbusi/MODULAR_SNAKEFILE/.
|
|
|
rsync --rsh='ssh -p 8022' -avp /scratch/users/claczny/ont/fecal_pilot/data/raw \
|
|
|
susheel.busi@lcsb-cdc-esb-compute-01.uni.lux:/mnt/data/sbusi/MODULAR_SNAKEFILE/data/.
|
|
|
rsync --rsh='ssh -p 8022' -avp /scratch/users/claczny/ont/fecal_pilot_testing/dbs \
|
|
|
susheel.busi@lcsb-cdc-esb-compute-01.uni.lux:/mnt/data/sbusi/MODULAR_SNAKEFILE/.
|
|
|
rsync --rsh='ssh -p 8022' -avp /home/users/sbusi/apps/DAS_Tool-master/db \
|
|
|
susheel.busi@lcsb-cdc-esb-compute-01.uni.lux:/home/susheel.busi/databases/
|
|
|
rsync --rsh='ssh -p 8022' -avp /scratch/users/sbusi/ONT/cedric_ont_basecalling/metaT \
|
|
|
susheel.busi@lcsb-cdc-esb-compute-01.uni.lux:/mnt/data/sbusi/MODULAR_SNAKEFILE/
|
|
|
rsync --rsh='ssh -p 8022' -avp /home/users/sbusi/apps/db/gtdbtk/release89 \
|
|
|
susheel.busi@lcsb-cdc-esb-compute-01.uni.lux:/home/susheel.busi/databases/
|
|
|
rsync --rsh='ssh -p 8022' -avp /mnt/isilon/projects/ecosystem_biology/NOMIS/DIAMOND/new_nr.dmnd \
|
|
|
susheel.busi@lcsb-cdc-esb-compute-01.uni.lux:/mnt/data/sbusi/MODULAR_SNAKEFILE/dbs/
|
|
|
|
|
|
# on ESB - # This was a pain - have to download diamond, gtdbtk database, dastool database, paths etc
|
|
|
cd /mnt/data/sbusi/MODULAR_SNAKEFILE
|
|
|
|
|
|
mv /home/susheel.busi/databases/db /home/susheel.busi/databases/DASTool_db
|
|
|
mv /home/susheel.busi/databases/release89 /home/susheel.busi/databases/gtdbtk_release89
|
|
|
|
|
|
# edited the launcher files to remove the "cluster" part
|
|
|
cp src/snakemake_run_use_conda_FINAL.sh src/snakemake_run_on_ESB.sh # edited the 'snakemake_run_on_ESB.sh' file
|
|
|
|
|
|
cd /mnt/data/sbusi/MODULAR_SNAKEFILE
|
|
|
conda activate snakemake
|
|
|
./src/snakemake_run_on_ESB.sh (OR) snakemake -j 4 -p -s updated_SNAKEFILE --rerun-incomplete --use-conda
|
|
|
|
|
|
# Running into diamond database issues
|
|
|
- So extracted sequences and re-made the diamond database
|
|
|
- updated the config/CONFIG.yaml file
|
|
|
.snakemake/conda/4fa897e7/bin/diamond getseq -d /mnt/data/sbusi/MODULAR_SNAKEFILE/dbs/new_nr.dmnd | \
|
|
|
.snakemake/conda/4fa897e7/bin/diamond makedb -d /mnt/data/sbusi/MODULAR_SNAKEFILE/dbs/diamond_db.dmnd
|
|
|
|
|
|
# 'activate' was missing so exported the PATH as follows:
|
|
|
export PATH="/home/susheel.busi/miniconda3/bin:$PATH"
|
|
|
./src/snakemake_run_on_ESB.sh # to run the snakefile
|
|
|
grep 'steps' default.log # to check the progress quickly without having to 'less' the file each time
|
|
|
|
|
|
# Error in checkM databases. Missing!!
|
|
|
# setting up the checkM databases
|
|
|
cd /home/susheel.busi/.checkm
|
|
|
curl -L -O https://data.ace.uq.edu.au/public/CheckM_databases/checkm_data_2015_01_16.tar.gz
|
|
|
tar xzf checkm_data_2015_01_16.tar.gz
|
|
|
/mnt/data/sbusi/MODULAR_SNAKEFILE/.snakemake/conda/0dfd8768/bin/checkm data setRoot /home/susheel.busi/.checkm/
|
|
|
|
|
|
# re-running only the Taxonomy part of the pipeline since everything up to DASTool was done
|
|
|
# did this by editing the config file
|
|
|
# Finally, everything ran beautifully - MACHINE VALIDATION COMPLETE!!
|
|
|
```
|
|
|
|
|
|
## Chapter IX - The miscellaneous or nearly-forgotten side projects
|
|
|
- Due to the multifaceted nature of the beast, i.e. the modular workflow, we tested several aspects separately
|
|
|
- For example: since we used two mappers, bwa-mem and minimap, for the reads, we binned each sample separately based on the mapper
|
... | ... | |