... | ... | @@ -565,15 +565,102 @@ mkdir slurm_files |
|
|
mv slurm* slurm_files/.
|
|
|
```
|
|
|
|
|
|
##############################
|
|
|
# Notes - for 2018_GDB data #
|
|
|
##############################
|
|
|
## Chapter XII - Work(s) in Progress
|
|
|
##### CIRCULATORY POTENTIAL #####
|
|
|
- This is where the brain doesn't know when to shut off, and we went on a tangent
|
|
|
- The question being: do flye or metaspades_hybrid allow for circularisation of the contigs, especially for binning?
|
|
|
```
|
|
|
############
|
|
|
# CIRCULAR # "TO-DO"
|
|
|
############
|
|
|
# checking how many contigs in each bin
|
|
|
cd /scratch/users/sbusi/ONT/cedric_ont_basecalling/results/Binning
|
|
|
# made a list with the following. excluded 'flye' 'cos bins are not real
|
|
|
bwa_lr_metaspades
|
|
|
bwa_merged_metaspades
|
|
|
bwa_sr_metaspades
|
|
|
mmi_lr_metaspades
|
|
|
mmi_merged_metaspades
|
|
|
mmi_sr_metaspades
|
|
|
megahit
|
|
|
|
|
|
# For every binning approach named in ./list (one name per line), append each
# DAS Tool bin's FASTA path followed by its contig count to <name>_bins_contigs.
# The contig count is the number of FASTA header lines (lines starting with '>').
while IFS= read -r i || [[ -n "$i" ]]; do
  # Skip blank lines in the list.
  [[ -n "$i" ]] || continue
  for file in "$i"/dastool_output/"$i"_DASTool_bins/*.fa; do
    # When the glob matches nothing it stays literal; skip it instead of
    # echoing a non-existent path and letting grep fail.
    [[ -e "$file" ]] || continue
    echo "$file"
    grep -c '^>' "$file"
  done >> "${i}_bins_contigs"
done < list
|
|
|
# None of the bins came from a single contig
|
|
|
|
|
|
# checking the circularity of the bins
|
|
|
cd /scratch/users/sbusi/ONT/cedric_ont_basecalling/
|
|
|
mkdir circular
|
|
|
cd circular
|
|
|
|
|
|
conda activate circlator
|
|
|
conda install canu
|
|
|
circlator all ../results/assembly/flye/lr/merged/barcode07/assembly.fna ../results/basecalled/merged/barcode07.fna flye_circlator_output
|
|
|
|
|
|
# failing in 10-hour interactive so submitted as bash script
|
|
|
sbatch -p bigmem -t 2-00:00:00 -N 1 -n 24 run_circlator.sh (OR) sbatch -p bigmem -t 0-16:00:00 -N 1 -n 24 run_circlator.sh # failed after a while, so going to manual methods below
|
|
|
|
|
|
# Checking graph with Bandage
|
|
|
conda activate bandage
|
|
|
Bandage info ../results/assembly/metaspades_hybrid/lr_barcode07-sr_NEB2_MG_S17/assembly_graph_with_scaffolds.gfa
|
|
|
|
|
|
# removing singletons == DOES NOT WORK yet..
|
|
|
cd /scratch/users/sbusi/ONT/cedric_ont_basecalling/circular
|
|
|
ml swenv/bioinfo-env/v0.1-20170602-production
|
|
|
ml swenv/default-env/v0.1-20170602-production
|
|
|
ml bio/BioPerl/1.7.1-foss-2017a-Perl-5.24.1
|
|
|
|
|
|
# NOTE: the quotes must be plain ASCII (' and "). Typographic quotes are not
# shell quoting, so the Perl program would be split into words by the shell.
# -0076 sets Perl's input record separator to '>' (octal 076), so each FASTG
# record is read as one chunk; a record is printed only when the two groups
# captured around the ':' differ.
perl -0076 -ne '{chomp; m/(.*):(.*);/; print ">$_" if $1 ne $2};' < /scratch/users/sbusi/ONT/cedric_ont_basecalling/results/assembly/metaspades_hybrid/lr_barcode07-sr_NEB2_MG_S17/assembly_graph.fastg > megahit_singleton_removed.fastg
|
|
|
```
|
|
|
|
|
|
##### GraphBin #####
|
|
|
- https://github.com/Vini2/GraphBin - for binning from graphs
|
|
|
- downloaded the "methylation-aware" flye assembly and .gfa files to desktop
|
|
|
- also downloaded the flye_maxbin_scaffolds2bin.tsv and saved as .csv
|
|
|
- working on the desktop (MAC)
|
|
|
```
|
|
|
cd ~/Documents/Nanopore_ONT
|
|
|
git clone https://github.com/Vini2/GraphBin.git
|
|
|
cd GraphBin/
|
|
|
conda env create -f environment.yml
|
|
|
conda activate graphbin
|
|
|
|
|
|
python graphbin.py --assembler flye --graph assembly_graph.gfa --paths assembly.fasta \
|
|
|
--binned flye_maxbin.scaffolds2bin.csv --output flye_methylation_graphbin --max_iteration 10
|
|
|
|
|
|
# downloaded the spades contigs.fasta, contigs.paths, assembly_graph_with_scaffolds.gfa, and the scaffolds2bin.tsv file from bwa_sr_metaspades_hybrid to desktop
|
|
|
cd ~/Documents/Nanopore_ONT/GraphBin/spades_input
|
|
|
conda activate graphbin
|
|
|
|
|
|
# removed the "maxbin_output." from the .tsv file and saved as .csv
|
|
|
cd /Users/susheel.busi/Documents/Nanopore_ONT/GraphBin
|
|
|
python graphbin.py --assembler spades --graph spades_input/assembly_graph_with_scaffolds.gfa \
|
|
|
--paths spades_input/contigs.paths --binned spades_input/bwa_sr_metaspades_hybrid_maxbin.scaffolds2bin.csv \
|
|
|
--output spades_graphbin_output
|
|
|
```
|
|
|
|
|
|
##### AGB #####
|
|
|
- Trying AGB: https://almiheenko.github.io/AGB/manual.html
|
|
|
```
|
|
|
cd /scratch/users/sbusi/ONT/cedric_ont_basecalling/2019_GDB
|
|
|
conda create -c almiheenko -c bioconda -n AGB agb
|
|
|
chmod -R 775 /home/users/sbusi/apps/miniconda3/envs/AGB/
|
|
|
|
|
|
si
|
|
|
conda activate AGB
|
|
|
agb.py -i methylation_aware_results/assembly/flye/lr/merged/no_barcode/ -a Flye -o flye_methylation_agb_output
|
|
|
```
|
|
|
|
|
|
##### Notes - for 2018_GDB data #####
|
|
|
1. Flye does not produce metabat bins. Reason == only 2 contigs (less than 1500 bp) have coverages over 1.
|
|
|
2. Should we adjust the "minCV" parameter for metabat?
|
|
|
3. Added the snakemake "--keep-going" or "-k" flag to the launcher to finish the independent jobs, and restarted
|
|
|
4. Need to fix checkm and gtdbtk runs
|
|
|
5. CheckM was throwing file missing errors, so did the following:
|
|
|
```
|
|
|
cp /home/users/sbusi/apps/checkm/* /home/users/sbusi/.checkm/.
|
|
|
```
|
|
|
5. CheckM was throwing file missing errors, so did the following: "cp /home/users/sbusi/apps/checkm/* /home/users/sbusi/.checkm/."
|
|
|
6. Use "checkm data setRoot" to specify the location of CheckM database files. |
|
|
\ No newline at end of file |