mv slurm* slurm_files/.
# Notes - for 2018_GDB data #
## Chapter XII - Work(s) in Progress
- This is where the brain doesn't know when to shut off, and we went on a tangent
- The question being: does flye or metaspades_hybrid allow for circularisation of the contigs, especially for binning?
# checking how many contigs in each bin
cd /scratch/users/sbusi/ONT/cedric_ont_basecalling/results/Binning
# made a list with the following. excluded 'flye' 'cos bins are not real
# Count the contigs (lines containing '>') in every DAS Tool bin of each
# sample named in `list`, appending "<bin path>" followed by its count to
# <sample>_bins_contigs in the current directory.
# NOTE(review): assumes `list` holds one sample directory name per line — confirm.
while IFS= read -r i || [ -n "$i" ]; do
  [ -n "$i" ] || continue          # skip blank lines in the list
  for file in "$i"/dastool_output/"$i"_DASTool_bins/*.fa; do
    [ -e "$file" ] || continue     # glob matched nothing for this sample
    echo "$file"
    grep -c '>' "$file"
  done >> "$i"_bins_contigs
done < list
# None of the bins came from a single contig
# checking the circularity of the bins
cd /scratch/users/sbusi/ONT/cedric_ont_basecalling/
mkdir circular
cd circular
conda activate circlator
conda install canu
circlator all ../results/assembly/flye/lr/merged/barcode07/assembly.fna ../results/basecalled/merged/barcode07.fna flye_circlator_output
# failing in 10-hour interactive so submitted as bash script
sbatch -p bigmem -t 2-00:00:00 -N 1 -n 24 run_circlator.sh (OR) sbatch -p bigmem -t 0-16:00:00 -N 1 -n 24 run_circlator.sh # failed after a while, so going to manual methods below
# Checking graph with Bandage
conda activate bandage
Bandage info ../results/assembly/metaspades_hybrid/lr_barcode07-sr_NEB2_MG_S17/assembly_graph_with_scaffolds.gfa
# removing singletons == DOES NOT WORK yet..
cd /scratch/users/sbusi/ONT/cedric_ont_basecalling/circular
ml swenv/bioinfo-env/v0.1-20170602-production
ml swenv/default-env/v0.1-20170602-production
ml bio/BioPerl/1.7.1-foss-2017a-Perl-5.24.1
# -0076 sets perl's input record separator to octal 076 ('>'), so each FASTG
# record is read as one chunk; a record is printed (with its '>' restored)
# only when the two captured fields around the ':' differ.
# The program must be wrapped in plain ASCII quotes: typographic quotes
# (‘ ’ “ ”) are not shell quoting characters, so the shell would split and
# interpret the perl code itself.
perl -0076 -ne '{chomp; m/(.*):(.*);/; print ">$_" if $1 ne $2};' < /scratch/users/sbusi/ONT/cedric_ont_basecalling/results/assembly/metaspades_hybrid/lr_barcode07-sr_NEB2_MG_S17/assembly_graph.fastg > megahit_singleton_removed.fastg
##### GraphBin #####
- https://github.com/Vini2/GraphBin - for binning from graphs
- downloaded the "methylation-aware" flye assembly and .gfa files to desktop
- also downloaded the flye_maxbin_scaffolds2bin.tsv and saved as .csv
- working on the desktop (MAC)
cd ~/Documents/Nanopore_ONT
git clone https://github.com/Vini2/GraphBin.git
cd GraphBin/
conda env create -f environment.yml
conda activate graphbin
python graphbin.py --assembler flye --graph assembly_graph.gfa --paths assembly.fasta \
--binned flye_maxbin.scaffolds2bin.csv --output flye_methylation_graphbin --max_iteration 10
# downloaded the spades contigs.fasta, contigs.paths, assembly_graph_with_scaffolds.gfa, and the scaffolds2bin.tsv file from bwa_sr_metaspades_hybrid to desktop
cd ~/Documents/Nanopore_ONT/GraphBin/spades_input
conda activate graphbin
# removed the "maxbin_output." from the .tsv file and saved as .csv
cd /Users/susheel.busi/Documents/Nanopore_ONT/GraphBin
python graphbin.py --assembler spades --graph spades_input/assembly_graph_with_scaffolds.gfa \
--paths spades_input/contigs.paths --binned spades_input/bwa_sr_metaspades_hybrid_maxbin.scaffolds2bin.csv \
--output spades_graphbin_output
##### AGB #####
- Trying AGB: https://almiheenko.github.io/AGB/manual.html
cd /scratch/users/sbusi/ONT/cedric_ont_basecalling/2019_GDB
conda create -c almiheenko -c bioconda -n AGB agb
chmod -R 775 /home/users/sbusi/apps/miniconda3/envs/AGB/
conda activate AGB
agb.py -i methylation_aware_results/assembly/flye/lr/merged/no_barcode/ -a Flye -o flye_methylation_agb_output
1. Flye does not produce metabat bins. Reason == only 2 contigs (less than 1500 bp) have coverages over 1.
2. Should we adjust the "minCV" parameter for metabat?
3. Added the snakemake "--keep-going" or "-k" flag to the launcher to finish the independent jobs, and restarted
4. Need to fix checkm and gtdbtk runs
5. CheckM was throwing file-missing errors, so did the following:
cp /home/users/sbusi/apps/checkm/* /home/users/sbusi/.checkm/.
6. Use "checkm data setRoot" to specify the location of CheckM database files.
