#! /bin/bash -l
#
# Filter one protein FASTA database against another and report how many
# sequences remain.
#
# The executable in $cdhit is presumably cd-hit-2d (it is given the
# -i2/-s2/-S2 options) -- confirm against the environment that sets it.
#
# Required variables (not defined in this script; expected to come from the
# environment, e.g. the login profile loaded because of the -l in the shebang):
#   cdhit    clustering executable to run
#   dbB      FASTA file whose sequences are compared against dbA
#   threads  thread count passed to -T
#
# Output:
#   clustered.DBs  all sequences of dbB that are not in dbA, so dbA keeps all
#                  of its sequences and the new file does not overlap with it
#   stdout         number of '>' header lines in clustered.DBs, then in dbB

# Reference database. The path must not contain a space after the directory:
# "dbA=dir/ file" would leave dbA unset and try to execute "file" instead.
dbA=/work/users/ldenies/Toxin_HMM_database/final_T3DB.faa

# Abort early with a clear message instead of building a shifted command line
# out of empty expansions.
: "${cdhit:?cdhit must be set to the clustering executable}"
: "${dbB:?dbB must be set to the FASTA file to compare against dbA}"
: "${threads:?threads must be set to the number of threads}"

for fasta in "$dbA" "$dbB"; do
  if [[ ! -r "$fasta" ]]; then
    printf 'error: cannot read input FASTA: %s\n' "$fasta" >&2
    exit 1
  fi
done

# Option meanings below follow the cd-hit user's guide (assuming cd-hit-2d).
cdhit_args=(
  -i "$dbA"          # reference database
  -i2 "$dbB"         # database to be filtered
  -o clustered.DBs   # output file
  -c 0.9             # sequence identity threshold
  -n 5               # word length
  -G 0               # local (not global) sequence identity
  -aS 0.70           # alignment coverage of the shorter sequence
  -g 1               # assign to the most similar cluster (slow, accurate)
  -s2 0.1            # relative length difference cutoff for dbA sequences
  -S2 50000          # absolute length difference cutoff (residues)
  -T "$threads"      # threads
  -M 10000           # memory limit in MB
)

# Stop on failure so the counts below are never taken from a stale or
# partially written clustered.DBs.
"$cdhit" "${cdhit_args[@]}" || {
  status=$?
  printf 'error: %s exited with status %d\n' "$cdhit" "$status" >&2
  exit "$status"
}

# Sequences of dbB that survived the filtering, followed by the total number
# of sequences in dbB for comparison.
grep -c ">" -- clustered.DBs
grep -c ">" -- "$dbB"
