#################################################################
# 
#  PERFORMANCE TESTS FOR VARIOUS INPUT METHODS.
#
#  We'll test uncompressed data, gzip, bgzip, and BAM files.
#  Each will be read four ways: directly as a named file, from
#  stdin via a redirect, from stdin via a pipe, and through a
#  fifo (bash process substitution passed as the -a argument).
#  
#  We'll run each one twice in a row to check for improvement 
#  due to a cache warm up.
#
#################################################################


# Path to the bedtools binary under test. A BT value that is already set
# in the environment (even an empty one) is kept; the default is applied
# only when BT is unset.
: "${BT=../../bin/bedtools}"


#################################################################
#  Start by generating data, if desired
#  
#################################################################
# Build the benchmark inputs under ./perfData. Change `true` to `false`
# to skip generation.
#
# Files produced (all derived from the same sorted random intervals):
#   perfData/a10M.bed                 uncompressed query file
#   perfData/b10M.bed                 uncompressed database file
#   perfData/a10M_gzipped.bed.gz      gzip copy of a10M.bed
#   perfData/a10M_bgzipped.bed.gz     bgzip copy of a10M.bed
#   perfData/a10M.bam                 BAM conversion of a10M.bed
#
# Everything is run from the script's directory instead of cd-ing into
# perfData, so $BT works whether it is a relative or an absolute path and
# a failed cd can never scatter files into the wrong directory.
if true; then
  echo "generating data..."

  # Number of intervals in each file. The "a" file used to be generated
  # with 20,000,000 records although its name and every log message say
  # ten million; it now matches its label.
  PERF_N_RECORDS=10000000
  PERF_GENOME=human.hg19.genome

  # Abort: benchmarking against missing or empty inputs is meaningless.
  perf_die() {
    echo "ERROR: $*" >&2
    exit 1
  }

  # -p so that a leftover directory from an interrupted run is not an error.
  mkdir -p perfData || perf_die "cannot create perfData"

  "$BT" random -l 1000 -n "$PERF_N_RECORDS" -g "$PERF_GENOME" \
    | sort -k1,1 -k2,2n > perfData/a10M.bed
  # The pipeline's status is sort's, so check the result instead.
  [ -s perfData/a10M.bed ] || perf_die "failed to generate perfData/a10M.bed"

  "$BT" random -l 1000 -n "$PERF_N_RECORDS" -g "$PERF_GENOME" \
    | sort -k1,1 -k2,2n > perfData/b10M.bed
  [ -s perfData/b10M.bed ] || perf_die "failed to generate perfData/b10M.bed"

  # Compress straight to the target names: no intermediate copy of the
  # data, and no overwrite prompt when a stale .gz is present.
  gzip -c perfData/a10M.bed > perfData/a10M_gzipped.bed.gz \
    || perf_die "gzip failed"
  bgzip -c perfData/a10M.bed > perfData/a10M_bgzipped.bed.gz \
    || perf_die "bgzip failed"

  "$BT" bedtobam -i perfData/a10M.bed -g "$PERF_GENOME" > perfData/a10M.bam \
    || perf_die "bedtobam failed"
fi

###################################################################
#  Begin Tests
###################################################################
# Sixteen tests: every input method (file, redirect, pipe, fifo) crossed
# with every encoding of the a10M data (uncompressed, gzipped, bgzipped,
# bam). Each test intersects that input with perfData/b10M.bed and is run
# twice back to back so the second run shows any cache warm-up effect.
# Only the "user" and "maxrss" lines of the timing report are logged.
#
# NOTE: `runit` is not defined in this script and must be on PATH.
# Presumably it is a timing wrapper that reports resource usage on
# stderr -- confirm against your environment.
# Process substitution (the fifo tests) requires bash.

PERF_LOG=runLog.txt
PERF_B_FILE=perfData/b10M.bed
PERF_TOTAL=16   # 4 input methods x 4 encodings

# timed_intersect METHOD INPUT
#   Runs `runit $BT intersect ... -sorted` with INPUT as the -a data,
#   delivered by METHOD: file | redirect | pipe | fifo.
#   The intersect output (stdout) is discarded and runit's stderr is
#   emitted on stdout so the caller can filter it. The redirections are
#   attached to the runit command itself, so errors from `cat` or from a
#   failed `<` still go to the script's own stderr, not into the log.
#   Returns 2 for an unknown METHOD.
timed_intersect() {
  local method=$1 input=$2
  case "$method" in
    file)
      runit "$BT" intersect -a "$input" -b "$PERF_B_FILE" -sorted 2>&1 >/dev/null
      ;;
    redirect)
      runit "$BT" intersect -a - -b "$PERF_B_FILE" -sorted < "$input" 2>&1 >/dev/null
      ;;
    pipe)
      # The cat is the point of this test: it makes stdin a real pipe.
      # shellcheck disable=SC2002
      cat "$input" | runit "$BT" intersect -a - -b "$PERF_B_FILE" -sorted 2>&1 >/dev/null
      ;;
    fifo)
      runit "$BT" intersect -a <(cat "$input") -b "$PERF_B_FILE" -sorted 2>&1 >/dev/null
      ;;
    *)
      echo "timed_intersect: unknown input method '$method'" >&2
      return 2
      ;;
  esac
}

# Start from an empty log; each run of the script replaces the previous one.
: > "$PERF_LOG"

perf_test_num=0
for perf_method in file redirect pipe fifo; do
  for perf_format in uncompressed gzipped bgzipped bam; do
    perf_test_num=$((perf_test_num + 1))

    case "$perf_format" in
      uncompressed) perf_input=perfData/a10M.bed ;;
      gzipped)      perf_input=perfData/a10M_gzipped.bed.gz ;;
      bgzipped)     perf_input=perfData/a10M_bgzipped.bed.gz ;;
      bam)          perf_input=perfData/a10M.bam ;;
    esac

    # The pipe tests have always been logged as "gzip"/"bgzip" rather than
    # "gzipped"/"bgzipped"; keep the log text stable for anything that
    # compares or parses old logs.
    perf_label=$perf_format
    if [ "$perf_method" = pipe ]; then
      case "$perf_format" in
        gzipped)  perf_label=gzip ;;
        bgzipped) perf_label=bgzip ;;
      esac
    fi

    # Progress goes to the terminal, results go to the log.
    printf 'Test %d of %d....\n' "$perf_test_num" "$PERF_TOTAL"

    # Two blank lines separate consecutive tests in the log.
    if [ "$perf_test_num" -gt 1 ]; then
      printf '\n\n' >> "$PERF_LOG"
    fi
    printf 'Test %d: Intersect a10M, ten million records, %s from %s with b10M\n' \
      "$perf_test_num" "$perf_label" "$perf_method" >> "$PERF_LOG"
    timed_intersect "$perf_method" "$perf_input" \
      | grep -e "user" -e maxrss >> "$PERF_LOG"

    printf '\nRe-do test %d for possible cache speed up.\n' \
      "$perf_test_num" >> "$PERF_LOG"
    timed_intersect "$perf_method" "$perf_input" \
      | grep -e "user" -e maxrss >> "$PERF_LOG"
  done
done


# All tests have run: announce completion on stdout, then delete the
# perfData directory and everything in it. runLog.txt is left in place.
printf '%s\n' "Tests completed."
rm -r -f perfData