diff --git a/src/nekSandbox1/FormatGuess.h b/src/nekSandbox1/FormatGuess.h new file mode 100644 index 0000000000000000000000000000000000000000..05195002ccdcdfa3cbc49b476a2de67f3677a9dc --- /dev/null +++ b/src/nekSandbox1/FormatGuess.h @@ -0,0 +1,76 @@ +///* +// * FormatGuess.h +// * +// * Created on: Mar 11, 2013 +// * Author: nek3d +// */ +// +//#ifndef FORMATGUESS_H_ +//#define FORMATGUESS_H_ +// +//#include "PushBackStream.h" +// +//class FormatGuess +//{ +// std::string name; +//public: +// FormatGuess(const char* name) +// :name(name) +// {} +// const char* format() const +// { +// return name.c_str(); +// } +// virtual bool guess(PushBackStreamBuf* buf)=0; +//}; +// +//class VCFGuess:public FormatGuess +//{ +//public: +// VCFGuess() +// :FormatGuess("VCF") +// {} +// +// virtual bool guess(PushBackStreamBuf* buf) +// { +// const std::string fileformat("##fileformat="); +// bool is_vcf=true; +// std::ostringstream os; +// for(std::size_t i=0;i< fileformat.size();++i) +// { +// int c=buf->sbumpc(); +// if(c!=-1) os << (char)c; +// if(c!=fileformat[i]) {is_vcf=false;break;} +// } +// buf->push_back(os.str()); +// return is_vcf; +// } +//}; +// +// +//class XMLGuess:public FormatGuess +//{ +//public: +// +// XMLGuess() +// :FormatGuess("XML") +// {} +// +// virtual bool guess(PushBackStreamBuf* buf) +// { +// const std::string xmlheader("<?xml "); +// bool is_xml=true; +// std::ostringstream os; +// for(std::size_t i=0;i< xmlheader.size();++i) +// { +// int c=buf->sbumpc(); +// if(c!=-1) os << (char)c; +// if(c!=xmlheader[i]) {is_xml=false;break;} +// } +// buf->push_back(os.str()); +// return is_xml; +// } +//}; +// +// +//#endif /* FORMATGUESS_H_ */ diff --git a/src/nekSandbox1/Makefile b/src/nekSandbox1/Makefile new file mode 100644 index 0000000000000000000000000000000000000000..a392a8edd9ec6735877e1b14d6ba16928b9e1c3b --- /dev/null +++ b/src/nekSandbox1/Makefile @@ -0,0 +1,40 @@ +UTILITIES_DIR = ../utils/ +OBJ_DIR = ../../obj/ +BIN_DIR = ../../bin/ + +# 
------------------- +# define our includes +# ------------------- +INCLUDES = -I$(UTILITIES_DIR)/Contexts/ \ + -I$(UTILITIES_DIR)/fileType/ \ + -I$(UTILITIES_DIR)/FileRecordTools/ \ + -I$(UTILITIES_DIR)/FileRecordTools/FileReaders \ + -I$(UTILITIES_DIR)/FileRecordTools/Records \ + -I$(UTILITIES_DIR)/general \ + -I$(UTILITIES_DIR)/NewChromsweep \ + -I$(UTILITIES_DIR)/GenomeFile/ \ + -I$(UTILITIES_DIR)/BamTools/include + + + +# ---------------------------------- +# define our source and object files +# ---------------------------------- +SOURCES= nekSandboxMain.cpp +OBJECTS= nekSandboxMain.o +BUILT_OBJECTS= $(patsubst %,$(OBJ_DIR)/%,$(OBJECTS)) +PROGRAM= nekSandbox1 + +all: $(BUILT_OBJECTS) + +.PHONY: all + +$(BUILT_OBJECTS): $(SOURCES) + @echo " * compiling" $(*F).cpp + @$(CXX) -c -o $@ $(*F).cpp $(LDFLAGS) $(CXXFLAGS) $(DFLAGS) $(INCLUDES) + +clean: + @echo "Cleaning up." + @rm -f $(OBJ_DIR)/nekSandboxMain.o + +.PHONY: clean diff --git a/src/nekSandbox1/nekSandboxMain.cpp b/src/nekSandbox1/nekSandboxMain.cpp new file mode 100644 index 0000000000000000000000000000000000000000..a2922f921aafced8fb74979185703e0ab8d2220e --- /dev/null +++ b/src/nekSandbox1/nekSandboxMain.cpp @@ -0,0 +1,247 @@ +using namespace std; + +#include "Context.h" +#include "FileRecordMgr.h" +#include <iostream> +#include <cstdio> +#include "RecordKeyList.h" +#include "NewChromsweep.h" +#include "DualQueue.h" +#include "ParseTools.h" +#include <sstream> +#include <iomanip> +//#include "FormatGuess.h" + +#include "PushBackStreamBuf.h" +#include "InflateStreamBuf.h" +#include "InputStreamMgr.h" +#include "BufferedStreamMgr.h" +//void doSweep(const Context *context); +//void testDualQueue(Context *context); +// +//void test_streams(); + + +using namespace std; + +int nek_sandbox1_main2(int argc,char** argv); + +int nek_sandbox1_main(int argc,char** argv) +{ + + if (argc < 2) { + cerr << "Error: Need one input file. Use \"-\" for stdin." 
<< endl; + } + + + Context context; + context.addInputFile(argv[1]); + context.setSortedInput(true); +// context.setObeySplits(true); + + FileRecordMgr frm(0, &context); +// frm.getBlockMgr()->setBreakOnSkipOps(true); + if (!frm.open()) { + cerr << "Error: couldn't open file " << argv[1] << ". Exiting." << endl; + exit(1); + } + cout << "File Type is : " << frm.getFileType() << ", " << frm.getFileTypeName() << "." << endl; + cout << "RecordType is : " << frm.getRecordType() << ", " << frm.getRecordTypeName() << "." << endl; + + bool headerFound = false; + QuickString outbuf; + while (!frm.eof()) { + Record *record = frm.allocateAndGetNextRecord(); + if (!headerFound && frm.hasHeader()) { + cout << frm.getHeader() << endl; + headerFound = true; + } + if (record == NULL) { + break; + } + + if (record->getStartPos() == 90647945) { + printf("Breakpoint here.\n"); + } + outbuf.clear(); + record->print(outbuf); + printf("%s\n", outbuf.c_str()); + +// RecordKeyList recList(record); +// int blockCount = frm.getBlockMgr()->getBlocks(recList); +// printf("The %d blocks are:\n", blockCount); +// for (RecordKeyList::const_iterator_type iter = recList.begin(); iter != recList.end(); iter = recList.next()) { +// iter->value()->print(); +// printf("\n"); +// } +// printf("\n\n"); +// frm.getBlockMgr()->deleteBlocks(recList); + + frm.deleteRecord(record); + } + cout << "Final header is: " << frm.getHeader() << endl; + frm.close(); + + return 0; +} + +//int nek_sandbox1_main2(int argc,char** argv) +//{ +// +// +// vector<FormatGuess *> formats; +// formats.push_back(new VCFGuess()); +// formats.push_back(new XMLGuess()); +// +// PushBackStreamBuf pbs(std::cin.rdbuf()); +// std::istream in(&pbs); +// const char* format=0; +// for(size_t i=0;i< 2;++i) +// { +// if( formats[i]->guess(&pbs)) +// { +// format=formats[i]->format(); +// break; +// } +// } +// std::string line; +// while(getline(in,line)) +// { +// if(format!=0) cout << format << "\t"; +// cout << line << endl; +// } +// 
return 0; +//} +// +#ifdef false +int nek_sandbox1_main(int argc, char **argv) { + + + + for (int i=0; i < argc; i ++) { + cout << "Arg " << i << " is: " << argv[i] << endl; + } + test_streams(); + return 0; + + ///////////////////////////////// + // + // BLOCK FOR DUEL QUEUE + // + if (argc < 2) { + cerr << "Error: need at least one file name." << endl; + return 1; + } + + Context *context = new Context(); + context->addInputFile(argv[1]); + testDualQueue(context); + return 0; + // + //////////////////////////////// + + + + + + ///////////////////////////////// + // + // BLOCK FOR SWEEP + // + + if (argc < 3) { + cerr << "Error: need at least two data file names." << endl; + return 1; + } + + Context *context = new Context(); + context->parseCmdArgs(argc, argv, 1); + + doSweep(context); + return 0; + // + //////////////////////////////// + +} + +void doSweep(const char *file1, const char *file2, const string &genomeFile) +{ + Context *context = new Context(); + context->addInputFile(file1); + context->addInputFile(file2); + context->openGenomeFile(genomeFile); + + ChromSweep sweep = ChromSweep(context); + + if (!sweep.init()) { + cerr << "ERROR: Failure to open files in jaccard's getIntersection method." 
<< endl; + return; + } + + RecordKeyList hit_set; + while (sweep.next(hit_set)) { +// _intersectionVal += getTotalIntersection(&hit_set); + continue; + } + unsigned long unionVal = sweep.getQueryTotalRecordLength() + sweep.getDatabaseTotalRecordLength(); + cout << endl << endl << "Union value is: " << unionVal << endl << endl; +} + +void testDualQueue(Context *context) { + DualQueue<Record *, DualQueueAscending > dqAsc; +// DualQueue<Record *, DualQueueDescending> dqDesc; + + FileRecordMgr frm(context->getInputFileName(0), context); + frm.open(); + + printf("Original record order is:\n"); + while (!frm.eof()) { + Record *record = frm.allocateAndGetNextRecord(); + if (record == NULL) { + continue; + } +// printf("\n\nNext Record is:\n"); + record->print(); + printf("\n"); + dqAsc.push(record); +// dqDesc.push(record); + } + + printf("\nSupposedly ascending order is:\n"); + while (!dqAsc.empty()) { + const Record *record = dqAsc.top(); + dqAsc.pop(); + record->print(); + printf("\n"); + } + +// printf("\nSupposedly descending order is:\n"); +// while (!dqDesc.empty()) { +// const Record *record = dqDesc.top(); +// dqDesc.pop(); +// record->print(); +// printf("\n"); +// } + frm.close(); +} + +void test_streams() +{ + char myBuf[10]; + memset(myBuf, 0, 10); + + cin >> noskipws >> setw(9) >> myBuf; + + stringstream newStream; + newStream << "myBuf =:" << myBuf << endl; + + newStream << "Full stream was:" << endl; + newStream << myBuf; + newStream << cin.rdbuf(); + + + cout << newStream.str() << endl; + +} + +#endif //ifdef false diff --git a/src/regressTest/Makefile b/src/regressTest/Makefile new file mode 100644 index 0000000000000000000000000000000000000000..526db6bdd21bae5e07950113e1a6bd3efb6bdbe8 --- /dev/null +++ b/src/regressTest/Makefile @@ -0,0 +1,48 @@ +UTILITIES_DIR = ../utils/ +OBJ_DIR = ../../obj/ +BIN_DIR = ../../bin/ + +# ------------------- +# define our includes +# ------------------- +INCLUDES = -I$(UTILITIES_DIR)/bedFile/ \ + 
-I$(UTILITIES_DIR)/Contexts/ \ + -I$(UTILITIES_DIR)/GenomeFile/ \ + -I$(UTILITIES_DIR)/lineFileUtilities/ \ + -I$(UTILITIES_DIR)/gzstream/ \ + -I$(UTILITIES_DIR)/fileType/ \ + -I$(UTILITIES_DIR)/general/ \ + -I$(UTILITIES_DIR)/BamTools/include \ + -I$(UTILITIES_DIR)/version/ \ + -I$(UTILITIES_DIR)/FileRecordTools/ \ + -I$(UTILITIES_DIR)/FileRecordTools/FileReaders \ + -I$(UTILITIES_DIR)/FileRecordTools/Records \ + -I$(UTILITIES_DIR)/general + +# ---------------------------------- +# define our source and object files +# ---------------------------------- +#SOURCES= regressTestMain.cpp RegressTest.cpp RegressTest.h +#OBJECTS= regressTestMain.o RegressTest.o +#BUILT_OBJECTS= $(patsubst %,$(OBJ_DIR)/%,$(OBJECTS)) +PROGRAM= regressTest + + +all: #$(BUILT_OBJECTS) + @echo "compiling RegressTest.cpp" + @$(CXX) -c -o $(OBJ_DIR)/RegressTest.o RegressTest.cpp $(LDFLAGS) $(CXXFLAGS) $(INCLUDES) + @echo "compiling regressTestMain.cpp" + @$(CXX) -c -o $(OBJ_DIR)/regressTestMain.o regressTestMain.cpp $(LDFLAGS) $(CXXFLAGS) $(INCLUDES) + +.PHONY: all + +#$(BUILT_OBJECTS): $(SOURCES) +# @echo " * compiling" $(*F).cpp +# @$(CXX) -c -o $@ $(*F).cpp $(LDFLAGS) $(CXXFLAGS) $(INCLUDES) + + +clean: + @echo "Cleaning up." 
+ @rm -f $(OBJ_DIR)/regressTestMain.o $(OBJ_DIR)/RegressTest.o + +.PHONY: clean diff --git a/src/regressTest/RegressTest.cpp b/src/regressTest/RegressTest.cpp new file mode 100644 index 0000000000000000000000000000000000000000..2612b8d450d42d036ffc84ed0fa8c8f5e54f1866 --- /dev/null +++ b/src/regressTest/RegressTest.cpp @@ -0,0 +1,664 @@ +#include "RegressTest.h" +#include <cstdlib> +#include "SingleLineDelimTextFileReader.h" +#include "BufferedStreamMgr.h" +#include "lineFileUtilities.h" +#include "ParseTools.h" +#include <sys/stat.h> +#include <ctime> +#include "Context.h" + +const string RegressTest::_hardOptsCmd = "HARD_OPTIONS"; +const string RegressTest::_correctCmd = "CORRECT"; +const string RegressTest::_performCmd = "PERFORM"; +const string RegressTest::_randomCmd = "RANDOM"; + +const string RegressTest::_space = " "; +const string RegressTest::_redirect = " > "; +const string RegressTest::_devNull = " /dev/null "; +const string RegressTest::_bedOpsCmd = "/home/nek3d/testWorkspace/bedops/bin/bedmap --echo --echo-map --bp-ovr 1 "; + + +RegressTest::RegressTest() +: _filesPerRun(0), + _generatedFileNumber(1), + _isOldProgBedops(false) +{ + _correctFiles = new fileListType(); + _performFiles = new fileListType(); +} + +RegressTest::~RegressTest() +{ + + delete _correctFiles; + _correctFiles = NULL; + + delete _performFiles; + _performFiles = NULL; +} + +bool RegressTest::init(int argc, char **argv) +{ + //TBD: Perhaps allow option for specifying report file. + _fpReportFile = stdout; + + if (!parseParams(argc, argv)) { + return false; + } + + //make banner title for report + fprintf(_fpReportFile, "\n\n***********************************************************\n\n"); + fprintf(_fpReportFile, "REGRESSION TEST FOR BEDTOOLS SUB-PROGRAM: %s\n", _subProgram.c_str()); + fprintf(_fpReportFile, "\n***********************************************************\n\n"); + + + //setup output directory for testing's temporary files and generated data files.
+ _tmpDirname = "tempTesting_"; + time_t currTime = time(NULL); + string timeStr = ctime(&currTime); //timeStr will equal Www Mmm dd hh:mm:ss yyyy followed by newline and null char. + //chomp the newLine + timeStr.erase(timeStr.size()-1); + //adjust timeStr to change spaces to underscores. + for (int i=0; i < (int)timeStr.size(); i++) { + if (isspace(timeStr[i])) { + timeStr[i] = '_'; + } else if (timeStr[i] == ':') { + timeStr[i] = '-'; + } + } + _tmpDirname += timeStr; + int mkdirRetval = mkdir(_tmpDirname.c_str(), S_IRWXU | S_IRWXG | S_IRWXO ); //mkdir directory with all permissions allowed. + if (mkdirRetval != 0) { + fprintf(stderr, "Error: Unable to create temporary output directory %s.\n", _tmpDirname.c_str()); + return false; + } + _tmpDirname += "/"; + + _memoryLogfilename = _tmpDirname + "memoryLog.txt"; + + _userName = getenv("USER"); + + if ((int)_filePrecessorOptions.size() != _filesPerRun) { + fprintf(stderr, "Error: Sub-program specific filesPerRun number must match number of precessor options.\n"); + return false; + } + + if (!config()) { + return false; + } + + if ((int)_correctFiles->size() % _filesPerRun != 0) { + fprintf(stderr, "Error: number of correctness files is not evenly divisible by number of files per run.\n"); + return false; + } + + if ((int)_performFiles->size() % _filesPerRun != 0) { + fprintf(stderr, "Error: number of performance files is not evenly divisible by number of files per run.\n"); + return false; + } + + return true; +} + +bool RegressTest::runTests() { + echoOptions(); + + //All set with set-up. Let's work some mojo. 
+ if (!performTests(true)) { + fprintf(stderr, "Error: Failure in correctness tests.\n"); + return false; + } + + if (!performTests(false)) { + fprintf(stderr, "Error: Failure in performance tests.\n"); + return false; + } + + return true; + +} + +bool RegressTest::parseParams(int argc, char **argv) +{ + //usage: bedtools devtest sub-prog targetVersion configFile [optionsToTest] + if (argc < 5) { + usage(); + return false; + } + _newVersion = argv[0]; + _subProgram = argv[2]; + + _oldVersion = argv[3]; + if (_oldVersion.find("bedops") != string::npos) { + _isOldProgBedops = true; + } + + + _configFilename = argv[4]; + + //special: add a blank option to the softOptions, so that performTests will compare + //runs with each soft opt to a run with no soft ops. + _softOptions.push_back(""); + for (int i=5; i < argc; i++ ) { + _softOptions.push_back(argv[i]); + } + return true; +} + +void RegressTest::usage() const { + fprintf(stderr, "Usage: bedtools regresstest sub-prog targetVersion configFile [optionsToTest]\n"); +} + +void RegressTest::echoOptions() const +{ + //show all command line and config file options and files + fprintf(_fpReportFile, "\nCONFIGURATION AND OPTIONS ARE:\n\n"); + fprintf(_fpReportFile, "New Version: %s\n", _newVersion.c_str()); + fprintf(_fpReportFile, "Old Version: %s\n\n", _oldVersion.c_str()); + fprintf(_fpReportFile, "hardOptions: %s\n\n", _hardOptions.c_str()); + + for (int i=1; i < (int)_softOptions.size(); i++) { + fprintf(_fpReportFile, "SoftOption %d: %s\n", i, _softOptions[i].c_str()); + } + + fprintf(_fpReportFile, "\nFiles per run: %d\n", _filesPerRun); + for (int i=0; i < (int)_filePrecessorOptions.size(); i++) { + fprintf(_fpReportFile, "Precessor option %d: %s\n", i+1, _filePrecessorOptions[i].c_str()); + } + + for (int i=0; i < 2; i++) { + fileListType *fileList = i == 0 ? _correctFiles : _performFiles; + fprintf(_fpReportFile, "\n\n\%s TEST FILES:\n", i ==0 ? 
"CORRECTNESS" : "PERFORMANCE"); + for (fileListType::const_iterator iter = fileList->begin(); iter != fileList->end(); iter++) { + + const string &filename = iter->first; + const string &desc = iter->second; + fprintf(_fpReportFile, "\nFilename: %s\n", filename.c_str()); + fprintf(_fpReportFile, "Description: %s\n", desc.c_str()); + } + } +} + +bool RegressTest::config() { + //Set hard options, populate correctness and performance file vectors by reading a config file + + _configFile.open(_configFilename.c_str()); + if (!_configFile.good()) { + cerr << "Error: unable to open config file " << _configFilename << endl; + } + vector<string> fields; + bool parseStatus = false; + int numLinesToRead = 0; + int linesRead = 0; + string sLine; + while (!_configFile.eof()) { + sLine.clear(); + getline(_configFile, sLine); + fields.clear(); + + Tokenize(sLine, fields); + if (fields.size() != 2) { + continue; + } + + const string &field1 = fields[0]; + const string &field2 = fields[1]; + + if (field1 == _hardOptsCmd) { + _hardOptions = field2.c_str(); + continue; + } else if (field1 == _correctCmd || field1 == _performCmd) { + numLinesToRead = atoi(field2.c_str()); + if (field1 == _correctCmd) { + parseStatus = parseConfigLines(numLinesToRead, true); + } else { + parseStatus = parseConfigLines(numLinesToRead, false); + } + if (!parseStatus) { + fprintf(stderr, "Error: failed to read and parse requested %d lines for %s\n", + numLinesToRead, field1 == _correctCmd ? 
"Correctness tests" : "Performance tests"); + _configFile.close(); + return false; + } else { + linesRead += numLinesToRead; + } + } else { + fprintf(stderr, "Error: Malformed config file %s\n.\tCheck that num files specified matches num provided.\n", _configFilename.c_str()); + _configFile.close(); + return false; + } + } + if (linesRead == 0) { + fprintf(stderr, "Error: No file lines read in config file %s\n", _configFilename.c_str()); + _configFile.close(); + return false; + } + + _configFile.close(); + return true; +} + + + +bool RegressTest::parseConfigLines(int numLinesToRead, bool correctnessFiles) +{ + string description; + + + fileListType *fileList = correctnessFiles ? _correctFiles : _performFiles; + string sLine; + vector<string> fields; + for (int i=0; i < numLinesToRead; i++) { + + sLine.clear(); + getline(_configFile, sLine); + fields.clear(); + + Tokenize(sLine, fields); + if (fields.size() != 2) { + continue; + } + + const string &field1 = fields[0]; + const string &field2 = fields[1]; + + if (field1 == _randomCmd) { + string genFilename; + if (!generateRandomFile(field2.c_str(), genFilename)) { + fprintf(stderr, "Error: could not generate random file with args: %s\n", field2.c_str()); + return false; + } + description = _randomCmd + _space + field2.c_str(); + fileList->push_back(make_pair(genFilename, description)); + } else { + fileList->push_back(make_pair(field1.c_str(), field2.c_str())); + } + } + return true; +} + +bool RegressTest::generateRandomFile(const string & randomArgs, string &filename) +{ + if (_generatedFileNumber == 1) { //first call, print banner to report + fprintf(_fpReportFile, "\nGENERATING TEST DATA:\n\n"); + } + + + string genFilename = _tmpDirname + "generatedFile_"; + string strNum; + int2str(_generatedFileNumber, strNum); + _generatedFileNumber++; + + genFilename += strNum; + genFilename += ".bed"; + + //quick hack since v2.18 random currently doesn't support the -sort option in the random command + string genFileCmd = 
"~/testWorkspace/pfm3release/bin/bedtools"; //_newVersion; + genFileCmd += " random "; + string sortCmd = ""; //" | sort -k1,1 -nk2,2 "; Trouble getting pipes to work. Use -sorted command from now. + genFileCmd += randomArgs + sortCmd + _redirect + genFilename; + + fprintf(_fpReportFile, "Creating data file %s with random args %s...\n", genFilename.c_str(), randomArgs.c_str()); + if (system(genFileCmd.c_str()) != 0) { //expects successful calls to randomBed to return zero. + fprintf(_fpReportFile, "FAILED.\n"); + filename.clear(); + return false; + } + filename = genFilename; + fprintf(_fpReportFile, "Done.\n"); + return true; +} + +bool RegressTest::performTests(bool isCorrectnessTest) +{ + bool retval = true; + fileListType *fileList = isCorrectnessTest ? _correctFiles : _performFiles; + if (fileList->empty()) { + return true; + } + string inFilesCmd; + string baseCmd; + string allOptsCmd; + string finalCmd; + string oldVerionFinalCmd; + string newVersionFinalCmd; + + int outFileCounter = 1; + string counterStr; + string outputBase = "tmp"; + string outputSuffix = ".txt"; + string outputFile = "/dev/null"; // /dev/null will be used for performance, but over-written for correctness + + + string bedOpsFileCmd; //special handling for bedops + + string testType = isCorrectnessTest ? 
"CORRECTNESS" : "PERFORMANCE"; + int testCounter = 0; + fprintf(_fpReportFile, "\n\n\n***********************************************************\n\n"); + fprintf(_fpReportFile, "TESTS FOR %s\n", testType.c_str()); + fprintf(_fpReportFile, "\n***********************************************************\n"); + + vector< fileListType::const_iterator> currFiles; + for (fileListType::const_iterator fileIter = fileList->begin(); fileIter != fileList->end(); fileIter += _filesPerRun) { + currFiles.clear(); + string inFilesCmd = _space; + for (int i=0; i < _filesPerRun; i++) { + const string &infileName = (fileIter + i)->first; + inFilesCmd += _filePrecessorOptions[i] + _space + infileName + _space; + currFiles.push_back(fileIter + i); + } + if (_isOldProgBedops) { + bedOpsFileCmd = fileIter->first + _space + (fileIter + 1)->first + _space; + } + for (int i=0; i < (int)_softOptions.size(); i++ ) { + const string &softOpt = _softOptions[i]; + + //stop to report what test we're going to do. + testCounter++; + fprintf(_fpReportFile, "\n%s TEST %d\n\n", testType.c_str(), testCounter); + fprintf(_fpReportFile, "\tInput Files:\n"); + for (int j=0; j < (int)currFiles.size(); j++) { + fprintf(_fpReportFile, "\tName: %s\n", currFiles[j]->first.c_str()); + fprintf(_fpReportFile, "\tDesc: %s\n\n", currFiles[j]->second.c_str()); + } + fprintf(_fpReportFile, "\tSoft option in use: %s\n", softOpt.empty() ? "NONE" : softOpt.c_str()); + fprintf(_fpReportFile, "\n\tCommands to run:\n"); + fileListType cmdAndOutput; + for (int j=0; j < 2; j++) { //loop through the two versions, old and new + const string &mainProg = j == 0 ? 
_oldVersion : _newVersion; + if (j==1 || !_isOldProgBedops) { + baseCmd = mainProg + _space + _subProgram + _space + inFilesCmd + _hardOptions + _space + softOpt + _redirect; + } else { + baseCmd = /*mainProg + _space + */ _bedOpsCmd + bedOpsFileCmd + _redirect; + } + + //now we just need tmp output for correctness, or clock and memory footprinting for performance + if (isCorrectnessTest) { + int2str(outFileCounter, counterStr); + outFileCounter++; + outputFile = _tmpDirname + outputBase + counterStr + outputSuffix; + } + + finalCmd = baseCmd + outputFile; + fprintf(_fpReportFile, "\t%s\n", finalCmd.c_str()); + cmdAndOutput.push_back(make_pair(finalCmd, outputFile)); + } + if (isCorrectnessTest) { + bool compStatus = executeAndCompareCorrectness(cmdAndOutput); + retval = retval && compStatus; + } else { + bool compStatus = executeAndComparePerformance(cmdAndOutput); + retval = retval && compStatus; + } + } + } + + return retval; + +} + +bool RegressTest::executeAndCompareCorrectness(const fileListType &fileList) { + const string &cmd1 =fileList[0].first; + const string &output1 = fileList[0].second; + const string &cmd2 = fileList[1].first; + const string &output2 = fileList[1].second; + +// printf("\nRun correctness test: %s\n%s\n%s\n%s\n", cmd1.c_str(), output1.c_str(), cmd2.c_str(), output2.c_str()); +// return true; + + int ret1 = system(cmd1.c_str()); + if (ret1 !=0) { + fprintf(stderr, "\nError: received non-zero exit code %d from old version.\n", ret1); + return false; + } + + int ret2 = system(cmd2.c_str()); + + if (ret2 !=0) { + fprintf(stderr, "\nError: received non-zero exit code %d from new version.\n", ret2); + return false; + } + + //Here: implement a way to test differences in output. In the future, may wish to actually read the records + //and determine equivalence, i.e. same value for core fields chrName, start, end, name, score, strand. + //Right now, just use diff command, ensure output of diff is empty. 
+ + string diffFilename = _tmpDirname + "diffOut.txt"; + string diffCmd = "diff "; + diffCmd += output1 + _space + output2 + _redirect + diffFilename; + + system(diffCmd.c_str()); + + //now need to check for empty diffFile. + struct stat buf ; + int i; + + i = stat(diffFilename.c_str(), &buf); + if (i!=0) { + fprintf(stderr, "Error: can't get status of diff output file %s\n", diffFilename.c_str()) ; + return false; + } + + if (buf.st_size > 0) { + fprintf(_fpReportFile, "\n\tFAILED. Output files are different.\n"); + return true; + } + + fprintf(_fpReportFile, "\n\tPASSED. Output files are identical.\n"); + return true; +} + +bool RegressTest::executeAndComparePerformance(const fileListType &fileList) { + const string &cmd1 =fileList[0].first; +// const string &output1 = fileList[0].second; + const string &cmd2 = fileList[1].first; +// const string &output2 = fileList[1].second; + +// printf("\nRun performance test: %s\n%s\n%s\n%s\n", cmd1.c_str(), output1.c_str(), cmd2.c_str(), output2.c_str()); +// return true; + + bool cmd1IsBedops = cmd1.find("bedops") != string::npos; + bool cmd2IsBedops = cmd2.find("bedops") != string::npos; + + char timeBuf[100]; //be sure to initialize before use. + //run and time old version + fprintf(_fpReportFile, "\n\tRunning first command...\n"); + + + startMemoryProfile(cmd1IsBedops); + + time_t oldStartTime = time(NULL); + int ret1 = system(cmd1.c_str()); + if (ret1 !=0) { + fprintf(stderr, "\nError: received non-zero exit code %d from old version.\n", ret1); + return false; + } + time_t oldEndTime = time(NULL); + time_t oldRunTime = oldEndTime - oldStartTime; + struct tm *oldRunTimeInfo = gmtime(&oldRunTime); + memset(timeBuf, 0, 100); + strftime(timeBuf, 100, "%X", oldRunTimeInfo); + fprintf(_fpReportFile, "\tDone. 
Elaspsed time for old version was %s.\n", timeBuf); + + endMemoryProfile(); + + calcMemoryStats(); + + + + //run and time new version + fprintf(_fpReportFile, "\n\tRunning second command...\n"); + + startMemoryProfile(cmd2IsBedops); + + time_t newStartTime = time(NULL); + int ret2 = system(cmd2.c_str()); + if (ret2 !=0) { + fprintf(stderr, "\nError: received non-zero exit code %d from new version.\n", ret2); + return false; + } + time_t newEndTime = time(NULL); + time_t newRunTime = newEndTime - newStartTime; + struct tm *newRunTimeInfo = gmtime(&newRunTime); + memset(timeBuf, 0, 100); + strftime(timeBuf, 100, "%X", newRunTimeInfo); + fprintf(_fpReportFile, "\tDone. Elaspsed time for new version was %s.\n", timeBuf); + + endMemoryProfile(); + + calcMemoryStats(); + + if (newRunTime < oldRunTime) { //new version is faster + time_t diffTime = oldRunTime - newRunTime; + struct tm *diffRunTimeInfo = gmtime(&diffTime); + memset(timeBuf, 0, 100); + strftime(timeBuf, 100, "%X", diffRunTimeInfo); + + fprintf(_fpReportFile, "\n\tPASSED. New version is faster by %s", timeBuf); + + if (newRunTime > 0) { + fprintf(_fpReportFile, ", or %7.2fx faster\n", (float)oldRunTime/(float)newRunTime); + } else { + fprintf(_fpReportFile, ", or ...WAY FASTER.\n"); + } + + } else if (oldRunTime < newRunTime) { //old version is faster + time_t diffTime = newRunTime - oldRunTime; + struct tm *diffRunTimeInfo = gmtime(&diffTime); + + memset(timeBuf, 0, 100); + strftime(timeBuf, 100, "%X", diffRunTimeInfo); + fprintf(_fpReportFile, "\n\tFAILED. old version is faster by %s", timeBuf); + + if (oldRunTime > 0) { + fprintf(_fpReportFile, ", or %7.2fx faster\n", (float)newRunTime/(float)oldRunTime); + } else { + fprintf(_fpReportFile, ", or ...WAY FASTER.\n"); + } + + } else { //run times were same. + fprintf(_fpReportFile, "\n\tMEH. 
No difference in speed.\n"); + } + + return true; +} + + +bool RegressTest::startMemoryProfile(bool isBedops) +{ + //kick off ps as a background process to monitor memory usage + string pidFilename = _tmpDirname + "pidFile.txt"; + string psCmd; + if (!isBedops) { + psCmd = "while [ 1 ] ; do ps au | grep bedtools | grep "; + } else { + psCmd = "while [ 1 ] ; do ps au | grep bedops | grep "; + } + psCmd += _userName; + psCmd += " | grep -v grep | grep -v regresstest | grep -v \"sh -c\" >> "; + psCmd += _memoryLogfilename; + psCmd += " ; sleep .5 ; done & echo $! > "; + psCmd += _tmpDirname + "pidFile.txt"; + + if (system(psCmd.c_str()) != 0) { + fprintf(stderr, "Error: unable to launch top as background process for memory profiling.\n"); + return false; + } + + // open and parse the pidFile to get the process id of the memory profiling background process. + FILE *fp = fopen(pidFilename.c_str(), "r"); + char sLine[4192]; + memset(sLine, 0, 4192); + fgets(sLine, 4192, fp); + fclose(fp); + + _currPidOfMemoryLogging = sLine; + return true; +} + +bool RegressTest::endMemoryProfile() +{ + + string killCmd = "kill -9 "; + killCmd += _currPidOfMemoryLogging; + if (system(killCmd.c_str()) != 0) { + fprintf(stderr, "Error: failed to kill process id %s\n", _currPidOfMemoryLogging.c_str()); + return false; + } + + return true; +} + +bool RegressTest::calcMemoryStats() +{ + //read and parse the memory log file, calc basic stats: max, mean, median. 
+ FILE *fp = fopen(_memoryLogfilename.c_str(), "r"); + if (fp == NULL) { + fprintf(stderr, "Error: Unable to read memory profile log %s.\n", _memoryLogfilename.c_str()); + return false; + } + + char sLine[4192]; + int numValidLines=0; + char field1[2048]; + char field2[2048]; + char field3[2048]; + char field4[2048]; + char field5[2048]; + + int totalMem= 0; + int currMem = 0; + int maxMem = 0; + while(!feof(fp)) { + memset(sLine, 0, 4192); + memset(field1, 0, 2048); + memset(field2, 0, 2048); + memset(field3, 0, 2048); + memset(field4, 0, 2048); + memset(field5, 0, 2048); + + currMem = 0; + fgets(sLine, 4192, fp); + int len = strlen(sLine); + bool isValidLine = false; + for (int i=0; i < len; i++) { + if (!isspace(sLine[i])) { + isValidLine = true; + break; + } + } + if (isValidLine) { + numValidLines++; + sscanf(sLine, "%s %s %s %s %s", field1, field2, field3, field4, field5); + + //now field 5 has the number of kilobytes the process used at that moment. + currMem = atoi(field5); + if (currMem == 0) { + //atoi failed. something wrong with input + fprintf(stderr, "Error: bad field where memory usage expected: %s.\n", field5); + fclose(fp); + return false; + } + maxMem = max(currMem, maxMem); + totalMem += currMem; + } + } + + int avgMem = numValidLines > 0 ? totalMem / numValidLines : -1; + + fclose(fp); + if (avgMem > 0) { + fprintf(_fpReportFile, "\tMemory uage: max = %dkb\tavg = %dkb\n", maxMem, avgMem); + } else { + fprintf(_fpReportFile, "\tMemory usage: TOO FAST TO PROFILE!\n"); + } + + //cleanup: erase memoryLogfile, as we wish to append to a blank file when next used. 
+ if (remove(_memoryLogfilename.c_str()) != 0) { + fprintf(stderr, "Error: couldn't delete old memory logfile %s.\n", _memoryLogfilename.c_str()); + return false; + } + + return true; +} diff --git a/src/regressTest/RegressTest.h b/src/regressTest/RegressTest.h new file mode 100644 index 0000000000000000000000000000000000000000..93064dc097598c1166d91722af2b170f3c7848a6 --- /dev/null +++ b/src/regressTest/RegressTest.h @@ -0,0 +1,93 @@ +/* + * RegressTest.h + * + * Created on: Dec 21, 2012 + * Author: nek3d + */ + +#ifndef REGRESSTEST_H_ +#define REGRESSTEST_H_ + +using namespace std; + +#include <string> +#include <cstdio> +#include <vector> +#include <fstream> +class SingleLineDelimTextFileReader; +class BufferedStreamMgr; + +class RegressTest { +public: + RegressTest(); + ~RegressTest(); + bool init(int argc, char **argv); + + void setFilesPerRun(int numFiles) { _filesPerRun = numFiles; } + void addFilePrecessorOption(const char *option) { _filePrecessorOptions.push_back(option); } + bool runTests(); + + + +protected: + string _configFilename; + string _newVersion; + string _subProgram; + string _oldVersion; + + string _hardOptions; + vector<string> _softOptions; + + int _filesPerRun; + vector<string> _filePrecessorOptions; + + FILE *_fpReportFile; + ifstream _configFile; + + typedef vector<pair<string, string> > fileListType; + fileListType *_correctFiles; //list of files for correctness tests. First in pair is file name, second is description. + fileListType *_performFiles; //list of files for performance tests. First in pair is file name, second is description. 
+ + //config file key words + static const string _hardOptsCmd; + static const string _correctCmd; + static const string _performCmd; + static const string _randomCmd; + + //useful strings to have for building command strings + static const string _space; + static const string _redirect; + static const string _devNull; + static const string _bedOpsCmd; + + int _generatedFileNumber; // a "tag" to give generatedFiles. + + string _tmpDirname; + string _memoryLogfilename; + string _currPidOfMemoryLogging; + string _userName; + + //Special: Since RegressTest was originally designed only for testing bedtools against prior versions + // of itself, some hacking is needed to make it play with bedops. + bool _isOldProgBedops; + + + bool parseParams(int argc, char **argv); + void usage() const; + void echoOptions() const; + bool config(); + bool parseConfigLines(int numLinesToRead, bool correctnessFiles); + bool generateRandomFile(const string & randomArgs, string &filename); + + bool performTests(bool correctness); //pass true for correctness, false for performance. + bool executeAndCompareCorrectness(const fileListType &cmdAndOutput); + bool executeAndComparePerformance(const fileListType &cmdAndOutput); + + bool startMemoryProfile(bool isBedops); + bool endMemoryProfile(); + bool calcMemoryStats(); + +}; + + +#endif /* REGRESSTEST_H_ */ diff --git a/src/regressTest/regressTestMain.cpp b/src/regressTest/regressTestMain.cpp new file mode 100644 index 0000000000000000000000000000000000000000..0377ca667fa715e72d5986a018132e76e9a406a0 --- /dev/null +++ b/src/regressTest/regressTestMain.cpp @@ -0,0 +1,67 @@ + +#include "RegressTest.h" +#include <cstring> +#include <cstdlib> +#include <cstdio> +#include "QuickString.h" + +void usage() { + printf("Usage: bedtools regressTest sub-prog targetVersion configFile [optionsToTest]\n"); +} + +void setOneWayOptions(RegressTest *regressTest) { + //Use this for programs whose main arguments are two files, preceded by -a and -b, respectively. 
+ + regressTest->setFilesPerRun(1); + regressTest->addFilePrecessorOption("-i"); +} + +void setTwoWayOptions(RegressTest *regressTest) { + //Use this for programs whose main arguments are two files, preceded by -a and -b, respectively. + + regressTest->setFilesPerRun(2); + regressTest->addFilePrecessorOption("-a"); + regressTest->addFilePrecessorOption("-b"); +} + +int regress_test_main(int argc, char **argv) { + + //usage: bedtools regressTest sub-prog targetVersion [optionsToTest] + if (argc < 5) { + usage(); + exit(1); + } + QuickString program(argv[2]); + + RegressTest *regressTest = new RegressTest(); + + //set specific options for each sub-program + if (program == "intersect") { + setTwoWayOptions(regressTest); + } else if (program == "jaccard") { + setTwoWayOptions(regressTest); + } else if (program == "merge") { + setOneWayOptions(regressTest); + } else { + //TBD: Handle all other programs eventually + fprintf(stderr, "Sorry, sub-program %s is not yet supported.\n", argv[2]); + delete regressTest; + exit(1); + } + + if (!regressTest->init(argc, argv)) { + fprintf(stderr, "Error: could not initialize tests for %s.\n", argv[2]); + delete regressTest; + exit(1); + } + + if (!regressTest->runTests()) { + fprintf(stderr, "Error: Failed to run tests for %s.\n", argv[2]); + delete regressTest; + exit(1); + } + + delete regressTest; + exit(0); + +}