diff --git a/Makefile b/Makefile index f3cfb4fc399744c40d09b06bc50f8646b5f3d024..139988be1dae1d5c62c3a8148e28fbdb8ed18a4b 100644 --- a/Makefile +++ b/Makefile @@ -86,7 +86,8 @@ UTIL_SUBDIRS = $(SRC_DIR)/utils/bedFile \ $(SRC_DIR)/utils/BlockedIntervals \ $(SRC_DIR)/utils/Fasta \ $(SRC_DIR)/utils/VectorOps \ - $(SRC_DIR)/utils/GenomeFile + $(SRC_DIR)/utils/GenomeFile \ + $(SRC_DIR)/utils/RecordOutputMgr BUILT_OBJECTS = $(OBJ_DIR)/*.o diff --git a/src/intersectFile/Makefile b/src/intersectFile/Makefile index e8ac9ea59f32f81ed62311c5d49259c33feac41c..e265b3348d1b1883c8f46e15b2902e3f2e5f8933 100644 --- a/src/intersectFile/Makefile +++ b/src/intersectFile/Makefile @@ -17,6 +17,7 @@ INCLUDES = -I$(UTILITIES_DIR)/Contexts/ \ -I$(UTILITIES_DIR)/FileRecordTools/ \ -I$(UTILITIES_DIR)/FileRecordTools/FileReaders/ \ -I$(UTILITIES_DIR)/FileRecordTools/Records/ \ + -I$(UTILITIES_DIR)/RecordOutputMgr/ \ -I$(UTILITIES_DIR)/NewChromsweep \ -I$(UTILITIES_DIR)/BinTree \ -I$(UTILITIES_DIR)/version/ diff --git a/src/intersectFile/intersectFile.cpp b/src/intersectFile/intersectFile.cpp index 5c78887425de2e8a7712aac3ba69e41db86a56ac..928d1d9a4f7e8f985cc9edd2ca44b1dbdcf8a517 100644 --- a/src/intersectFile/intersectFile.cpp +++ b/src/intersectFile/intersectFile.cpp @@ -24,18 +24,16 @@ FileIntersect::FileIntersect(ContextIntersect *context) _recordOutputMgr(NULL) { _recordOutputMgr = new RecordOutputMgr(); + _recordOutputMgr->init(_context); if (_context->getObeySplits()) { - _blockMgr = new BlockMgr(); - _blockMgr->setContext(context); + _blockMgr = new BlockMgr(_context->getOverlapFraction(), _context->getReciprocal()); _recordOutputMgr->setSplitInfo(_blockMgr); } } FileIntersect::~FileIntersect(void) { - if (_blockMgr != NULL) { - delete _blockMgr; - _blockMgr = NULL; - } + delete _blockMgr; + _blockMgr = NULL; delete _recordOutputMgr; } @@ -59,9 +57,6 @@ bool FileIntersect::processSortedFiles() if (!sweep.init()) { return false; } - if (!_recordOutputMgr->init(_context)) { - return false; - } RecordKeyList hitSet; while (sweep.next(hitSet)) { @@ -79,23 +74,11 @@ bool FileIntersect::processSortedFiles() bool FileIntersect::processUnsortedFiles() { - const QuickString &databaseFilename = _context->getDatabaseFileName(); - BinTree *binTree = new BinTree(_context->getDatabaseFileIdx(), _context); - - FileRecordMgr *queryFRM = new FileRecordMgr(_context->getQueryFileIdx(), _context); - if (!queryFRM->open()) { - return false; - } - - if (!binTree->loadDB()) { - fprintf(stderr, "Error: Unable to load database file %s.\n", databaseFilename.c_str()); - delete binTree; - exit(1); - } + BinTree *binTree = new BinTree( _context); + binTree->loadDB(); + FileRecordMgr *queryFRM = _context->getFile(_context->getQueryFileIdx()); - _context->determineOutputType(); - _recordOutputMgr->init(_context); while (!queryFRM->eof()) { Record *queryRecord = queryFRM->allocateAndGetNextRecord(); @@ -117,7 +100,6 @@ bool FileIntersect::processUnsortedFiles() queryFRM->close(); //clean up. - delete queryFRM; delete binTree; return true; } diff --git a/src/mapFile/Makefile b/src/mapFile/Makefile index 3eea8d065b54983bd7a1fea9d2f8ecad708fe54a..17bb42df2d5c71d7eae0093ef8013c57c3fce42d 100644 --- a/src/mapFile/Makefile +++ b/src/mapFile/Makefile @@ -28,6 +28,7 @@ INCLUDES = -I$(UTILITIES_DIR)/Contexts/ \ -I$(UTILITIES_DIR)/FileRecordTools/ \ -I$(UTILITIES_DIR)/FileRecordTools/FileReaders/ \ -I$(UTILITIES_DIR)/FileRecordTools/Records/ \ + -I$(UTILITIES_DIR)/RecordOutputMgr/ \ -I$(UTILITIES_DIR)/NewChromsweep \ -I$(UTILITIES_DIR)/VectorOps \ -I$(UTILITIES_DIR)/BinTree \ diff --git a/src/mapFile/mapFile.cpp b/src/mapFile/mapFile.cpp index 3bd133d2b4fb13c01d58eecaa97dd9e48e091a37..88dcc26fe8eb24eacf8108a577e8ab4e757b201b 100644 --- a/src/mapFile/mapFile.cpp +++ b/src/mapFile/mapFile.cpp @@ -23,18 +23,16 @@ FileMap::FileMap(ContextMap *context) _blockMgr(NULL), _recordOutputMgr(NULL) { - // FIX ME - block manager only works for intersect - //_blockMgr = new BlockMgr(); - //_blockMgr->setContext(context); + _blockMgr = new BlockMgr(_context->getOverlapFraction(), _context->getReciprocal()); _recordOutputMgr = new RecordOutputMgr(); + _recordOutputMgr->init(_context); } FileMap::~FileMap(void) { - if (_blockMgr != NULL) { - delete _blockMgr; - _blockMgr = NULL; - } - delete _recordOutputMgr; + delete _blockMgr; + _blockMgr = NULL; + delete _recordOutputMgr; + _recordOutputMgr = NULL; } bool FileMap::mapFiles() @@ -43,20 +41,18 @@ bool FileMap::mapFiles() if (!sweep.init()) { return false; } - if (!_recordOutputMgr->init(_context)) { - return false; - } RecordKeyList hitSet; while (sweep.next(hitSet)) { - //if (_context->getObeySplits()) { - // RecordKeyList keySet(hitSet.getKey()); - // RecordKeyList resultSet(hitSet.getKey()); - // _blockMgr->findBlockedOverlaps(keySet, hitSet, resultSet); - //} else { - //} - //_recordOutputMgr->printKeyAndTerminate(hitSet); - SummarizeHits(hitSet); - _recordOutputMgr->printRecord(hitSet.getKey(), _output); + if (_context->getObeySplits()) { + RecordKeyList keySet(hitSet.getKey()); + RecordKeyList resultSet(hitSet.getKey()); + _blockMgr->findBlockedOverlaps(keySet, hitSet, resultSet); + SummarizeHits(resultSet); + _recordOutputMgr->printRecord(resultSet.getKey(), _output); + } else { + SummarizeHits(hitSet); + _recordOutputMgr->printRecord(hitSet.getKey(), _output); + } } return true; } @@ -116,7 +112,7 @@ void FileMap::SummarizeHits(RecordKeyList &hits) { else if (operation == "distinct") _tmp_output << vo.GetDistinct(); else { - cerr << "ERROR: " << operation << " is an unrecoginzed operation\n"; + cerr << "ERROR: " << operation << " is an unrecognized operation\n"; exit(1); } _output.append(_tmp_output.str()); diff --git a/src/mapFile/mapMain.cpp b/src/mapFile/mapMain.cpp index fe87c9f3d96cc6b1b98b69607f6a40affab05597..a9eeb36990267c69e78b473f8824483b75629cb6 100644 --- a/src/mapFile/mapMain.cpp +++ b/src/mapFile/mapMain.cpp @@ -215,6 +215,8 @@ void map_help(void) { cerr << "\t\tthat overlap A on the _opposite_ strand." << endl; cerr << "\t\t- By default, overlaps are reported without respect to strand." << endl << endl; + cerr << "\t-split\t" << "Treat \"split\" BAM or BED12 entries as distinct BED intervals." << endl << endl; + cerr << "\t-g\t" << "Provide a genome file to enforce consistent chromosome sort order" << endl; cerr <<"\t\tacross input files." << endl << endl; diff --git a/src/nekSandbox1/nekSandboxMain.cpp b/src/nekSandbox1/nekSandboxMain.cpp index 3a5449ff6077bc8404180213415d11dfb87b2f2f..ac3fb3196b49d602e98a2e79ae369ef907573603 100644 --- a/src/nekSandbox1/nekSandboxMain.cpp +++ b/src/nekSandbox1/nekSandboxMain.cpp @@ -128,36 +128,36 @@ int nek_sandbox1_main(int argc,char** argv) // // return 0; // - ContextIntersect context; - context.addInputFile(argv[1]); - context.setSortedInput(true); -// context.setObeySplits(true); - - FileRecordMgr frm(0, &context); -// frm.getBlockMgr()->setBreakOnSkipOps(true); - if (!frm.open()) { - cerr << "Error: couldn't open file " << argv[1] << ". Exiting." << endl; - exit(1); - } - cout << "File Type is : " << frm.getFileType() << ", " << frm.getFileTypeName() << "." << endl; - cout << "RecordType is : " << frm.getRecordType() << ", " << frm.getRecordTypeName() << "." << endl; - - bool headerFound = false; - QuickString outbuf; - while (!frm.eof()) { - Record *record = frm.allocateAndGetNextRecord(); - if (!headerFound && frm.hasHeader()) { - cout << frm.getHeader() << endl; - headerFound = true; - } - if (record == NULL) { - break; - } - - outbuf.clear(); - record->print(outbuf); - printf("%s\n", outbuf.c_str()); - +// ContextIntersect context; +// context.addInputFile(argv[1]); +// context.setSortedInput(true); +//// context.setObeySplits(true); +// +// FileRecordMgr frm(0, &context); +//// frm.getBlockMgr()->setBreakOnSkipOps(true); +// if (!frm.open()) { +// cerr << "Error: couldn't open file " << argv[1] << ". Exiting." << endl; +// exit(1); +// } +// cout << "File Type is : " << frm.getFileType() << ", " << frm.getFileTypeName() << "." << endl; +// cout << "RecordType is : " << frm.getRecordType() << ", " << frm.getRecordTypeName() << "." << endl; +// +// bool headerFound = false; +// QuickString outbuf; +// while (!frm.eof()) { +// Record *record = frm.allocateAndGetNextRecord(); +// if (!headerFound && frm.hasHeader()) { +// cout << frm.getHeader() << endl; +// headerFound = true; +// } +// if (record == NULL) { +// break; +// } +// +// outbuf.clear(); +// record->print(outbuf); +// printf("%s\n", outbuf.c_str()); +// // RecordKeyList recList(record); // int blockCount = frm.getBlockMgr()->getBlocks(recList); // printf("The %d blocks are:\n", blockCount); @@ -168,10 +168,10 @@ int nek_sandbox1_main(int argc,char** argv) // printf("\n\n"); // frm.getBlockMgr()->deleteBlocks(recList); - frm.deleteRecord(record); - } +// frm.deleteRecord(record); +// } // cout << "Final header is: " << frm.getHeader() << endl; - frm.close(); +// frm.close(); return 0; } diff --git a/src/sampleFile/Makefile b/src/sampleFile/Makefile index 5c20a9c56e2450c6c67fa35efb79041e576c6525..2042291e68735b4316062d64a04e7f35d9847b2a 100644 --- a/src/sampleFile/Makefile +++ b/src/sampleFile/Makefile @@ -17,6 +17,7 @@ INCLUDES = -I$(UTILITIES_DIR)/Contexts/ \ -I$(UTILITIES_DIR)/FileRecordTools/ \ -I$(UTILITIES_DIR)/FileRecordTools/FileReaders/ \ -I$(UTILITIES_DIR)/FileRecordTools/Records/ \ + -I$(UTILITIES_DIR)/RecordOutputMgr/ \ -I$(UTILITIES_DIR)/version/ # ---------------------------------- diff --git a/src/sampleFile/SampleFile.cpp b/src/sampleFile/SampleFile.cpp index 8f532c5f47a8eb89da81fac4d5c3a35f15a17187..8291824cef1cb5a67d7827c9bf0e867b7df4f48f 100644 --- a/src/sampleFile/SampleFile.cpp +++ b/src/sampleFile/SampleFile.cpp @@ -35,11 +35,7 @@ SampleFile::~SampleFile() { bool SampleFile::takeSample() { //we're only operating on one file, so the idx is zero. - _inputFile = new FileRecordMgr(0, _context); - if (!_inputFile->open()) { - return false; // FRM will give the error and die anyway, - //no error message needed here. - } + _inputFile = _context->getFile(0); _samples.resize(_numSamples, NULL); @@ -50,7 +46,6 @@ bool SampleFile::takeSample() _context->getUnspecifiedSeed(); } - _context->determineOutputType(); _outputMgr = new RecordOutputMgr(); _outputMgr->init(_context); @@ -82,10 +77,6 @@ bool SampleFile::takeSample() } delete _outputMgr; _inputFile->close(); - - //clean up. - delete _inputFile; - return true; } diff --git a/src/utils/BinTree/BinTree.cpp b/src/utils/BinTree/BinTree.cpp index e7a1fde481700340b646f7457ec1bc8d77712c57..54696f6928e7667683d993c658f7771d8e88ff2f 100644 --- a/src/utils/BinTree/BinTree.cpp +++ b/src/utils/BinTree/BinTree.cpp @@ -2,11 +2,10 @@ #include "FileRecordMgr.h" -BinTree::BinTree(int databaseFileIdx, ContextIntersect *context) -: _databaseFileIdx(databaseFileIdx), +BinTree::BinTree(ContextIntersect *context) +: _databaseFile(NULL), _context(context), _binOffsetsExtended(NULL), - _dbFileMgr(NULL), _showBinMetrics(false), _maxBinNumFound(0) { @@ -37,7 +36,7 @@ BinTree::~BinTree() { } for (innerListIterType listIter = bin->begin(); listIter != bin->end(); listIter = bin->next()) { const Record *record = listIter->value(); - _dbFileMgr->deleteRecord(record); + _databaseFile->deleteRecord(record); } delete bin; bin = NULL; @@ -46,10 +45,6 @@ BinTree::~BinTree() { delete [] bins; bins = NULL; } - if (_dbFileMgr != NULL) { - delete _dbFileMgr; - _dbFileMgr = NULL; - } delete [] _binOffsetsExtended; if (_showBinMetrics) { @@ -73,18 +68,13 @@ BinTree::~BinTree() { } } -bool BinTree::loadDB() +void BinTree::loadDB() { - _dbFileMgr = new FileRecordMgr(_databaseFileIdx, _context); - if (!_dbFileMgr->open()) { - fprintf(stderr, "ERROR: Can't open database file %s to build tree.\n", _context->getInputFileName(_databaseFileIdx).c_str()); - delete _dbFileMgr; - _dbFileMgr = NULL; - return false; - } + _databaseFile = _context->getFile(_context->getDatabaseFileIdx()); + Record *record = NULL; - while (!_dbFileMgr->eof()) { - record = _dbFileMgr->allocateAndGetNextRecord(); + while (!_databaseFile->eof()) { + record = _databaseFile->allocateAndGetNextRecord(); //In addition to NULL records, we also don't want to add unmapped reads. if (record == NULL || record->isUnmapped()) { continue; @@ -92,19 +82,17 @@ bool BinTree::loadDB() if (!addRecordToTree(record)) { fprintf(stderr, "ERROR: Unable to add record to tree.\n"); - _dbFileMgr->close(); - return false; + _databaseFile->close(); + exit(1); } } - _dbFileMgr->close(); + _databaseFile->close(); //TBD: give ERROR and return false if tree is empty. if (_mainMap.empty()) { fprintf(stderr, "ERROR: Tree is empty, no records added.\n"); - return false; + exit(1); } - return true; - } void BinTree::getHits(Record *record, RecordKeyList &hitSet) diff --git a/src/utils/BinTree/BinTree.h b/src/utils/BinTree/BinTree.h index 800fc0e3ca1c23168cc1337852b9e1a557a37f09..ce15c481e9831cc4bd95f544bb4f3585084e956b 100644 --- a/src/utils/BinTree/BinTree.h +++ b/src/utils/BinTree/BinTree.h @@ -24,15 +24,15 @@ class Record; class BinTree { public: - BinTree(int databaseFileIdx, ContextIntersect *context); + BinTree(ContextIntersect *context); ~BinTree(); - bool loadDB(); + void loadDB(); void getHits(Record *record, RecordKeyList &hitSet); private: - int _databaseFileIdx; + FileRecordMgr *_databaseFile; ContextIntersect *_context; // @@ -60,8 +60,6 @@ private: typedef map<mainKeyType, allBinsType> mainMapType; mainMapType _mainMap; - FileRecordMgr *_dbFileMgr; - bool _showBinMetrics; uint32_t _maxBinNumFound; map<uint32_t, int> _binsHit; diff --git a/src/utils/Contexts/Context.cpp b/src/utils/Contexts/Context.cpp deleted file mode 100644 index 439c1fef8931460ea24540b42cc10fe3d38757ce..0000000000000000000000000000000000000000 --- a/src/utils/Contexts/Context.cpp +++ /dev/null @@ -1,451 +0,0 @@ -/* - * Context.cpp - * - * Created on: Feb 12, 2013 - * Author: nek3d - */ - -#include "Context.h" -#include <unistd.h> -#include <sys/types.h> - -Context::Context() -: - _program(UNSPECIFIED_PROGRAM), - _useMergedIntervals(false), - _genomeFile(NULL), - _outputFileType(FileRecordTypeChecker::UNKNOWN_FILE_TYPE), - _outputTypeDetermined(false), - _skipFirstArgs(0), - _showHelp(false), - _obeySplits(false), - _uncompressedBam(false), - _useBufferedOutput(true), - _anyHit(false), - _noHit(false), - _writeA(false), - _writeB(false), - _leftJoin(false), - _writeCount(false), - _writeOverlap(false), - _writeAllOverlap(false), - _haveFraction(false), - _overlapFraction(1E-9), - _reciprocal(false), - _sameStrand(false), - _diffStrand(false), - _sortedInput(false), - _printHeader(false), - _printable(true), - _explicitBedOutput(false), - _queryFileIdx(-1), - _databaseFileIdx(-1), - _bamHeaderAndRefIdx(-1), - _maxNumDatabaseFields(0), - _useFullBamTags(false), - _reportCount(false), - _maxDistance(0), - _reportNames(false), - _reportScores(false), - _numOutputRecords(0), - _hasConstantSeed(false), - _seed(0), - _forwardOnly(false), - _reverseOnly(false) -{ - _programNames["intersect"] = INTERSECT; - _programNames["sample"] = SAMPLE; - - _validScoreOps.insert("sum"); - _validScoreOps.insert("max"); - _validScoreOps.insert("min"); - _validScoreOps.insert("mean"); - _validScoreOps.insert("mode"); - _validScoreOps.insert("median"); - _validScoreOps.insert("antimode"); - _validScoreOps.insert("collapse"); -} - -Context::~Context() -{ - if (_genomeFile != NULL) { - delete _genomeFile; - _genomeFile = NULL; - } -} - -bool Context::determineOutputType() { - if (_outputTypeDetermined) { - return true; - } - //test whether output should be BED or BAM. - //If the user explicitly requested BED, then it's BED. - if (getExplicitBedOutput()) { - setOutputFileType(FileRecordTypeChecker::SINGLE_LINE_DELIM_TEXT_FILE_TYPE); - _outputTypeDetermined = true; - return true; - } - //If this is an intersection, and the query is BAM, then - //the output is BAM. - if (_program == INTERSECT && getQueryFileType() == FileRecordTypeChecker::BAM_FILE_TYPE) { - setOutputFileType(FileRecordTypeChecker::BAM_FILE_TYPE); - _outputTypeDetermined = true; - return true; - - } - //Otherwise, if there are any BAM files in the input, - //then the output should be BAM. - for (size_t i = 0; i < _inputFiles.size(); i++) { - if (_inputFiles[i]._fileType == FileRecordTypeChecker::BAM_FILE_TYPE) { - setOutputFileType(FileRecordTypeChecker::BAM_FILE_TYPE); - _bamHeaderAndRefIdx = i; - _outputTypeDetermined = true; - return true; - } - } - - //Okay, it's bed. - setOutputFileType(FileRecordTypeChecker::SINGLE_LINE_DELIM_TEXT_FILE_TYPE); - _outputTypeDetermined = true; - return true; - - -} - -void Context::openGenomeFile(const QuickString &genomeFilename) -{ - _genomeFile = new NewGenomeFile(genomeFilename.c_str()); -} - -void Context::openGenomeFile(const BamTools::RefVector &refVector) -{ - _genomeFile = new NewGenomeFile(refVector); -} - -bool Context::parseCmdArgs(int argc, char **argv, int skipFirstArgs) { - _argc = argc; - _argv = argv; - _skipFirstArgs = skipFirstArgs; - if (argc < 2) { - setShowHelp(true); - return false; - } - - setProgram(_programNames[argv[0]]); - - _argsProcessed.resize(argc - skipFirstArgs, false); - - for (int i=skipFirstArgs; i < argc; i++) { - if (isUsed(i - skipFirstArgs)) { - continue; - } - - if (strcmp(argv[i], "-i") == 0) { - if (argc <= i+1) { - _errorMsg = "\n***** ERROR: -i option given, but no input file specified. *****"; - return false; - } - addInputFile(argv[i+1]); - markUsed(i - skipFirstArgs); - i++; - markUsed(i - skipFirstArgs); - } else if (strcmp(argv[i], "-g") == 0) { - if (argc <= i+1) { - _errorMsg = "\n***** ERROR: -g option given, but no genome file specified. *****"; - return false; - } - openGenomeFile(argv[i+1]); - markUsed(i - skipFirstArgs); - i++; - markUsed(i - skipFirstArgs); - } else if (strcmp(argv[i], "-h") == 0) { - setShowHelp(true); - markUsed(i - skipFirstArgs); - } else if (strcmp(argv[i], "--help") == 0) { - setShowHelp(true); - markUsed(i - skipFirstArgs); - } - else if (strcmp(argv[i], "-split") == 0) { - setObeySplits(true); - markUsed(i - skipFirstArgs); - } - if (strcmp(argv[i], "-a") == 0) { - if (argc <= i+1) { - _errorMsg = "\n***** ERROR: -a option given, but no query file specified. *****"; - return false; - } - - addInputFile(argv[i+1]); - _queryFileIdx = getNumInputFiles() -1; - markUsed(i - skipFirstArgs); - i++; - markUsed(i - skipFirstArgs); - } - else if(strcmp(argv[i], "-abam") == 0) { - if (argc <= i+1) { - _errorMsg = "\n***** ERROR: -abam option given, but no query BAM file specified. *****"; - return false; - } - addInputFile(argv[i+1]); - _queryFileIdx = getNumInputFiles() -1; - markUsed(i - skipFirstArgs); - i++; - markUsed(i - skipFirstArgs); - setInputFileType(_queryFileIdx, FileRecordTypeChecker::BAM_FILE_TYPE); - } - else if (strcmp(argv[i], "-b") == 0) { - if (argc <= i+1) { - _errorMsg = "\n***** ERROR: -b option given, but no database file specified. *****"; - return false; - } - addInputFile(argv[i+1]); - _databaseFileIdx = getNumInputFiles() -1; - markUsed(i - skipFirstArgs); - i++; - markUsed(i - skipFirstArgs); - } else if (strcmp(argv[i], "-u") == 0) { - setAnyHit(true); - markUsed(i - skipFirstArgs); - } else if(strcmp(argv[i], "-f") == 0) { - if ((i+1) < argc) { - setHaveFraction(true); - setOverlapFraction(atof(argv[i + 1])); - markUsed(i - skipFirstArgs); - i++; - markUsed(i - skipFirstArgs); - } - } - else if(strcmp(argv[i], "-bed") == 0) { - setExplicitBedOutput(true); - markUsed(i - skipFirstArgs); - } - else if(strcmp(argv[i], "-wa") == 0) { - setWriteA(true); - markUsed(i - skipFirstArgs); - } - else if(strcmp(argv[i], "-wb") == 0) { - setWriteB(true); - markUsed(i - skipFirstArgs); - } - else if(strcmp(argv[i], "-wo") == 0) { - setWriteOverlap(true); - markUsed(i - skipFirstArgs); - } - else if(strcmp(argv[i], "-wao") == 0) { - setWriteAllOverlap(true); - setWriteOverlap(true); - markUsed(i - skipFirstArgs); - } - else if(strcmp(argv[i], "-c") == 0) { - setWriteCount(true); - markUsed(i - skipFirstArgs); - } - else if(strcmp(argv[i], "-r") == 0) { - setReciprocal(true); - markUsed(i - skipFirstArgs); - } - else if (strcmp(argv[i], "-v") == 0) { - setNoHit(true); - markUsed(i - skipFirstArgs); - } - else if (strcmp(argv[i], "-s") == 0) { - setSameStrand(true); - markUsed(i - skipFirstArgs); - - if (_program == SAMPLE) { - if (argc <= i+1) { - _errorMsg = "\n***** ERROR: -s option given, but \"forward\" or \"reverse\" not specified. *****"; - return false; - } - if (strcmp(argv[i+1], "forward") == 0) { - _forwardOnly = true; - } else if (strcmp(argv[i+1], "reverse") == 0) { - _reverseOnly = true; - } else { - _errorMsg = "\n***** ERROR: -s option given, but \"forward\" or \"reverse\" not specified. *****"; - return false; - } - i++; - markUsed(i - skipFirstArgs); - } - } - else if (strcmp(argv[i], "-S") == 0) { - setDiffStrand(true); - markUsed(i - skipFirstArgs); - } - else if (strcmp(argv[i], "-loj") == 0) { - setLeftJoin(true); - markUsed(i - skipFirstArgs); - } - else if (strcmp(argv[i], "-ubam") == 0) { - setUncompressedBam(true); - markUsed(i - skipFirstArgs); - } - else if (strcmp(argv[i], "-fbam") == 0) { - setUseFullBamTags(true); - markUsed(i - skipFirstArgs); - } - else if(strcmp(argv[i], "-sorted") == 0) { - setSortedInput(true); - markUsed(i - skipFirstArgs); - } - else if(strcmp(argv[i], "-nobuf") == 0) { - setUseBufferedOutput(false); - markUsed(i - skipFirstArgs); - } - else if(strcmp(argv[i], "-header") == 0) { - setPrintHeader(true); - markUsed(i - skipFirstArgs); - } else if (strcmp(argv[i], "-n") == 0) { - if (argc <= i+1) { - _errorMsg = "\n***** ERROR: -n option given, but no number of output records specified. *****"; - return false; - } - setNumOutputRecords(atoi(argv[i + 1])); - markUsed(i - skipFirstArgs); - i++; - markUsed(i - skipFirstArgs); - } else if (strcmp(argv[i], "-seed") == 0) { - if (argc <= i+1) { - _errorMsg = "\n***** ERROR: -seed option given, but no seed specified. *****"; - return false; - } - _hasConstantSeed = true; - _seed = atoi(argv[i+1]); - srand(_seed); - markUsed(i - skipFirstArgs); - i++; - markUsed(i - skipFirstArgs); - } - } - return true; -} - -bool Context::isValidState() -{ - if (!cmdArgsValid()) { - return false; - } - - if (_program == INTERSECT) { - return isValidIntersectState(); - } - if (_program == SAMPLE) { - return isValidSampleState(); - } - return false; -} - - -bool Context::cmdArgsValid() -{ - bool retval = true; - for (int i = _skipFirstArgs; i < _argc; i++) { - if (!isUsed(i - _skipFirstArgs)) { - _errorMsg += "\n***** ERROR: Unrecognized parameter: "; - _errorMsg += _argv[i]; - _errorMsg += " *****"; - retval = false; - } - } - return retval; -} - -int Context::getBamHeaderAndRefIdx() { - if (_bamHeaderAndRefIdx != -1) { - //already found which BAM file to use for the header - return _bamHeaderAndRefIdx; - } - if (_inputFiles[_queryFileIdx]._fileType == FileRecordTypeChecker::BAM_FILE_TYPE) { - _bamHeaderAndRefIdx = _queryFileIdx; - } else { - _bamHeaderAndRefIdx = _databaseFileIdx; - } - return _bamHeaderAndRefIdx; -} - -int Context::getUnspecifiedSeed() -{ - // thanks to Rob Long for the tip. - _seed = (unsigned)time(0)+(unsigned)getpid(); - srand(_seed); - return _seed; -} - - -bool Context::isValidIntersectState() -{ - if (_queryFileIdx == -1 || _databaseFileIdx == -1) { - _errorMsg = "\n***** ERROR: query and database files not specified. *****"; - return false; - } - - if (getAnyHit() && getNoHit()) { - _errorMsg = "\n***** ERROR: request either -u for anyHit OR -v for noHit, not both. *****"; - return false; - } - if (getWriteCount()) { - if (getAnyHit()) { - _errorMsg = "\n***** ERROR: request either -c for writeCount OR -u for anyHit, not both. *****"; - return false; - } else if (getWriteB()) { - _errorMsg = "\n***** ERROR: request either -c for writeCount OR -wb for writeB, not both. *****"; - return false; - } else if (getQueryFileType() == FileRecordTypeChecker::BAM_FILE_TYPE && !getExplicitBedOutput()) { - _errorMsg = "\n***** ERROR: writeCount option is not valid with BAM query input, unless bed output is specified with -bed option. *****"; - return false; - } - } - if (getWriteOverlap()) { - - if (getWriteA()) { - _errorMsg = "\n***** ERROR: request either -wa for writeA OR -wo for writeOverlap, not both. *****"; - return false; - } else if (getWriteB()) { - _errorMsg = "\n***** ERROR: request either -wb for writeB OR -wo for writeOverlap, not both. *****"; - return false; - } else if (getWriteCount()) { - _errorMsg = "\n***** ERROR: request either -c for writeCount OR -wo for writeOverlap, not both. *****"; - return false; - } else if (getAnyHit()) { - _errorMsg = "\n***** ERROR: request either -u for anyHit OR -wo for writeOverlap, not both. *****"; - return false; - } else if (getQueryFileType() == FileRecordTypeChecker::BAM_FILE_TYPE && !getExplicitBedOutput()) { - _errorMsg = "\n***** ERROR: writeAllOverlap option is not valid with BAM query input, unless bed output is specified with -bed option. *****"; - return false; - } - } - if (getWriteB() || getLeftJoin()) { - if (getQueryFileType() == FileRecordTypeChecker::BAM_FILE_TYPE && !getExplicitBedOutput()) { - cerr << endl << "*****" << endl << "*****WARNING: -wb and -loj are ignored with bam input, unless bed output is specified with -bed option." << endl << "*****" << endl; - } - } - if (getSameStrand() && getDiffStrand()) { - _errorMsg = "\n***** ERROR: request -s for sameStrand, or -S for diffStrand, not both. *****"; - return false; - } - - if (getQueryFileType() == FileRecordTypeChecker::BAM_FILE_TYPE && getPrintHeader()) { - cerr << endl << "*****" << endl << "*****WARNING: -header option is not valid for BAM input." << endl << "*****" << endl; - setPrintHeader(false); - } - if (getAnyHit() || getNoHit() || getWriteCount()) { - setPrintable(false); - } - return _inputFiles.size() > 0; -} - -bool Context::isValidSampleState() -{ - if (_inputFiles.size() != 1) { - _errorMsg = "\n***** ERROR: input file not specified. *****"; - // Allow one and only input file for now - return false; - } -// if (_numOutputRecords < 1) { -// _errorMsg = "\n***** ERROR: number of output records not specified. *****"; -// return false; -// } - return true; -} - diff --git a/src/utils/Contexts/Context.h b/src/utils/Contexts/Context.h deleted file mode 100644 index 19160e9556936d3e6907c147d7653be35b3069d8..0000000000000000000000000000000000000000 --- a/src/utils/Contexts/Context.h +++ /dev/null @@ -1,293 +0,0 @@ -/* - * ContextBase.h - * - * Created on: Feb 11, 2013 - * Author: nek3d - */ - -#ifndef CONTEXTBASE_H_ -#define CONTEXTBASE_H_ - - -// The Context class handles the settings for an operation, -// such as merge, intersect, jaccard, etc. -// -// Settings include the input and output parameters, such as input -// files, file types (if explicitly provided), genome files, -// run options, output format, etc. - -#include <cstdlib> -#include "version.h" -#include "BedtoolsTypes.h" -#include "FileRecordTypeChecker.h" -#include "NewGenomeFile.h" -#include "api/BamReader.h" -#include "api/BamAux.h" - - -class Context { -public: - Context(); - ~Context(); - - typedef FileRecordTypeChecker::FILE_TYPE ContextFileType; - typedef FileRecordTypeChecker::RECORD_TYPE ContextRecordType; - - typedef enum {UNSPECIFIED_PROGRAM, INTERSECT, WINDOW, CLOSEST, COVERAGE, MAP, GENOMECOV, MERGE, CLUSTER, - COMPLEMENT, SUBTRACT, SLOP, FLANK, SORT, RANDOM, SAMPLE, SHUFFLE, ANNOTATE, MULTIINTER, UNIONBEDG, PAIRTOBED, - PAIRTOPAIR,BAMTOBED, BEDTOBAM, BEDTOFASTQ, BEDPETOBAM, BED12TOBED6, GETFASTA, MASKFASTA, NUC, - MULTICOV, TAG, JACCARD, OVERLAP, IGV, LINKS,MAKEWINDOWS, GROUPBY, EXPAND } PROGRAM_TYPE; - - PROGRAM_TYPE getProgram() const { return _program; } - void setProgram(PROGRAM_TYPE program) { _program = program; } - - void addInputFile(const QuickString &inputFile, - ContextFileType explicitFileType = FileRecordTypeChecker::UNKNOWN_FILE_TYPE, - ContextRecordType explicitRecordType = FileRecordTypeChecker::UNKNOWN_RECORD_TYPE) { - _inputFiles.push_back(FileEntryType(inputFile, explicitFileType, explicitRecordType)); - } - - int getNumInputFiles() const { return _inputFiles.size(); } - const QuickString &getInputFileName(int fileNum) const { return _inputFiles[fileNum]._fileName; } - ContextFileType getInputFileType(int fileNum) const { return _inputFiles[fileNum]._fileType; } - void setInputFileType(int fileNum, ContextFileType fileType) { _inputFiles[fileNum]._fileType = fileType; } - ContextRecordType getInputRecordType(int fileNum) const { return _inputFiles[fileNum]._recordType; } - void setInputRecordType(int fileNum, ContextRecordType recordType) { _inputFiles[fileNum]._recordType = recordType; } - //HERE ARE SOME SIMPLER VERSIONS OF THE ABOVE FOR APPS THAT HAVE ONLY ONE INPUT FILE - const QuickString &getInputFileName() const { return _inputFiles[0]._fileName; } - ContextFileType getInputFileType() const { return _inputFiles[0]._fileType; } - void setInputFileType(ContextFileType fileType) { _inputFiles[0]._fileType = fileType; } - ContextRecordType getInputRecordType() const { return _inputFiles[0]._recordType; } - void setInputRecordType(ContextRecordType recordType) { _inputFiles[0]._recordType = recordType; } - int getInputFileIdx() const { return 0; } - - - - - bool determineOutputType(); - - const QuickString &getHeader(int fileIdx) { return _headers[fileIdx]; } - void setHeader(int fileIdx, const QuickString &header) { _headers[fileIdx] = header; } - const BamTools::RefVector &getReferences(int fileIdx) { return _references[fileIdx]; } - void setReferences(int fileIdx, const BamTools::RefVector &refs) { _references[fileIdx] = refs; } - int getBamHeaderAndRefIdx(); //return idx of 1st query that is BAM. If none, first DB that is BAM. - - bool getUseMergedIntervals() const { return _useMergedIntervals; } - void setUseMergedIntervals(bool val) { _useMergedIntervals = val; } - - void openGenomeFile(const QuickString &genomeFilename); - void openGenomeFile(const BamTools::RefVector &refVector); - bool hasGenomeFile() const { return _genomeFile != NULL; } - NewGenomeFile *getGenomeFile() const { return _genomeFile; } - - void setOutputFileType(ContextFileType fileType) { _outputFileType = fileType; } - ContextFileType getOutputFileType() const { return _outputFileType; } - - bool parseCmdArgs(int argc, char **argv, int skipFirstArgs); - - //isValidState checks that parameters to context are in an acceptable state. - // If not, the error msg string will be set with a reason why it failed. - bool isValidState(); - bool getShowHelp() const { return _showHelp; } - void setShowHelp(bool val) { _showHelp = val; } - - const QuickString &getErrorMsg() const { return _errorMsg; } - void setErrorMessage(const QuickString &errorMsg) { _errorMsg = errorMsg; } - - //split handling. - bool getObeySplits() const {return _obeySplits; } - void setObeySplits(bool val) { _obeySplits = val; } - - //Decide whether output format is BED or BAM. - //Default is BAM if any input files are BAM. - bool getExplicitBedOutput() const { return _explicitBedOutput; } - void setExplicitBedOutput(bool val) { _explicitBedOutput = val; } - - bool getUncompressedBam() const { return _uncompressedBam; } - void setUncompressedBam(bool val) { _uncompressedBam = val; } - - bool getUseBufferedOutput() const { return _useBufferedOutput; } - void setUseBufferedOutput(bool val) { _useBufferedOutput = val; } - // - // INTERSECT METOHDS - // - //NOTE: Query and database files will only be marked as such by either the - //parseCmdArgs method, or by explicitly setting them. - int getQueryFileIdx() const { return _queryFileIdx; } - void setQueryFileIdx(int idx) { _queryFileIdx = idx; } - int getDatabaseFileIdx() const { return _databaseFileIdx; } - void setDatabaseFileIdx(int idx) { _databaseFileIdx = idx; } - const QuickString &getQueryFileName() const { return _inputFiles[_queryFileIdx]._fileName; } - const QuickString &getDatabaseFileName() const { return _inputFiles[_databaseFileIdx]._fileName; } - ContextFileType getQueryFileType() const { return _inputFiles[_queryFileIdx]._fileType; } - ContextFileType getDatabaseFileType() const { return _inputFiles[_databaseFileIdx]._fileType; } - ContextRecordType getQueryRecordType() const { return _inputFiles[_queryFileIdx]._recordType; } - ContextRecordType getDatabaseRecordType() const { return _inputFiles[_databaseFileIdx]._recordType; } - int getMaxNumDatabaseFields() const { return _maxNumDatabaseFields; } - void setMaxNumDatabaseFields(int val) { _maxNumDatabaseFields = val; } - - bool getAnyHit() const {return _anyHit; } - void setAnyHit(bool val) { _anyHit = val; } - - bool getNoHit() const {return _noHit; } - void setNoHit(bool val) { _noHit = val; } - - bool getLeftJoin() const {return _leftJoin; } - void setLeftJoin(bool val) { _leftJoin = val; } - - bool getWriteA() const {return _writeA; } - void setWriteA(bool val) { _writeA = val; } - - bool getWriteB() const {return _writeB; } - void setWriteB(bool val) { _writeB = val; } - - bool getWriteCount() const {return _writeCount; } - void setWriteCount(bool val) { _writeCount = val; } - - bool getWriteOverlap() const {return _writeOverlap; } - void setWriteOverlap(bool val) { _writeOverlap = val; } - - bool getWriteAllOverlap() const {return _writeAllOverlap; } - void setWriteAllOverlap(bool val) { _writeAllOverlap = val; } - - bool getHaveFraction() const {return _haveFraction; } - void setHaveFraction(bool val) { _haveFraction = val; } - - float getOverlapFraction() const { return _overlapFraction; } - void setOverlapFraction(float fraction) { _overlapFraction = fraction; } - - bool getReciprocal() const {return _reciprocal; } - void setReciprocal(bool val) { _reciprocal = val; } - - bool getSameStrand() const {return _sameStrand; } - void setSameStrand(bool val) { _sameStrand = val; } - bool getForwardOnly() const { return _forwardOnly; } - bool getReverseOnly() const { return _reverseOnly; } - - bool getDiffStrand() const {return _diffStrand; } - void setDiffStrand(bool val) { _diffStrand = val; } - - bool getSortedInput() const {return _sortedInput; } - void setSortedInput(bool val) { _sortedInput = val; } - - bool getPrintHeader() const {return _printHeader; } - void setPrintHeader(bool val) { _printHeader = val; } - - bool getPrintable() const { return _printable; } - void setPrintable(bool val) { _printable = val; } - - bool getUseFullBamTags() const { return _useFullBamTags; } - void setUseFullBamTags(bool val) { _useFullBamTags = val; } - - // - // MERGE METHODS - // - bool getReportCount() const { return _reportCount; } - void setReportCount(bool val) { _reportCount = val; } - - int getMaxDistance() const { return _maxDistance; } - void setMaxDistance(int distance) { _maxDistance = distance; } - - bool getReportNames() const { return _reportNames; } - void setReportNames(bool val) { _reportNames = val; } - - bool getReportScores() const { return _reportScores; } - void setReportScores(bool val) { _reportScores = val; } - - const QuickString &getScoreOp() const { return _scoreOp; } - void setScoreOp(const QuickString &op) { _scoreOp = op; } - - - // METHODS FOR PROGRAMS WITH USER_SPECIFIED NUMBER - // OF OUTPUT RECORDS. - int getNumOutputRecords() const { return _numOutputRecords; } - void setNumOutputRecords(int val) { _numOutputRecords = val; } - - //SEED OPS FOR APPS WITH RANDOMNESS - //When a seed has been specified on the command line, srand - //will have been already been called during command line parsing. - //For reference, srand is the C function that initializes the - //psuedo-random number generator. If a seed has not been - //specifified, the user may call the - bool hasConstantSeed() const { return _hasConstantSeed; } - int getConstantSeed() const { return _seed; } - int getUnspecifiedSeed(); - -protected: - PROGRAM_TYPE _program; - - class FileEntryType { - public: - FileEntryType(const QuickString &filename, ContextFileType fileType, ContextRecordType recordType) - : _fileName(filename), _fileType(fileType), _recordType(recordType) {} - QuickString _fileName; - ContextFileType _fileType; - ContextRecordType _recordType; - }; - vector<FileEntryType> _inputFiles; - map<QuickString, PROGRAM_TYPE> _programNames; - - bool _useMergedIntervals; - NewGenomeFile *_genomeFile; - - ContextFileType _outputFileType; - bool _outputTypeDetermined; - - map<int, QuickString> _headers; - map<int, BamTools::RefVector> _references; - - QuickString _errorMsg; - vector<bool> _argsProcessed; //used for processing cmdLine args. - int _argc; - char **_argv; - int _skipFirstArgs; - bool _showHelp; - bool _obeySplits; - bool _uncompressedBam; - bool _useBufferedOutput; - - bool _anyHit; - bool _noHit; - bool _writeA; - bool _writeB; - bool _leftJoin; - bool _writeCount; - bool _writeOverlap; - bool _writeAllOverlap; - bool _haveFraction; - float _overlapFraction; - bool _reciprocal; - bool _sameStrand; - bool _diffStrand; - bool _sortedInput; - bool _printHeader; - bool _printable; - bool _explicitBedOutput; - int _queryFileIdx; - int _databaseFileIdx; - int _bamHeaderAndRefIdx; - int _maxNumDatabaseFields; - bool _useFullBamTags; - - bool _reportCount; - int _maxDistance; - bool _reportNames; - bool _reportScores; - QuickString _scoreOp; - set<QuickString> _validScoreOps; - - int _numOutputRecords; - - bool _hasConstantSeed; - int _seed; - bool _forwardOnly; - bool _reverseOnly; - - void markUsed(int i) { _argsProcessed[i] = true; } - bool isUsed(int i) const { return _argsProcessed[i]; } - bool cmdArgsValid(); - bool isValidIntersectState(); - bool isValidSampleState(); -}; - -#endif /* CONTEXT_H_ */ diff --git a/src/utils/Contexts/ContextBase.cpp b/src/utils/Contexts/ContextBase.cpp index 13b0864b9c85cd9a7264c0e42cdcc4f942c975f1..cd30b203881b61bfa81ec85ec6ac069bdceb2640 100644 --- a/src/utils/Contexts/ContextBase.cpp +++ b/src/utils/Contexts/ContextBase.cpp @@ -12,6 +12,7 @@ ContextBase::ContextBase() : _program(UNSPECIFIED_PROGRAM), + _allFilesOpened(false), _useMergedIntervals(false), _genomeFile(NULL), _outputFileType(FileRecordTypeChecker::UNKNOWN_FILE_TYPE), @@ -69,9 +70,14 @@ ContextBase::ContextBase() ContextBase::~ContextBase() { - if (_genomeFile != NULL) { - delete _genomeFile; - _genomeFile = NULL; + delete _genomeFile; + _genomeFile = NULL; + + //close all files and delete FRM objects. + for (int i=0; i < (int)_files.size(); i++) { + _files[i]->close(); + delete _files[i]; + _files[i] = NULL; } } @@ -89,8 +95,8 @@ bool ContextBase::determineOutputType() { //Otherwise, if there are any BAM files in the input, //then the output should be BAM. - for (_i = 0; _i < (int)_inputFiles.size(); _i++) { - if (_inputFiles[_i]._fileType == FileRecordTypeChecker::BAM_FILE_TYPE) { + for (_i = 0; _i < (int)_files.size(); _i++) { + if (_files[_i]->getFileType() == FileRecordTypeChecker::BAM_FILE_TYPE) { setOutputFileType(FileRecordTypeChecker::BAM_FILE_TYPE); _bamHeaderAndRefIdx = _i; _outputTypeDetermined = true; @@ -176,7 +182,16 @@ bool ContextBase::parseCmdArgs(int argc, char **argv, int skipFirstArgs) { bool ContextBase::isValidState() { - return cmdArgsValid(); + if (!openFiles()) { + return false; + } + if (!cmdArgsValid()) { + return false; + } + if (!determineOutputType()) { + return false; + } + return true; } @@ -194,12 +209,36 @@ bool ContextBase::cmdArgsValid() return retval; } +bool ContextBase::openFiles() { + + //Make a vector of FileRecordMgr objects by going through the vector + //of filenames and opening each one. + if (_allFilesOpened) { + return true; + } + _files.resize(_fileNames.size()); + + for (int i = 0; i < (int)_fileNames.size(); i++) { + FileRecordMgr *frm = new FileRecordMgr(_fileNames[i], _sortedInput); + if (hasGenomeFile()) { + frm->setGenomeFile(_genomeFile); + } + frm->setFullBamFlags(_useFullBamTags); + if (!frm->open()) { + return false; + } + _files[i] = frm; + } + _allFilesOpened = true; + return true; +} + int ContextBase::getBamHeaderAndRefIdx() { if (_bamHeaderAndRefIdx != -1) { //already found which BAM file to use for the header return _bamHeaderAndRefIdx; } - if (_inputFiles[_queryFileIdx]._fileType == FileRecordTypeChecker::BAM_FILE_TYPE) { + if (_files[_queryFileIdx]->getFileType() == FileRecordTypeChecker::BAM_FILE_TYPE) { _bamHeaderAndRefIdx = _queryFileIdx; } else { _bamHeaderAndRefIdx = _databaseFileIdx; diff --git a/src/utils/Contexts/ContextBase.h b/src/utils/Contexts/ContextBase.h index 2335f11c6d0e084bfcdd4d7fac82385f27adbafa..872193fd5d8724538abf722e8e22b4cbfdf7efff 100644 --- a/src/utils/Contexts/ContextBase.h +++ b/src/utils/Contexts/ContextBase.h @@ -20,6 +20,7 @@ #include "version.h" #include "BedtoolsTypes.h" #include "FileRecordTypeChecker.h" +#include "FileRecordMgr.h" #include "NewGenomeFile.h" #include "api/BamReader.h" #include "api/BamAux.h" @@ -39,34 +40,20 @@ public: MULTICOV, TAG, JACCARD, OVERLAP, IGV, LINKS,MAKEWINDOWS, GROUPBY, EXPAND } PROGRAM_TYPE; PROGRAM_TYPE getProgram() const { return _program; } + FileRecordMgr *getFile(int fileIdx) { return _files[fileIdx]; } void setProgram(PROGRAM_TYPE program) { _program = program; } - void addInputFile(const QuickString &inputFile, - ContextFileType explicitFileType = FileRecordTypeChecker::UNKNOWN_FILE_TYPE, - ContextRecordType explicitRecordType = FileRecordTypeChecker::UNKNOWN_RECORD_TYPE) { - _inputFiles.push_back(FileEntryType(inputFile, explicitFileType, explicitRecordType)); - } - - int getNumInputFiles() const { return _inputFiles.size(); } - const QuickString &getInputFileName(int fileNum) const { return _inputFiles[fileNum]._fileName; } - ContextFileType getInputFileType(int fileNum) const { return _inputFiles[fileNum]._fileType; } - void setInputFileType(int fileNum, ContextFileType fileType) { _inputFiles[fileNum]._fileType = fileType; } - ContextRecordType getInputRecordType(int fileNum) const { return _inputFiles[fileNum]._recordType; } - void setInputRecordType(int fileNum, ContextRecordType recordType) { _inputFiles[fileNum]._recordType = recordType; } - //HERE ARE SOME SIMPLER VERSIONS OF THE ABOVE FOR APPS THAT HAVE ONLY ONE INPUT FILE - const QuickString &getInputFileName() const { return _inputFiles[0]._fileName; } - ContextFileType getInputFileType() const { return _inputFiles[0]._fileType; } - void setInputFileType(ContextFileType fileType) { _inputFiles[0]._fileType = fileType; } - ContextRecordType getInputRecordType() const { return _inputFiles[0]._recordType; } - void setInputRecordType(ContextRecordType recordType) { _inputFiles[0]._recordType = recordType; } - int getInputFileIdx() const { return 0; } + void addInputFile(const QuickString &inputFile) { _fileNames.push_back(inputFile); } + + int getNumInputFiles() const { return _fileNames.size(); } + const QuickString &getInputFileName(int fileNum) const { return _fileNames[fileNum]; } + ContextFileType getInputFileType(int fileNum) const { return _files[fileNum]->getFileType(); } + ContextRecordType getInputRecordType(int fileNum) const { return _files[fileNum]->getRecordType(); } virtual bool determineOutputType(); - const QuickString &getHeader(int fileIdx) { return _headers[fileIdx]; } - void setHeader(int fileIdx, const QuickString &header) { _headers[fileIdx] = header; } - const BamTools::RefVector &getReferences(int fileIdx) { return _references[fileIdx]; } - void setReferences(int fileIdx, const BamTools::RefVector &refs) { _references[fileIdx] = refs; } + const QuickString &getHeader(int fileIdx) { return _files[fileIdx]->getHeader(); } + const BamTools::RefVector &getBamReferences(int fileIdx) { return _files[fileIdx]->getBamReferences(); } int getBamHeaderAndRefIdx(); //return idx of 1st query that is BAM. If none, first DB that is BAM. bool getUseMergedIntervals() const { return _useMergedIntervals; } @@ -160,15 +147,9 @@ public: protected: PROGRAM_TYPE _program; - class FileEntryType { - public: - FileEntryType(const QuickString &filename, ContextFileType fileType, ContextRecordType recordType) - : _fileName(filename), _fileType(fileType), _recordType(recordType) {} - QuickString _fileName; - ContextFileType _fileType; - ContextRecordType _recordType; - }; - vector<FileEntryType> _inputFiles; + vector<QuickString> _fileNames; + vector<FileRecordMgr *> _files; + bool _allFilesOpened; map<QuickString, PROGRAM_TYPE> _programNames; bool _useMergedIntervals; @@ -230,6 +211,7 @@ protected: void markUsed(int i) { _argsProcessed[i] = true; } bool isUsed(int i) const { return _argsProcessed[i]; } bool cmdArgsValid(); + bool openFiles(); //set cmd line params and counter, i, as members so code //is more readable (as opposed to passing all 3 everywhere). diff --git a/src/utils/Contexts/ContextIntersect.cpp b/src/utils/Contexts/ContextIntersect.cpp index d43994fd4d969235b091aee0dfb7e0d693fbb2dc..fecf51d115ebde0c87446c9341cb95e1cf4b89ea 100644 --- a/src/utils/Contexts/ContextIntersect.cpp +++ b/src/utils/Contexts/ContextIntersect.cpp @@ -88,7 +88,7 @@ bool ContextIntersect::parseCmdArgs(int argc, char **argv, int skipFirstArgs) { bool ContextIntersect::isValidState() { - if (!cmdArgsValid()) { + if (!ContextBase::isValidState()) { return false; } @@ -149,7 +149,7 @@ bool ContextIntersect::isValidState() if (getAnyHit() || getNoHit() || getWriteCount()) { setPrintable(false); } - if (_inputFiles.size() < 2 ) { + if (_files.size() != 2 ) { return false; } return true; @@ -159,6 +159,15 @@ bool ContextIntersect::determineOutputType() { if (_outputTypeDetermined) { return true; } + + //determine the maximum number of database fields. + for (int i=0; i < (int)_files.size(); i++) { + int numFields = _files[i]->getNumFields(); + if ( numFields > _maxNumDatabaseFields) { + _maxNumDatabaseFields = numFields; + } + } + //If the query is BAM, and bed output wasn't specified, then the output is BAM. if (getQueryFileType() == FileRecordTypeChecker::BAM_FILE_TYPE && !getExplicitBedOutput()) { setOutputFileType(FileRecordTypeChecker::BAM_FILE_TYPE); @@ -196,7 +205,6 @@ bool ContextIntersect::handle_abam() markUsed(_i - _skipFirstArgs); _i++; markUsed(_i - _skipFirstArgs); - setInputFileType(_queryFileIdx, FileRecordTypeChecker::BAM_FILE_TYPE); return true; } diff --git a/src/utils/Contexts/ContextIntersect.h b/src/utils/Contexts/ContextIntersect.h index 9449845125d43a6a8c904d4061580675ccaf34f2..0144a1210eef34278c19ac9fe529fd74db9ccfe6 100644 --- a/src/utils/Contexts/ContextIntersect.h +++ b/src/utils/Contexts/ContextIntersect.h @@ -25,12 +25,12 @@ public: void setQueryFileIdx(int idx) { _queryFileIdx = idx; } int getDatabaseFileIdx() const { return _databaseFileIdx; } void setDatabaseFileIdx(int idx) { _databaseFileIdx = idx; } - const QuickString &getQueryFileName() const { return _inputFiles[_queryFileIdx]._fileName; } - const QuickString &getDatabaseFileName() const { return _inputFiles[_databaseFileIdx]._fileName; } - ContextFileType getQueryFileType() const { return _inputFiles[_queryFileIdx]._fileType; } - ContextFileType getDatabaseFileType() const { return _inputFiles[_databaseFileIdx]._fileType; } - ContextRecordType getQueryRecordType() const { return _inputFiles[_queryFileIdx]._recordType; } - ContextRecordType getDatabaseRecordType() const { return _inputFiles[_databaseFileIdx]._recordType; } + const QuickString &getQueryFileName() const { return _files[_queryFileIdx]->getFileName(); } + const QuickString &getDatabaseFileName() const { return _files[_databaseFileIdx]->getFileName(); } + ContextFileType getQueryFileType() const { return _files[_queryFileIdx]->getFileType(); } + ContextFileType getDatabaseFileType() const { return _files[_databaseFileIdx]->getFileType(); } + ContextRecordType getQueryRecordType() const { return _files[_queryFileIdx]->getRecordType(); } + ContextRecordType getDatabaseRecordType() const { return _files[_databaseFileIdx]->getRecordType(); } int getMaxNumDatabaseFields() const { return _maxNumDatabaseFields; } void setMaxNumDatabaseFields(int val) { _maxNumDatabaseFields = val; } diff --git a/src/utils/Contexts/ContextMap.cpp b/src/utils/Contexts/ContextMap.cpp index 80fd2b7bda0b485752223951d3906a4f0bde5ed8..d94d08884aa1c3e2caef93939b84179b6332ff80 100644 --- a/src/utils/Contexts/ContextMap.cpp +++ b/src/utils/Contexts/ContextMap.cpp @@ -60,12 +60,10 @@ bool ContextMap::parseCmdArgs(int argc, char **argv, int skipFirstArgs) { bool ContextMap::isValidState() { - if (!cmdArgsValid()) { + if (!ContextIntersect::isValidState()) { return false; } - ContextIntersect::isValidState(); - if (getDatabaseFileType() == FileRecordTypeChecker::BAM_FILE_TYPE) { //throw Error cerr << endl << "*****" @@ -76,8 +74,7 @@ bool ContextMap::isValidState() } // TODO // enforce any specific checks for Map. - - return ContextIntersect::isValidState(); + return true; } diff --git a/src/utils/Contexts/ContextSample.cpp b/src/utils/Contexts/ContextSample.cpp index 452b1f8ae33923e40062d3459564909880d8a872..38fbcb3c9fc47512bd7078841424c63bf4b269f9 100644 --- a/src/utils/Contexts/ContextSample.cpp +++ b/src/utils/Contexts/ContextSample.cpp @@ -43,10 +43,10 @@ bool ContextSample::parseCmdArgs(int argc, char **argv, int skipFirstArgs) { bool ContextSample::isValidState() { - if (!cmdArgsValid()) { + if (!ContextBase::isValidState()) { return false; } - if (_inputFiles.size() != 1) { + if (_files.size() != 1) { _errorMsg = "\n***** ERROR: input file not specified. *****"; // Allow one and only input file for now return false; diff --git a/src/utils/Contexts/Makefile b/src/utils/Contexts/Makefile index 8746fcdd9f83dc4c63ffd29a381fdaa6ccddb5ca..7ddc3c6c3cac4427c0578a33bc6822636bdb8a73 100644 --- a/src/utils/Contexts/Makefile +++ b/src/utils/Contexts/Makefile @@ -6,9 +6,12 @@ UTILITIES_DIR = ../../utils/ # ------------------- INCLUDES = -I$(UTILITIES_DIR)/general/ \ -I$(UTILITIES_DIR)/fileType/ \ - -I$(UTILITIES_DIR)/FileRecordTools/FileReaders/ \ + -I$(UTILITIES_DIR)/FileRecordTools/ \ + -I$(UTILITIES_DIR)/FileRecordTools/FileReaders/ \ + -I$(UTILITIES_DIR)/FileRecordTools/Records/ \ -I$(UTILITIES_DIR)/GenomeFile/ \ - -I$(UTILITIES_DIR)/BamTools/include/ \ + -I$(UTILITIES_DIR)/BamTools/include \ + -I$(UTILITIES_DIR)/BamTools/src/ \ -I$(UTILITIES_DIR)/version/ diff --git a/src/utils/FileRecordTools/FileReaders/BamFileReader.cpp b/src/utils/FileRecordTools/FileReaders/BamFileReader.cpp index a9d129d75f9fb3904ef4f7e06b0211842d2607a9..ab717f921236e17c8610e683a91c843b628c1736 100644 --- a/src/utils/FileRecordTools/FileReaders/BamFileReader.cpp +++ b/src/utils/FileRecordTools/FileReaders/BamFileReader.cpp @@ -4,44 +4,17 @@ BamFileReader::BamFileReader() : _bamReader(NULL), _eof(false), - _useTags(true), - _shouldDeleteBamReader(false) + _useTags(true) { } BamFileReader::~BamFileReader() { - if (_bamReader != NULL && _shouldDeleteBamReader) { - delete _bamReader; - _shouldDeleteBamReader = false; - _bamReader = NULL; - } } bool BamFileReader::open() { -// if (_bamReader == NULL) { -// _bamReader = new BamTools::BamReader(); -// _shouldDeleteBamReader = true; -// } -// if (_inputStream != NULL) { -// try { -// _bamReader->OpenStream(_inputStream); -// } -// catch (...) { -// fprintf(stderr, "ERROR: Unable to open BAM file from standard input.\n"); -// exit(1); -// } -// } else { -// try { -// _bamReader->Open(_filename); -// } -// catch (...) { -// fprintf(stderr, "ERROR: Unable to open BAM file %s\n", _filename.c_str()); -// exit(1); -// } -// } _bamHeader = _bamReader->GetHeaderText(); _references = _bamReader->GetReferenceData(); diff --git a/src/utils/FileRecordTools/FileReaders/BamFileReader.h b/src/utils/FileRecordTools/FileReaders/BamFileReader.h index deb03a2986f1bee63d32999435dc9807b239294e..8bea945ed8b1bdbecfaec9e8e20d82bf6e26f295 100644 --- a/src/utils/FileRecordTools/FileReaders/BamFileReader.h +++ b/src/utils/FileRecordTools/FileReaders/BamFileReader.h @@ -48,17 +48,18 @@ public: void getName(QuickString &) const; void getScore(QuickString &) const; char getStrand() const; + virtual int getNumFields() const { return MINIMUM_PRINTABLE_BAM_FIELDS; } protected: + BamTools::BamReader *_bamReader; BamTools::BamAlignment _bamAlignment; bool _eof; QuickString _bamHeader; BamTools::RefVector _references; bool _useTags; - bool _shouldDeleteBamReader; - + static const int MINIMUM_PRINTABLE_BAM_FIELDS = 6; void extractNameFromCore(); }; diff --git a/src/utils/FileRecordTools/FileReaders/BufferedStreamMgr.cpp b/src/utils/FileRecordTools/FileReaders/BufferedStreamMgr.cpp index 55ec8865e0745c90f7d80a5b93b48b07d0f4fc74..27f07894c347c2b871201f933287ed410c555bd0 100644 --- a/src/utils/FileRecordTools/FileReaders/BufferedStreamMgr.cpp +++ b/src/utils/FileRecordTools/FileReaders/BufferedStreamMgr.cpp @@ -26,13 +26,11 @@ BufferedStreamMgr::BufferedStreamMgr(const QuickString &filename) BufferedStreamMgr::~BufferedStreamMgr() { - if (_mainBuf != NULL) { - delete [] _mainBuf; - } - if (_inputStreamMgr != NULL) { - delete _inputStreamMgr; - _inputStreamMgr = NULL; - } + delete [] _mainBuf; + _mainBuf = NULL; + + delete _inputStreamMgr; + _inputStreamMgr = NULL; } bool BufferedStreamMgr::init() diff --git a/src/utils/FileRecordTools/FileReaders/FileReader.h b/src/utils/FileRecordTools/FileReaders/FileReader.h index f8d25d0da81c69f513ea242dff209099f9e199cb..29c25630918e27408fb6e099549781a94d97cf47 100644 --- a/src/utils/FileRecordTools/FileReaders/FileReader.h +++ b/src/utils/FileRecordTools/FileReaders/FileReader.h @@ -29,6 +29,7 @@ public: virtual int getCurrChromdId() const { return _currChromId; } virtual bool hasHeader() const = 0; virtual const QuickString &getHeader() const =0; + virtual int getNumFields() const = 0; protected: string _filename; BufferedStreamMgr *_bufStreamMgr; diff --git a/src/utils/FileRecordTools/FileReaders/InputStreamMgr.cpp b/src/utils/FileRecordTools/FileReaders/InputStreamMgr.cpp index 5b1d0a9c10831dcfc123efe560ec86acf4fbac3b..2035b9c6529e8ac97f53766b22c2fd98c8eba56b 100644 --- a/src/utils/FileRecordTools/FileReaders/InputStreamMgr.cpp +++ b/src/utils/FileRecordTools/FileReaders/InputStreamMgr.cpp @@ -35,38 +35,29 @@ InputStreamMgr::InputStreamMgr(const QuickString &filename, bool buildScanBuffer InputStreamMgr::~InputStreamMgr() { - if (_pushBackStreamBuf != NULL) { - delete _pushBackStreamBuf; - _pushBackStreamBuf = NULL; - } - if (_inputFileStream != NULL) { - delete _inputFileStream; - _inputFileStream = NULL; - } - if (_oldInputStream != NULL) { - delete _oldInputStream; - _oldInputStream = NULL; - } - if (_infStreamBuf != NULL) { - delete _infStreamBuf; - _infStreamBuf = NULL; - } - if (_bamReader != NULL) { - delete _bamReader; - _bgStream = NULL; - } - if (_bgStream != NULL) { - delete _bgStream; - _bgStream = NULL; - } - if (_finalInputStream != NULL) { - delete _finalInputStream; - _finalInputStream = NULL; - } - if (_tmpZipBuf != NULL) { - delete [] _tmpZipBuf; - _tmpZipBuf = NULL; - } + delete _pushBackStreamBuf; + _pushBackStreamBuf = NULL; + + delete _inputFileStream; + _inputFileStream = NULL; + + delete _oldInputStream; + _oldInputStream = NULL; + + delete _infStreamBuf; + _infStreamBuf = NULL; + + delete _bamReader; + _bgStream = NULL; + + delete _bgStream; + _bgStream = NULL; + + delete _finalInputStream; + _finalInputStream = NULL; + + delete [] _tmpZipBuf; + _tmpZipBuf = NULL; } bool InputStreamMgr::init() @@ -98,7 +89,6 @@ bool InputStreamMgr::init() //now we have a PushBackStreamBuf. Make a new stream. _finalInputStream = new istream(_pushBackStreamBuf); populateScanBuffer(); -// resetStream(); return true; } @@ -240,9 +230,7 @@ bool InputStreamMgr::detectBamOrBgzip(int &numChars, int currChar) _bamRuledOut = true; _numBytesInBuffer = 0; _infStreamBuf = new InflateStreamBuf(_finalInputStream); - if (_oldInputStream != NULL) { - delete _oldInputStream; - } + delete _oldInputStream; _oldInputStream = _finalInputStream; _finalInputStream = new istream(_infStreamBuf); return false; @@ -251,23 +239,6 @@ bool InputStreamMgr::detectBamOrBgzip(int &numChars, int currChar) return false; } -//void InputStreamMgr::decompressBuffer() -//{ -// //allocate an array to hold uncompressed data. -// _saveDataStr.clear(); -// uInt maxDecompressSize = 20 * _numBytesInBuffer; -// unsigned char *newScanBuffer = new unsigned char[maxDecompressSize]; -// memset(newScanBuffer, 0, maxDecompressSize); -// -// unsigned int numDecompressChars = inflateGzippedArray(_scanBuffer, newScanBuffer, maxDecompressSize, _numBytesInBuffer); -// -// // newScanBuffer should now contain uncompressed data. -// //delete old buffer, point it at new buffer. -// _saveDataStr.append((char *)newScanBuffer, numDecompressChars); -// -// delete [] newScanBuffer; -//} - void InputStreamMgr::readZipChunk() { if (_tmpZipBuf == NULL) { diff --git a/src/utils/FileRecordTools/FileReaders/SingleLineDelimTextFileReader.cpp b/src/utils/FileRecordTools/FileReaders/SingleLineDelimTextFileReader.cpp index 12739abe41b42c9412c989387e846cd5464eaf01..38074edf3513817e7413cf4426269fe618d84b7d 100644 --- a/src/utils/FileRecordTools/FileReaders/SingleLineDelimTextFileReader.cpp +++ b/src/utils/FileRecordTools/FileReaders/SingleLineDelimTextFileReader.cpp @@ -16,6 +16,7 @@ SingleLineDelimTextFileReader::SingleLineDelimTextFileReader(int numFields, char SingleLineDelimTextFileReader::~SingleLineDelimTextFileReader() { delete [] _delimPositions; + _delimPositions = NULL; } bool SingleLineDelimTextFileReader::readEntry() diff --git a/src/utils/FileRecordTools/FileRecordMgr.cpp b/src/utils/FileRecordTools/FileRecordMgr.cpp index 16ebf006d9eac5054940b4147e18833f6e66ff1e..38fc056b483236611748943ebcce4a7228f0a95f 100644 --- a/src/utils/FileRecordTools/FileRecordMgr.cpp +++ b/src/utils/FileRecordTools/FileRecordMgr.cpp @@ -1,50 +1,47 @@ #include "FileRecordMgr.h" -#include "ContextIntersect.h" #include "FreeList.h" #include "Record.h" +#include "NewGenomeFile.h" -FileRecordMgr::FileRecordMgr(int fileIdx, ContextBase *context) +FileRecordMgr::FileRecordMgr(const QuickString &filename, bool isSorted) : + _filename(filename), _bufStreamMgr(NULL), - _contextFileIdx(fileIdx), - _context(context), _fileReader(NULL), _fileType(FileRecordTypeChecker::UNKNOWN_FILE_TYPE), _recordType(FileRecordTypeChecker::UNKNOWN_RECORD_TYPE), _recordMgr(NULL), + _isSortedInput(isSorted), _freeListBlockSize(512), _useFullBamTags(false), - _headerSet(false), _prevStart(INT_MAX), _prevChromId(-1), _mustBeForward(false), _mustBeReverse(false), _totalRecordLength(0), - _totalMergedRecordLength(0) + _totalMergedRecordLength(0), + _blockMgr(NULL), + _bamReader(NULL), + _hasGenomeFile(false), + _genomeFile(NULL) { } FileRecordMgr::~FileRecordMgr(){ - if (_bufStreamMgr != NULL) { - delete _bufStreamMgr; - _bufStreamMgr = NULL; - } - if (_fileReader != NULL) { - close(); //just make sure file was closed. - delete _fileReader; - _fileReader = NULL; - } - if (_recordMgr != NULL) { - delete _recordMgr; - _recordMgr = NULL; - } + delete _bufStreamMgr; + _bufStreamMgr = NULL; + + close(); //just make sure file was closed. + delete _fileReader; + _fileReader = NULL; + + delete _recordMgr; + _recordMgr = NULL; } bool FileRecordMgr::open(){ - - _filename = _context->getInputFileName(_contextFileIdx); _bufStreamMgr = new BufferedStreamMgr(_filename); if (!_bufStreamMgr->init()) { cerr << "Error: unable to open file or unable to determine types for file " << _filename << endl; @@ -72,31 +69,14 @@ bool FileRecordMgr::open(){ _bufStreamMgr = NULL; exit(1); } - _context->setInputFileType(_contextFileIdx, _fileType); - _context->setInputRecordType(_contextFileIdx, _recordType); - - //if this is a database file, and the numFields is greater than the numFields in other DB files, update. - // TBD: This is one of many things that will need to be changed to support multiple database files in future versions. - int numFields = _bufStreamMgr->getTypeChecker().getNumFields(); - if (_context->hasIntersectMethods()) { - if (_contextFileIdx == (static_cast<ContextIntersect *>(_context))->getDatabaseFileIdx() && numFields > - (static_cast<ContextIntersect *>(_context))->getMaxNumDatabaseFields()) { - (static_cast<ContextIntersect *>(_context))->setMaxNumDatabaseFields(numFields); - } - } - if (_fileType == FileRecordTypeChecker::BAM_FILE_TYPE) { - _context->setHeader(_contextFileIdx, _fileReader->getHeader()); - _context->setReferences(_contextFileIdx, static_cast<BamFileReader *>(_fileReader)->getReferences()); - _headerSet = true; - } + return true; } void FileRecordMgr::close(){ - if (_bufStreamMgr != NULL) { - delete _bufStreamMgr; - _bufStreamMgr = NULL; - } + delete _bufStreamMgr; + _bufStreamMgr = NULL; + if (_fileReader != NULL) { _fileReader->close(); delete _fileReader; @@ -106,7 +86,6 @@ void FileRecordMgr::close(){ bool FileRecordMgr::eof(){ return _fileReader->eof(); -// return _storedRecords.empty() && _fileReader->eof() ? true: false; } Record *FileRecordMgr::allocateAndGetNextRecord() @@ -117,10 +96,6 @@ Record *FileRecordMgr::allocateAndGetNextRecord() if (!_fileReader->readEntry()) { return NULL; } - if (!_headerSet && _fileReader->hasHeader()) { - _context->setHeader(_contextFileIdx, _fileReader->getHeader()); - _headerSet = true; - } Record *record = NULL; record = _recordMgr->allocateRecord(); if (!record->initFromFile(_fileReader)) { @@ -128,17 +103,18 @@ Record *FileRecordMgr::allocateAndGetNextRecord() return NULL; } - // In the rare case of Bam records where both the read and it's mate failed to map, - // Ignore the record. Delete and return null. + // If the record is unmapped, don't test for valid coords or sort order, + // but still return it so the -v (noHit) option and the like will still + // see it. - if (!(record->isUnmapped() )) { + if (!record->isUnmapped()) { if (!record->coordsValid()) { cerr << "Error: Invalid record in file " << _filename << ". Record is " << endl << *record << endl; exit(1); } //test for sorted order, if necessary. - if (_context->getSortedInput()) { + if (_isSortedInput) { testInputSortOrder(record); } } @@ -149,8 +125,8 @@ Record *FileRecordMgr::allocateAndGetNextRecord() void FileRecordMgr::assignChromId(Record *record) { const QuickString &currChrom = record->getChrName(); - if (currChrom != _prevChrom && _context->hasGenomeFile()) { - _prevChromId = _context->getGenomeFile()->getChromId(currChrom); + if (currChrom != _prevChrom && _hasGenomeFile) { + _prevChromId = _genomeFile->getChromId(currChrom); record->setChromId(_prevChromId); } else { record->setChromId(_prevChromId); @@ -185,8 +161,8 @@ void FileRecordMgr::testInputSortOrder(Record *record) } else { //new chrom has not been seen before. //TBD: test genome file for ChromId. - if (_context->hasGenomeFile()) { - int currChromId = _context->getGenomeFile()->getChromId(currChrom); + if (_hasGenomeFile) { + int currChromId = _genomeFile->getChromId(currChrom); if (currChromId < _prevChromId) { sortError(record, true); } else { @@ -209,7 +185,7 @@ void FileRecordMgr::sortError(const Record *record, bool genomeFileError) { if (genomeFileError) { cerr << "Error: Sorted input specified, but the file " << _filename << " has the following record with a different sort order than the genomeFile " << - _context->getGenomeFile()->getGenomeFileName() << endl; + _genomeFile->getGenomeFileName() << endl; } else { cerr << "Error: Sorted input specified, but the file " << _filename << " has the following out of order record" << endl; } @@ -232,7 +208,7 @@ void FileRecordMgr::allocateFileReader() case FileRecordTypeChecker::BAM_FILE_TYPE: _fileReader = new BamFileReader(); - (static_cast<BamFileReader *>(_fileReader))->setUseTags(_context->getUseFullBamTags()); + (static_cast<BamFileReader *>(_fileReader))->setUseTags(_useFullBamTags); (static_cast<BamFileReader *>(_fileReader))->setBamReader(_bufStreamMgr->getBamReader()); break; default: @@ -240,6 +216,14 @@ void FileRecordMgr::allocateFileReader() } } +const BamTools::RefVector & FileRecordMgr::getBamReferences() { + // exta safety check to insure user checked the file type first. + if (_fileType != FileRecordTypeChecker::BAM_FILE_TYPE) { + cerr << "Error: Attempted to get BAM references from file " << _filename << ", which is NOT a BAM file." << endl; + exit(1); + } + return static_cast<BamFileReader *>(_fileReader)->getReferences(); +} #ifdef false diff --git a/src/utils/FileRecordTools/FileRecordMgr.h b/src/utils/FileRecordTools/FileRecordMgr.h index ff523c47547b8e84d0b166539f9f6cedbd79e937..b4e76e92f1e7e1618d67d28dcedba1581862024c 100644 --- a/src/utils/FileRecordTools/FileRecordMgr.h +++ b/src/utils/FileRecordTools/FileRecordMgr.h @@ -16,7 +16,6 @@ using namespace std; //#include "DualQueue.h" //include headers for all FileReader and derivative classes. -#include "ContextBase.h" #include "BufferedStreamMgr.h" #include "FileReader.h" #include "SingleLineDelimTextFileReader.h" @@ -29,50 +28,47 @@ using namespace std; #include "BlockMgr.h" class Record; +class NewGenomeFile; + class FileRecordMgr { public: - FileRecordMgr(int inputFileIdx, ContextBase *context); + FileRecordMgr(const QuickString & filename, bool isSorted = false); ~FileRecordMgr(); bool open(); void close(); bool eof(); - //NOTE: FOR Non-BAM FILES, HEADER INFORMATION IS CURRENTLY ONLY AVAILABLE AFTER 1st CALL - //TO allocateAndGetNextRecord(). + const QuickString &getFileName() const { return _filename;} bool hasHeader() const { return _fileReader->hasHeader(); } const QuickString &getHeader() const { return _fileReader->getHeader(); } bool recordsHaveName() const { - testBufferedMgr(); return _bufStreamMgr->getTypeChecker().recordTypeHasName(_recordType); } bool recordsHaveScore() const { - testBufferedMgr(); return _bufStreamMgr->getTypeChecker().recordTypeHasScore(_recordType); } bool recordsHaveStrand() const { - testBufferedMgr(); return _bufStreamMgr->getTypeChecker().recordTypeHasStrand(_recordType); } FileRecordTypeChecker::FILE_TYPE getFileType() const { - testBufferedMgr(); return _fileType; } FileRecordTypeChecker::RECORD_TYPE getRecordType() const { - testBufferedMgr(); return _recordType; } const string &getFileTypeName() const { - testBufferedMgr(); return _bufStreamMgr->getTypeChecker().getFileTypeName(); } const string &getRecordTypeName() const { - testBufferedMgr(); return _bufStreamMgr->getTypeChecker().getRecordTypeName(); } + const BamTools::RefVector &getBamReferences(); + + int getNumFields() const { return _fileReader->getNumFields(); } //This is an all-in-one method to give the user a new record that is initialized with //the next entry in the data file. //NOTE!! User MUST pass back the returned pointer to deleteRecord method for cleanup! @@ -139,20 +135,22 @@ public: //will use all the tags, if more information is desired. //MUST BE CALLED BEFORE THE BAM FILE IS OPEN. void setFullBamFlags(bool flag) { _useFullBamTags = flag; } + void setGenomeFile(NewGenomeFile *genomeFile) { + _genomeFile = genomeFile; + _hasGenomeFile = true; + } private: QuickString _filename; BufferedStreamMgr *_bufStreamMgr; - int _contextFileIdx; - ContextBase *_context; FileReader *_fileReader; FileRecordTypeChecker::FILE_TYPE _fileType; FileRecordTypeChecker::RECORD_TYPE _recordType; RecordMgr *_recordMgr; + bool _isSortedInput; int _freeListBlockSize; bool _useFullBamTags; - bool _headerSet; //members for enforcing sorted order. set<QuickString> _foundChroms; @@ -172,18 +170,14 @@ private: BlockMgr *_blockMgr; BamTools::BamReader *_bamReader; - + bool _hasGenomeFile; + NewGenomeFile *_genomeFile; void allocateFileReader(); void testInputSortOrder(Record *record); void assignChromId(Record *); void sortError(const Record *record, bool genomeFileError); - void testBufferedMgr() const { - if (_bufStreamMgr == NULL) { - cerr << "Error: attempted to access type information for file " << _context->getInputFileName(_contextFileIdx) << " before it was opened." << endl; - exit (1); - } - } + #ifdef false void deleteAllMergedItemsButKey(RecordKeyList &recList); diff --git a/src/utils/FileRecordTools/Makefile b/src/utils/FileRecordTools/Makefile index cacf887f920b03582030d3041be93fbacacf88b8..9d021179772c29991565b157bb9aba7d0063e40f 100644 --- a/src/utils/FileRecordTools/Makefile +++ b/src/utils/FileRecordTools/Makefile @@ -9,8 +9,7 @@ INCLUDES = -I$(UTILITIES_DIR)/fileType/ \ -I$(UTILITIES_DIR)/FileRecordTools/Records/ \ -I$(UTILITIES_DIR)/GenomeFile/ \ -I$(UTILITIES_DIR)/general/ \ - -I$(UTILITIES_DIR)/Contexts/ \ - -I$(UTILITIES_DIR)/BamTools/include \ + -I$(UTILITIES_DIR)/BamTools/include \ -I$(UTILITIES_DIR)/BamTools/src/ \ -I$(UTILITIES_DIR)/version/ @@ -22,7 +21,7 @@ SUBDIRS = ./FileReaders \ # ---------------------------------- # define our source and object files # ---------------------------------- -SOURCES= FileRecordMgr.cpp FileRecordMgr.h RecordOutputMgr.cpp RecordOutputMgr.h +SOURCES= FileRecordMgr.cpp FileRecordMgr.h OBJECTS= FileRecordMgr.o RecordOutputMgr.o _EXT_OBJECTS=SingleLineDelimTextFileReader.o BamFileReader.o Bed3Interval.o Bed6Interval.o BedPlusInterval.o Bed12Interval.o BamRecord.o \ SingleLineDelimTransferBuffer.o FileRecordTypeChecker.o QuickString.o ParseTools.o RecordKeyList.o BufferedStreamMgr.o @@ -32,8 +31,6 @@ BUILT_OBJECTS= $(patsubst %,$(OBJ_DIR)/%,$(OBJECTS)) $(BUILT_OBJECTS): $(SOURCES) $(SUBDIRS) @echo " * compiling FileRecordMgr.cpp" @$(CXX) -c -o $(OBJ_DIR)/FileRecordMgr.o FileRecordMgr.cpp $(LDFLAGS) $(CXXFLAGS) $(INCLUDES) - @echo " * compiling RecordOutputMgr.cpp" - @$(CXX) -c -o $(OBJ_DIR)/RecordOutputMgr.o RecordOutputMgr.cpp $(LDFLAGS) $(CXXFLAGS) $(INCLUDES) @@ -49,7 +46,6 @@ clean: @rm -f $(OBJ_DIR)/FileRecordTypeChecker.o @rm -f $(OBJ_DIR)/SingleLineDelimTextFileReader.o @rm -f $(OBJ_DIR)/SingleLineDelimTransferBuffer.o - @rm -f $(OBJ_DIR)/RecordOutputMgr.o .PHONY: clean \ No newline at end of file diff --git a/src/utils/FileRecordTools/Records/BlockMgr.cpp b/src/utils/FileRecordTools/Records/BlockMgr.cpp index 4aede83d0de63f3259cf81e66d20fa59b7c18f05..21b060df73b71af677bde159f1a980c73b48bbf8 100644 --- a/src/utils/FileRecordTools/Records/BlockMgr.cpp +++ b/src/utils/FileRecordTools/Records/BlockMgr.cpp @@ -7,17 +7,18 @@ #include "BlockMgr.h" #include "RecordMgr.h" -#include "ContextIntersect.h" #include "Bed12Interval.h" #include "BamRecord.h" #include "ParseTools.h" #include "api/BamAlignment.h" #include "api/BamAux.h" -BlockMgr::BlockMgr() +BlockMgr::BlockMgr(float overlapFraction, bool hasReciprocal) : _blockRecordsMgr(NULL), _breakOnDeletionOps(false), - _breakOnSkipOps(true) + _breakOnSkipOps(true), + _overlapFraction(overlapFraction), + _hasReciprocal(hasReciprocal) { _blockRecordsMgr = new RecordMgr(_blockRecordsType); } @@ -194,11 +195,11 @@ int BlockMgr::findBlockedOverlaps(RecordKeyList &keyList, RecordKeyList &hitList } _overlapBases.push_back(totalHitOverlap); if (hitHasOverlap) { - if ((float) totalHitOverlap / (float)keyBlocksSumLength > _context->getOverlapFraction()) { - if (_context->getReciprocal() && - ((float)totalHitOverlap / (float)hitBlockSumLength > _context->getOverlapFraction())) { + if ((float) totalHitOverlap / (float)keyBlocksSumLength > _overlapFraction) { + if (_hasReciprocal && + ((float)totalHitOverlap / (float)hitBlockSumLength > _overlapFraction)) { resultList.push_back(hitListIter->value()); - } else if (!_context->getReciprocal()) { + } else if (!_hasReciprocal) { resultList.push_back(hitListIter->value()); } } diff --git a/src/utils/FileRecordTools/Records/BlockMgr.h b/src/utils/FileRecordTools/Records/BlockMgr.h index 86ddfd7e2b5e1b3d7de129e10b18d87b0cb13866..c83b1e0125284bb5743c95ff5d3cc91708a06d04 100644 --- a/src/utils/FileRecordTools/Records/BlockMgr.h +++ b/src/utils/FileRecordTools/Records/BlockMgr.h @@ -16,14 +16,12 @@ using namespace std; #include "FileRecordTypeChecker.h" #include "RecordKeyList.h" -class ContextIntersect; class RecordMgr; class BlockMgr { public: - BlockMgr(); + BlockMgr(float overlapFraction = 1E-9, bool hasReciprocal = false); ~BlockMgr(); - void setContext(ContextIntersect *context) { _context = context; } // Return value is the number of blocks this main record has been split into. void getBlocks(RecordKeyList &keyList, bool &mustDelete); @@ -46,11 +44,13 @@ public: private: RecordMgr *_blockRecordsMgr; - ContextIntersect *_context; bool _breakOnDeletionOps; bool _breakOnSkipOps; vector<int> _overlapBases; + float _overlapFraction; + bool _hasReciprocal; + // For now, all records will be split into Bed6 records. const static FileRecordTypeChecker::RECORD_TYPE _blockRecordsType = FileRecordTypeChecker::BED6_RECORD_TYPE; void getBlocksFromBed12(RecordKeyList &keyList, bool &mustDelete); diff --git a/src/utils/NewChromsweep/NewChromsweep.cpp b/src/utils/NewChromsweep/NewChromsweep.cpp index a6b5d18049e71cb0a99625496ab5deed9fb24f0c..5e6045be625799cca515ac056e9a17b33f1908a6 100644 --- a/src/utils/NewChromsweep/NewChromsweep.cpp +++ b/src/utils/NewChromsweep/NewChromsweep.cpp @@ -37,18 +37,9 @@ bool NewChromSweep::init() { //Open them, and get the first record from each. //if any of that goes wrong, return false; //otherwise, return true. - _queryFRM = new FileRecordMgr(_context->getQueryFileIdx(), _context); - _databaseFRM = new FileRecordMgr(_context->getDatabaseFileIdx(), _context); + _queryFRM = _context->getFile(_context->getQueryFileIdx()); + _databaseFRM = _context->getFile(_context->getDatabaseFileIdx()); - if (!_queryFRM->open()) { - return false; - } - if (!_databaseFRM->open()) { - return false; - } - - _context->determineOutputType(); - nextRecord(false); if (_currDatabaseRec == NULL) { return false; @@ -77,18 +68,14 @@ NewChromSweep::~NewChromSweep(void) { if (!_wasInitialized) { return; } - if (_currQueryRec != NULL) { - _queryFRM->deleteRecord(_currQueryRec); - } - if (_currDatabaseRec != NULL) { - _databaseFRM->deleteRecord(_currDatabaseRec); - } - _queryFRM->close(); - _databaseFRM->close(); + _queryFRM->deleteRecord(_currQueryRec); + _currQueryRec = NULL; - delete _queryFRM; - delete _databaseFRM; + _databaseFRM->deleteRecord(_currDatabaseRec); + _currDatabaseRec = NULL; + _queryFRM->close(); + _databaseFRM->close(); } diff --git a/src/utils/RecordOutputMgr/Makefile b/src/utils/RecordOutputMgr/Makefile new file mode 100644 index 0000000000000000000000000000000000000000..2d196ec11518d10b6f57877c77bc35b7b9063446 --- /dev/null +++ b/src/utils/RecordOutputMgr/Makefile @@ -0,0 +1,39 @@ +OBJ_DIR = ../../../obj/ +BIN_DIR = ../../../bin/ +UTILITIES_DIR = ../../utils/ +# ------------------- +# define our includes +# ------------------- +INCLUDES = -I$(UTILITIES_DIR)/general/ \ + -I$(UTILITIES_DIR)/fileType/ \ + -I$(UTILITIES_DIR)/Contexts/ \ + -I$(UTILITIES_DIR)/GenomeFile/ \ + -I$(UTILITIES_DIR)/FileRecordTools/ \ + -I$(UTILITIES_DIR)/FileRecordTools/FileReaders/ \ + -I$(UTILITIES_DIR)/FileRecordTools/Records/ \ + -I$(UTILITIES_DIR)/BamTools/include \ + -I$(UTILITIES_DIR)/BamTools/src/ \ + -I$(UTILITIES_DIR)/version/ + + +# ---------------------------------- +# define our source and object files +# ---------------------------------- +SOURCES= RecordOutputMgr.cpp RecordOutputMgr.h +OBJECTS= RecordOutputMgr.o +_EXT_OBJECTS= +EXT_OBJECTS=$(patsubst %,$(OBJ_DIR)/%,$(_EXT_OBJECTS)) +BUILT_OBJECTS= $(patsubst %,$(OBJ_DIR)/%,$(OBJECTS)) + +$(BUILT_OBJECTS): $(SOURCES) + @echo " * compiling" $(*F).cpp + @$(CXX) -c -o $@ $(*F).cpp $(LDFLAGS) $(CXXFLAGS) $(INCLUDES) + +$(EXT_OBJECTS): + @$(MAKE) --no-print-directory -C $(INCLUDES) + +clean: + @echo "Cleaning up." + @rm -f $(OBJ_DIR)/RecordOutputMgr.o + +.PHONY: clean \ No newline at end of file diff --git a/src/utils/FileRecordTools/RecordOutputMgr.cpp b/src/utils/RecordOutputMgr/RecordOutputMgr.cpp similarity index 93% rename from src/utils/FileRecordTools/RecordOutputMgr.cpp rename to src/utils/RecordOutputMgr/RecordOutputMgr.cpp index 29148a2862094f716b519f712af90b7f221a3e8d..55cb70ec9aa74af58b33e5faf32d18b19c0b0bae 100644 --- a/src/utils/FileRecordTools/RecordOutputMgr.cpp +++ b/src/utils/RecordOutputMgr/RecordOutputMgr.cpp @@ -50,14 +50,15 @@ RecordOutputMgr::~RecordOutputMgr() } -bool RecordOutputMgr::init(ContextBase *context) { +void RecordOutputMgr::init(ContextBase *context) { _context = context; if (_context->getOutputFileType() == FileRecordTypeChecker::BAM_FILE_TYPE) { //set-up BAM writer. _bamWriter = new BamTools::BamWriter(); _bamWriter->SetCompressionMode(_context->getUncompressedBam() ? BamTools::BamWriter::Uncompressed : BamTools::BamWriter::Compressed); - _bamWriter->Open("stdout", _context->getHeader(_context->getBamHeaderAndRefIdx()).c_str(), _context->getReferences(_context->getBamHeaderAndRefIdx())); + int bamFileIdx = _context->getBamHeaderAndRefIdx(); + _bamWriter->Open("stdout", _context->getFile(bamFileIdx)->getHeader().c_str(), _context->getFile(bamFileIdx)->getBamReferences()); } else { //for everything but BAM, we'll copy output to an output buffer before printing. _outBuf.reserve(MAX_OUTBUF_SIZE); @@ -68,14 +69,8 @@ bool RecordOutputMgr::init(ContextBase *context) { _printable = false; } } - return true; } -//void RecordOutputMgr::printHeader(const string &header) -//{ -// _outBuf.append(header); -//} - bool RecordOutputMgr::printKeyAndTerminate(RecordKeyList &keyList) { printBamType bamCode = printBamRecord(keyList); if (bamCode == BAM_AS_BAM) { @@ -215,15 +210,13 @@ void RecordOutputMgr::printRecord(RecordKeyList &keyList, RecordKeyList *blockLi } void RecordOutputMgr::checkForHeader() { - if (_context->getProgram() == ContextBase::INTERSECT || - _context->getProgram() == ContextBase::MAP) { - if (_context->getPrintHeader()) { - _outBuf.append(_context->getHeader((static_cast<ContextIntersect *>(_context))->getQueryFileIdx())); - } - } else if (_context->getProgram() == ContextBase::SAMPLE) { - if (_context->getPrintHeader()) { - _outBuf.append(_context->getHeader(_context->getInputFileIdx())); - } + //if the program is based on intersection, we want the header from the query file. + if (_context->hasIntersectMethods()) { + int queryIdx = (static_cast<ContextIntersect *>(_context))->getQueryFileIdx(); + const QuickString &header = _context->getFile(queryIdx)->getHeader(); + _outBuf.append(header); + } else { + _outBuf.append(_context->getFile(0)->getHeader()); } _context->setPrintHeader(false); if (needsFlush()) flush(); @@ -358,14 +351,14 @@ void RecordOutputMgr::reportOverlapSummary(RecordKeyList &keyList) void RecordOutputMgr::null(bool queryType, bool dbType) { FileRecordTypeChecker::RECORD_TYPE recordType = FileRecordTypeChecker::UNKNOWN_RECORD_TYPE; - if (_context->getProgram() == ContextBase::INTERSECT) { + if (_context->hasIntersectMethods()) { if (queryType) { recordType = (static_cast<ContextIntersect *>(_context))->getQueryRecordType(); } else if (dbType) { recordType = (static_cast<ContextIntersect *>(_context))->getDatabaseRecordType(); } - } else if (_context->getProgram() == ContextBase::SAMPLE) { - recordType = _context->getInputRecordType(); + } else { + recordType = _context->getFile(0)->getRecordType(); } //This is kind of a hack. Need an instance of the correct class of record in order to call it's printNull method. Record *dummyRecord = NULL; @@ -434,4 +427,4 @@ void RecordOutputMgr::printKey(const Record *key) void RecordOutputMgr::flush() { fwrite(_outBuf.c_str(), 1, _outBuf.size(), stdout); _outBuf.clear(); -} \ No newline at end of file +} diff --git a/src/utils/FileRecordTools/RecordOutputMgr.h b/src/utils/RecordOutputMgr/RecordOutputMgr.h similarity index 96% rename from src/utils/FileRecordTools/RecordOutputMgr.h rename to src/utils/RecordOutputMgr/RecordOutputMgr.h index 16b6cb531687d3fd151f41dc67778870537b7d7e..b6296fdadc2d166d4c4d00b0b1f12fe306d46e70 100644 --- a/src/utils/FileRecordTools/RecordOutputMgr.h +++ b/src/utils/RecordOutputMgr/RecordOutputMgr.h @@ -22,7 +22,7 @@ public: ~RecordOutputMgr(); //The init method must be called after all the input files are open. - bool init(ContextBase *context); + void init(ContextBase *context); void printRecord(const Record *record); void printRecord(RecordKeyList &keyList); @@ -69,4 +69,4 @@ private: void flush(); }; -#endif /* RECORDOUTPUTMGR_H_ */ \ No newline at end of file +#endif /* RECORDOUTPUTMGR_H_ */ diff --git a/test/intersect/gdc.gff b/test/intersect/gdc.gff new file mode 100644 index 0000000000000000000000000000000000000000..4777534dc1f72c874a9b7390ca72fcbb80a1067a --- /dev/null +++ b/test/intersect/gdc.gff @@ -0,0 +1,12 @@ +chr2L . UTR 41 70 0 + . ID=mRNA:xs2:UTR:41-70;Parent=mRNA:xs2; +chr2L . CDS 71 130 0 + . ID=mRNA:xs2:CDS:71-130;Parent=mRNA:xs2; +chr2L . intron 131 170 0 + . ID=mRNA:xs2:intron:131-170;Parent=mRNA:xs2; +chr2L . CDS 171 200 0 + . ID=mRNA:xs2:CDS:171-200;Parent=mRNA:xs2; +chr2L . UTR 201 220 0 + . ID=mRNA:xs2:UTR:201-220;Parent=mRNA:xs2; +chr2L . exon 41 130 0 + . ID=mRNA:xs2:exon:41-130;Parent=mRNA:xs2; +chr2L . exon 171 220 0 + . ID=mRNA:xs2:exon:171-220;Parent=mRNA:xs2; +chr2L . mRNA 41 220 0 + . ID=mRNA:xs2;Parent=g2; +chr2L . CDS 161 230 0 - . ID=tRNA:t2:CDS:161-230;Parent=tRNA:t2; +chr2L . exon 161 230 0 - . ID=tRNA:t2:exon:161-230;Parent=tRNA:t2; +chr2L . tRNA 161 230 0 - . ID=tRNA:t2;Parent=t2; +chr2L . gene 41 220 0 + . ID=g2; diff --git a/test/intersect/gdc_one.bed b/test/intersect/gdc_one.bed new file mode 100644 index 0000000000000000000000000000000000000000..377732965e189e333fc7fb578cf21bf151401daa --- /dev/null +++ b/test/intersect/gdc_one.bed @@ -0,0 +1 @@ +chr2L 50 100 diff --git a/test/intersect/new_test-intersect.sh b/test/intersect/new_test-intersect.sh index 8516c7cb3124956846e8fda2322777593f12eb77..4bdcdf05a1b208d1339dd21c416ef924d59d089e 100755 --- a/test/intersect/new_test-intersect.sh +++ b/test/intersect/new_test-intersect.sh @@ -638,8 +638,31 @@ $BT intersect -a gdc.bam -b gdc.bam -bed > obs check obs gdc_exp rm obs +########################################################### +# Test that if the query is BAM, and bed output is not +# explicit, and they asked for an output option that is not +# valid with BAM output, an error is thrown. +############################################################ +echo " intersect.new.t54...\c" +echo "***** ERROR: writeAllOverlap option is not valid with BAM query input, unless bed output is specified with -bed option. *****" > exp +$BT intersect -a a.bam -b b.bed -wao 2>&1 > /dev/null | head -2 | tail -1 > obs +check obs exp +rm obs exp +########################################################### +# Test that gff files work correctly +############################################################ +echo " intersect.new.t55...\c" +echo \ +"chr2L . UTR 50 70 0 + . ID=mRNA:xs2:UTR:41-70;Parent=mRNA:xs2; +chr2L . CDS 71 100 0 + . ID=mRNA:xs2:CDS:71-130;Parent=mRNA:xs2; +chr2L . exon 50 100 0 + . ID=mRNA:xs2:exon:41-130;Parent=mRNA:xs2; +chr2L . mRNA 50 100 0 + . ID=mRNA:xs2;Parent=g2; +chr2L . gene 50 100 0 + . ID=g2;" > exp +$BT intersect -a gdc.gff -b gdc_one.bed > obs +check obs exp +rm obs exp diff --git a/test/map/test-map.sh b/test/map/test-map.sh index bd25ff4084bcc698b9bb64080b9630c77fb1627e..293d84e3f443ac7b2bbe405e36b77f752c7298f0 100644 --- a/test/map/test-map.sh +++ b/test/map/test-map.sh @@ -661,3 +661,24 @@ $BT map -a ivls.bed -b values5.bed -c 0 -o collapse 2> obs check obs exp rm obs exp + + +########################################################### +# Test that Bam database is not allowed +############################################################ +echo " map.t44...\c" +echo -e "\n*****\n***** ERROR: BAM database file not currently supported for the map tool." > exp +$BT map -a ivls.bed -b values.bam 2> obs +check obs exp +rm obs exp + + + +########################################################### +# Test that -split option works correctly +############################################################ +echo " map.t45...\c" +echo "chr1 0 50 three_blocks_match 15 + 0 0 0 3 10,10,10, 0,20,40, ." > exp +$BT map -o sum -a three_blocks_match.bed -b three_blocks_nomatch.bed -split > obs +check obs exp +rm obs exp diff --git a/test/map/three_blocks_match.bed b/test/map/three_blocks_match.bed new file mode 100644 index 0000000000000000000000000000000000000000..e4004fae96aaecd82f07dafaf801aa8aee1d3c38 --- /dev/null +++ b/test/map/three_blocks_match.bed @@ -0,0 +1 @@ +chr1 0 50 three_blocks_match 15 + 0 0 0 3 10,10,10, 0,20,40, diff --git a/test/map/three_blocks_nomatch.bed b/test/map/three_blocks_nomatch.bed new file mode 100644 index 0000000000000000000000000000000000000000..3db90e037111ae9bf754a24e163872c76a431eb2 --- /dev/null +++ b/test/map/three_blocks_nomatch.bed @@ -0,0 +1 @@ +chr1 10 60 three_blocks_nomatch 25 + 0 0 0 3 10,10,10, 0,20,40, diff --git a/test/map/values.bam b/test/map/values.bam new file mode 100644 index 0000000000000000000000000000000000000000..97caea3c59eed7e599621bcbec98d78441224b59 Binary files /dev/null and b/test/map/values.bam differ diff --git a/test/map/values.bed b/test/map/values.bed index 3710f4c7bd6fbd7984f60c32f801591390eefae9..74a08390d2f02e5da58417f197047498553a8003 100644 --- a/test/map/values.bed +++ b/test/map/values.bed @@ -5,4 +5,4 @@ chr1 120 130 a4 1 + chr3 0 10 a5 1 + chr3 10 20 a6 2 + chr3 20 30 a7 3 + -chr3 120 130 a8 4 + \ No newline at end of file +chr3 120 130 a8 4 +