From e1f71dfe63e1d513114e7ef971052a9c06c6d7b5 Mon Sep 17 00:00:00 2001 From: Brent Pedersen <bpederse@gmail.com> Date: Tue, 28 Oct 2014 11:41:00 -0600 Subject: [PATCH] initial go at -exclude for fisher --- src/fisher/Fisher.cpp | 16 +++++++++++-- src/fisher/Fisher.h | 5 ++++ src/fisher/Makefile | 35 ++++++++++++++++------------ src/utils/Contexts/ContextFisher.cpp | 19 ++++++++++++++- src/utils/Contexts/ContextFisher.h | 7 ++++++ src/utils/bedFile/bedFile.cpp | 18 +++++++++++--- src/utils/bedFile/bedFile.h | 1 + 7 files changed, 80 insertions(+), 21 deletions(-) diff --git a/src/fisher/Fisher.cpp b/src/fisher/Fisher.cpp index 8463e2f9..ea3b073f 100644 --- a/src/fisher/Fisher.cpp +++ b/src/fisher/Fisher.cpp @@ -12,6 +12,14 @@ Fisher::Fisher(ContextFisher *context) _dbLen(0) { _blockMgr = new BlockMgr(_context->getOverlapFraction(), _context->getReciprocal()); + _haveExclude = false; + + if(!(_context->getExcludeFile().empty())){ + string ex = _context->getExcludeFile(); + exclude = new BedFile(ex); + exclude->loadBedFileIntoMergedMap(); + _haveExclude = true; + } } Fisher::~Fisher(void) { @@ -36,6 +44,9 @@ bool Fisher::calculate() { double left, right, two; long long genomeSize = _context->getGenomeFile()->getGenomeSize(); + if(_haveExclude){ + genomeSize -= exclude->getTotalFlattenedLength(); + } // bases covered by neither a nor b long long n11 = genomeSize - _queryLen - _dbLen + _intersectionVal; // bases covered only by -b @@ -46,9 +57,9 @@ bool Fisher::calculate() { long long n22 = _intersectionVal; printf("#_________________________________________\n"); - printf("# | %-12s | %-12s |\n", "not in -b", "in -b"); + printf("# | %-12s | %-12s |\n", "not in -b", "in -b"); printf("# not in -a | %-12lld | %-12lld |\n", n11, n12); - printf("# in -a | %-12lld | %-12lld |\n", n21, n22); + printf("# in -a | %-12lld | %-12lld |\n", n21, n22); printf("#_________________________________________\n"); kt_fisher_exact(n11, n12, n21, n22, &left, &right, &two); @@ -66,6 +77,7 @@ bool Fisher::getFisher() { if (!sweep.init()) { return false; } + RecordKeyVector hitSet; while (sweep.next(hitSet)) { if (_context->getObeySplits()) { diff --git a/src/fisher/Fisher.h b/src/fisher/Fisher.h index 01f2157d..e10bc8fb 100644 --- a/src/fisher/Fisher.h +++ b/src/fisher/Fisher.h @@ -1,6 +1,9 @@ #ifndef FISHER_H #define FISHER_H +#include "bedFile.h" +#include "bedFilePE.h" + #include "ContextFisher.h" class BlockMgr; @@ -19,10 +22,12 @@ private: BlockMgr *_blockMgr; unsigned long _intersectionVal; unsigned long _unionVal; + bool _haveExclude; int _numIntersections; unsigned long _queryLen; unsigned long _dbLen; bool getFisher(); + BedFile *exclude; unsigned long getTotalIntersection(RecordKeyVector &hits); }; diff --git a/src/fisher/Makefile b/src/fisher/Makefile index b94a5097..e9b66511 100644 --- a/src/fisher/Makefile +++ b/src/fisher/Makefile @@ -9,27 +9,32 @@ BIN_DIR = ../../bin/ INCLUDES = -I$(UTILITIES_DIR)/Contexts/ \ -I$(UTILITIES_DIR)/general/ \ -I$(UTILITIES_DIR)/fileType/ \ - -I$(UTILITIES_DIR)/gzstream/ \ - -I$(UTILITIES_DIR)/GenomeFile/ \ - -I$(UTILITIES_DIR)/BamTools/include \ - -I$(UTILITIES_DIR)/BamTools/src \ - -I$(UTILITIES_DIR)/BlockedIntervals \ - -I$(UTILITIES_DIR)/BamTools-Ancillary \ - -I$(UTILITIES_DIR)/FileRecordTools/ \ - -I$(UTILITIES_DIR)/FileRecordTools/FileReaders/ \ - -I$(UTILITIES_DIR)/FileRecordTools/Records/ \ - -I$(UTILITIES_DIR)/RecordOutputMgr/ \ + -I$(UTILITIES_DIR)/gzstream/ \ + -I$(UTILITIES_DIR)/bedFile/ \ + -I$(UTILITIES_DIR)/bedFilePE/ \ + -I$(UTILITIES_DIR)/GenomeFile/ \ + -I$(UTILITIES_DIR)/lineFileUtilities/ \ + -I$(UTILITIES_DIR)/BamTools/include \ + -I$(UTILITIES_DIR)/BamTools/src \ + -I$(UTILITIES_DIR)/BlockedIntervals \ + -I$(UTILITIES_DIR)/BamTools-Ancillary \ + -I$(UTILITIES_DIR)/FileRecordTools/ \ + -I$(UTILITIES_DIR)/FileRecordTools/FileReaders/ \ + -I$(UTILITIES_DIR)/FileRecordTools/Records/ \ + -I$(UTILITIES_DIR)/RecordOutputMgr/ \ -I$(UTILITIES_DIR)/KeyListOps/ \ - -I$(UTILITIES_DIR)/NewChromsweep \ - -I$(UTILITIES_DIR)/VectorOps \ + -I$(UTILITIES_DIR)/NewChromsweep \ + -I$(UTILITIES_DIR)/VectorOps \ -I . \ - -I$(UTILITIES_DIR)/BinTree \ - -I$(UTILITIES_DIR)/version/ + -I$(UTILITIES_DIR)/BinTree \ + -I$(UTILITIES_DIR)/version/ # ---------------------------------- # define our source and object files # ---------------------------------- -SOURCES= fisherMain.cpp Fisher.cpp Fisher.h kfunc.c +SOURCES= fisherMain.cpp Fisher.cpp Fisher.h kfunc.c \ + ../utils/Contexts/ContextFisher.cpp \ + ../utils/Contexts/ContextFisher.h OBJECTS= fisherMain.o Fisher.o BUILT_OBJECTS= $(patsubst %,$(OBJ_DIR)/%,$(OBJECTS)) PROGRAM= Fisher diff --git a/src/utils/Contexts/ContextFisher.cpp b/src/utils/Contexts/ContextFisher.cpp index 90f07282..9452c069 100644 --- a/src/utils/Contexts/ContextFisher.cpp +++ b/src/utils/Contexts/ContextFisher.cpp @@ -8,7 +8,6 @@ ContextFisher::ContextFisher() { setSortedInput(true); setUseMergedIntervals(true); - } ContextFisher::~ContextFisher() { @@ -38,6 +37,9 @@ bool ContextFisher::parseCmdArgs(int argc, char **argv, int skipFirstArgs) else if (strcmp(_argv[_i], "-S") == 0) { if (!handle_S()) return false; } + else if (strcmp(_argv[_i], "-exclude") == 0) { + if (!handle_exclude()) return false; + } if (strcmp(_argv[_i], "-g") == 0) { if (!handle_g()) return false; } @@ -112,4 +114,19 @@ bool ContextFisher::handle_S() { return false; } +bool ContextFisher::handle_exclude() +{ + if (_argc <= _i+1) { + _errorMsg = "\n***** ERROR: -exclude option given, but no file specified. *****"; + return false; + } + + do { + markUsed(_i - _skipFirstArgs); + _i++; + markUsed(_i - _skipFirstArgs); + } while (_argc > _i+1 && _argv[_i+1][0] != '-'); + setExcludeFile(string(_argv[_i])); + return true; +} diff --git a/src/utils/Contexts/ContextFisher.h b/src/utils/Contexts/ContextFisher.h index 49e7dcb2..fd1d8ada 100644 --- a/src/utils/Contexts/ContextFisher.h +++ b/src/utils/Contexts/ContextFisher.h @@ -17,10 +17,17 @@ public: ~ContextFisher(); virtual bool parseCmdArgs(int argc, char **argv, int skipFirstArgs); virtual bool isValidState(); + string getExcludeFile() { return _excludeFile; } + void setExcludeFile(string excludeFile) { _excludeFile = excludeFile; } + private: bool handle_s(); bool handle_S(); + bool handle_exclude(); + string _excludeFile; + + }; #endif /* CONTEXTFISHER_H_ */ diff --git a/src/utils/bedFile/bedFile.cpp b/src/utils/bedFile/bedFile.cpp index 7c1ae8dc..ed8d229d 100644 --- a/src/utils/bedFile/bedFile.cpp +++ b/src/utils/bedFile/bedFile.cpp @@ -657,12 +657,24 @@ void BedFile::setBed12 (bool isBed12) { this->isBed12 = isBed12; } void BedFile::loadBedFileIntoMap() { BED bedEntry; - + Open(); while (GetNextBed(bedEntry)) { if (_status == BED_VALID) { - BIN bin = getBin(bedEntry.start, bedEntry.end); - bedMap[bedEntry.chrom][bin].push_back(bedEntry); + addBEDIntoMap(bedEntry); + } + } + Close(); +} + +void BedFile::loadBedFileIntoMergedMap() { + + BED bedEntry; + + Open(); + while (GetNextMergedBed(bedEntry)) { + if (_status == BED_VALID) { + addBEDIntoMap(bedEntry); } } Close(); diff --git a/src/utils/bedFile/bedFile.h b/src/utils/bedFile/bedFile.h index 440986c0..a738c21e 100644 --- a/src/utils/bedFile/bedFile.h +++ b/src/utils/bedFile/bedFile.h @@ -418,6 +418,7 @@ public: // load a BED file into a map keyed by chrom, then bin. value is // vector of BEDs void loadBedFileIntoMap(); + void loadBedFileIntoMergedMap(); // load a BED entry into and existing map void addBEDIntoMap(BED bedEntry); -- GitLab