Commit e1f71dfe authored by Brent Pedersen's avatar Brent Pedersen
Browse files

initial go at -exclude for fisher

parent e6f14186
......@@ -12,6 +12,14 @@ Fisher::Fisher(ContextFisher *context)
_dbLen(0)
{
_blockMgr = new BlockMgr(_context->getOverlapFraction(), _context->getReciprocal());
_haveExclude = false;
if(!(_context->getExcludeFile().empty())){
string ex = _context->getExcludeFile();
exclude = new BedFile(ex);
exclude->loadBedFileIntoMergedMap();
_haveExclude = true;
}
}
Fisher::~Fisher(void) {
......@@ -36,6 +44,9 @@ bool Fisher::calculate() {
double left, right, two;
long long genomeSize = _context->getGenomeFile()->getGenomeSize();
if(_haveExclude){
genomeSize -= exclude->getTotalFlattenedLength();
}
// bases covered by neither a nor b
long long n11 = genomeSize - _queryLen - _dbLen + _intersectionVal;
// bases covered only by -b
......@@ -46,9 +57,9 @@ bool Fisher::calculate() {
long long n22 = _intersectionVal;
printf("#_________________________________________\n");
printf("# | %-12s | %-12s |\n", "not in -b", "in -b");
printf("# | %-12s | %-12s |\n", "not in -b", "in -b");
printf("# not in -a | %-12lld | %-12lld |\n", n11, n12);
printf("# in -a | %-12lld | %-12lld |\n", n21, n22);
printf("# in -a | %-12lld | %-12lld |\n", n21, n22);
printf("#_________________________________________\n");
kt_fisher_exact(n11, n12, n21, n22, &left, &right, &two);
......@@ -66,6 +77,7 @@ bool Fisher::getFisher() {
if (!sweep.init()) {
return false;
}
RecordKeyVector hitSet;
while (sweep.next(hitSet)) {
if (_context->getObeySplits()) {
......
#ifndef FISHER_H
#define FISHER_H
#include "bedFile.h"
#include "bedFilePE.h"
#include "ContextFisher.h"
class BlockMgr;
......@@ -19,10 +22,12 @@ private:
BlockMgr *_blockMgr;
unsigned long _intersectionVal;
unsigned long _unionVal;
bool _haveExclude;
int _numIntersections;
unsigned long _queryLen;
unsigned long _dbLen;
bool getFisher();
BedFile *exclude;
unsigned long getTotalIntersection(RecordKeyVector &hits);
};
......
......@@ -9,27 +9,32 @@ BIN_DIR = ../../bin/
INCLUDES = -I$(UTILITIES_DIR)/Contexts/ \
-I$(UTILITIES_DIR)/general/ \
-I$(UTILITIES_DIR)/fileType/ \
-I$(UTILITIES_DIR)/gzstream/ \
-I$(UTILITIES_DIR)/GenomeFile/ \
-I$(UTILITIES_DIR)/BamTools/include \
-I$(UTILITIES_DIR)/BamTools/src \
-I$(UTILITIES_DIR)/BlockedIntervals \
-I$(UTILITIES_DIR)/BamTools-Ancillary \
-I$(UTILITIES_DIR)/FileRecordTools/ \
-I$(UTILITIES_DIR)/FileRecordTools/FileReaders/ \
-I$(UTILITIES_DIR)/FileRecordTools/Records/ \
-I$(UTILITIES_DIR)/RecordOutputMgr/ \
-I$(UTILITIES_DIR)/gzstream/ \
-I$(UTILITIES_DIR)/bedFile/ \
-I$(UTILITIES_DIR)/bedFilePE/ \
-I$(UTILITIES_DIR)/GenomeFile/ \
-I$(UTILITIES_DIR)/lineFileUtilities/ \
-I$(UTILITIES_DIR)/BamTools/include \
-I$(UTILITIES_DIR)/BamTools/src \
-I$(UTILITIES_DIR)/BlockedIntervals \
-I$(UTILITIES_DIR)/BamTools-Ancillary \
-I$(UTILITIES_DIR)/FileRecordTools/ \
-I$(UTILITIES_DIR)/FileRecordTools/FileReaders/ \
-I$(UTILITIES_DIR)/FileRecordTools/Records/ \
-I$(UTILITIES_DIR)/RecordOutputMgr/ \
-I$(UTILITIES_DIR)/KeyListOps/ \
-I$(UTILITIES_DIR)/NewChromsweep \
-I$(UTILITIES_DIR)/VectorOps \
-I$(UTILITIES_DIR)/NewChromsweep \
-I$(UTILITIES_DIR)/VectorOps \
-I . \
-I$(UTILITIES_DIR)/BinTree \
-I$(UTILITIES_DIR)/version/
-I$(UTILITIES_DIR)/BinTree \
-I$(UTILITIES_DIR)/version/
# ----------------------------------
# define our source and object files
# ----------------------------------
SOURCES= fisherMain.cpp Fisher.cpp Fisher.h kfunc.c
SOURCES= fisherMain.cpp Fisher.cpp Fisher.h kfunc.c \
../utils/Contexts/ContextFisher.cpp \
../utils/Contexts/ContextFisher.h
OBJECTS= fisherMain.o Fisher.o
BUILT_OBJECTS= $(patsubst %,$(OBJ_DIR)/%,$(OBJECTS))
PROGRAM= Fisher
......
......@@ -8,7 +8,6 @@
ContextFisher::ContextFisher() {
setSortedInput(true);
setUseMergedIntervals(true);
}
ContextFisher::~ContextFisher() {
......@@ -38,6 +37,9 @@ bool ContextFisher::parseCmdArgs(int argc, char **argv, int skipFirstArgs)
else if (strcmp(_argv[_i], "-S") == 0) {
if (!handle_S()) return false;
}
else if (strcmp(_argv[_i], "-exclude") == 0) {
if (!handle_exclude()) return false;
}
if (strcmp(_argv[_i], "-g") == 0) {
if (!handle_g()) return false;
}
......@@ -112,4 +114,19 @@ bool ContextFisher::handle_S() {
return false;
}
bool ContextFisher::handle_exclude()
{
if (_argc <= _i+1) {
_errorMsg = "\n***** ERROR: -exclude option given, but no file specified. *****";
return false;
}
do {
markUsed(_i - _skipFirstArgs);
_i++;
markUsed(_i - _skipFirstArgs);
} while (_argc > _i+1 && _argv[_i+1][0] != '-');
setExcludeFile(string(_argv[_i]));
return true;
}
......@@ -17,10 +17,17 @@ public:
~ContextFisher();
virtual bool parseCmdArgs(int argc, char **argv, int skipFirstArgs);
virtual bool isValidState();
string getExcludeFile() { return _excludeFile; }
void setExcludeFile(string excludeFile) { _excludeFile = excludeFile; }
private:
bool handle_s();
bool handle_S();
bool handle_exclude();
string _excludeFile;
};
#endif /* CONTEXTFISHER_H_ */
......@@ -657,12 +657,24 @@ void BedFile::setBed12 (bool isBed12) { this->isBed12 = isBed12; }
void BedFile::loadBedFileIntoMap() {
BED bedEntry;
Open();
while (GetNextBed(bedEntry)) {
if (_status == BED_VALID) {
BIN bin = getBin(bedEntry.start, bedEntry.end);
bedMap[bedEntry.chrom][bin].push_back(bedEntry);
addBEDIntoMap(bedEntry);
}
}
Close();
}
void BedFile::loadBedFileIntoMergedMap() {
BED bedEntry;
Open();
while (GetNextMergedBed(bedEntry)) {
if (_status == BED_VALID) {
addBEDIntoMap(bedEntry);
}
}
Close();
......
......@@ -418,6 +418,7 @@ public:
// load a BED file into a map keyed by chrom, then bin. value is
// vector of BEDs
void loadBedFileIntoMap();
void loadBedFileIntoMergedMap();
// load a BED entry into and existing map
void addBEDIntoMap(BED bedEntry);
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment