BinTree.cpp 3.96 KB
Newer Older
nkindlon's avatar
nkindlon committed
1
2
3
4
#include "BinTree.h"
#include "FileRecordMgr.h"


5
BinTree::BinTree(ContextIntersect *context)
nkindlon's avatar
nkindlon committed
6
:  _context(context),
nkindlon's avatar
nkindlon committed
7
8
9
  _binOffsetsExtended(NULL),
  _maxBinNumFound(0)
 {
10
11
	_binOffsetsExtended = new binNumType[NUM_BIN_LEVELS];
	memset(_binOffsetsExtended, 0, NUM_BIN_LEVELS * sizeof(binNumType));
nkindlon's avatar
nkindlon committed
12
13
14

	//start at idx 1, because the memset above already initialized
	//the first idx to zero, which is what we want.
15
	for (binNumType i= 1; i < NUM_BIN_LEVELS; i++) {
nkindlon's avatar
nkindlon committed
16
17
18
19
20
21
22
23
		_binOffsetsExtended[i] = _binOffsetsExtended[i-1] + (1 << ((NUM_BIN_LEVELS - i -1) * 3));
	}
}

BinTree::~BinTree() {
	delete [] _binOffsetsExtended;
}

24
void BinTree::loadDB()
nkindlon's avatar
nkindlon committed
25
{
nkindlon's avatar
nkindlon committed
26
27
28
29
30
31
32
33
34
35
	for (int i=0; i < _context->getNumDatabaseFiles(); i++) {
		FileRecordMgr *databaseFile = _context->getDatabaseFile(i);

		Record *record = NULL;
		while (!databaseFile->eof()) {
			record = databaseFile->getNextRecord();
			//In addition to NULL records, we also don't want to add unmapped reads.
			if (record == NULL || record->isUnmapped()) {
				continue;
			}
nkindlon's avatar
nkindlon committed
36

37
38
			_context->testNameConventions(record);

nkindlon's avatar
nkindlon committed
39
40
41
42
43
			if (!addRecordToTree(record)) {
				fprintf(stderr, "ERROR: Unable to add record to tree.\n");
				databaseFile->close();
				exit(1);
			}
nkindlon's avatar
nkindlon committed
44
45
46
47
		}
	}
}

nkindlon's avatar
nkindlon committed
48
void BinTree::getHits(Record *record, RecordKeyVector &hitSet)
nkindlon's avatar
nkindlon committed
49
{
50
51
52
	if (record->isUnmapped()) {
		return;
	}
nkindlon's avatar
nkindlon committed
53
54
55
56
57
58
59
    const QuickString &chr = record->getChrName();
	mainMapType::iterator mainIter = _mainMap.find(chr);
	if (mainIter == _mainMap.end()) {
		//given chrom not even in map.
		return;
	}

60
61
    binNumType startPos = record->getStartPos();
    binNumType endPos = record->getEndPos();
nkindlon's avatar
nkindlon committed
62

63
64
    binNumType startBin = (startPos >> _binFirstShift);
    binNumType endBin = ((endPos-1) >> _binFirstShift);
nkindlon's avatar
nkindlon committed
65
66


67
	const allBinsType &bins = mainIter->second;
nkindlon's avatar
nkindlon committed
68
69
70
71
72
73
74

    /* SYNOPSIS:
         1. We loop through each UCSC BIN level for feature A's chrom.
         2. For each BIN, we loop through each B feature and add it to
            hits if it meets all of the user's requests, which include:
               (a) overlap fractio, (b) strandedness, (c) reciprocal overlap
    */
75
76
77
    for (binNumType i = 0; i < NUM_BIN_LEVELS; i++) {
        binNumType offset = _binOffsetsExtended[i];
        for (binNumType j = (startBin+offset); j <= (endBin+offset); j++)  {
nkindlon's avatar
nkindlon committed
78
79

        	// move to the next bin if this one is empty
80
81
        	allBinsType::const_iterator allBinsIter = bins.find(j);
        	if (allBinsIter == bins.end()) {
nkindlon's avatar
nkindlon committed
82
83
        		continue;
        	}
84
85
86
87
        	const binType &bin = allBinsIter->second;

        	for (binType::const_iterator iter = bin.begin(); iter != bin.end(); iter++) {
            	const Record *dbRec = *iter;
nkindlon's avatar
nkindlon committed
88
89
90
91
92
93
94
95
96
            	if (record->intersects(dbRec, _context->getSameStrand(), _context->getDiffStrand(),
            			_context->getOverlapFraction(), _context->getReciprocal())) {
            		hitSet.push_back(dbRec);
            	}
            }
        }
        startBin >>= _binNextShift;
        endBin >>= _binNextShift;
    }
nkindlon's avatar
nkindlon committed
97
98
99
	if (_context->getSortOutput()) {
		hitSet.sortVector();
	}
nkindlon's avatar
nkindlon committed
100
101
102
103
}

bool BinTree::addRecordToTree(const Record *record)
{
104
	// Get chr, bin.
nkindlon's avatar
nkindlon committed
105
	const QuickString &chr = record->getChrName();
106
107
108
	binNumType startPos = (binNumType)(record->getStartPos());
	binNumType endPos = (binNumType)(record->getEndPos());
	binNumType binNum = getBin(startPos, endPos);
nkindlon's avatar
nkindlon committed
109
110
111
112
113

	if (binNum < 0 || binNum >= NUM_BINS) {
		fprintf(stderr, "ERROR: Received illegal bin number %u from getBin call.\n", binNum);
		return false;
	}
114
	_mainMap[chr][binNum].push_back(record);
nkindlon's avatar
nkindlon committed
115
116
117
	return true;
}

118
119
120

BinTree::binNumType BinTree::getBin(const Record *record) const {
	return getBin((binNumType)(record->getStartPos()), (binNumType)(record->getEndPos()));
nkindlon's avatar
nkindlon committed
121
122
}

123
BinTree::binNumType BinTree::getBin(binNumType start, binNumType end) const {
nkindlon's avatar
nkindlon committed
124
125
126
127
    --end;
    start >>= _binFirstShift;
    end   >>= _binFirstShift;

128
    for (binNumType i = 0; i < NUM_BIN_LEVELS; ++i) {
nkindlon's avatar
nkindlon committed
129
130
131
132
133
134
135
136
137
        if (start == end) {
        	return _binOffsetsExtended[i] + start;
        }
        start >>= _binNextShift;
        end   >>= _binNextShift;
    }
    //failure
    return -1;
}