complementFile.cpp 3.05 KB
Newer Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
#include "complementFile.h"
#include "NewGenomeFile.h"

// Constructs the complement tool from its context.
//
// Caches the genome file and its chromosome list from the context, and
// resets the running state used while emitting complement intervals:
//   _currStartPos        - start of the next interval to emit (0).
//   _outputMgr           - not known yet; processHits() assigns it.
//   _currPosInGenomeList - -1, so the first fastForward() begins scanning
//                          the chromosome list at index 0.
//
// NOTE(review): _chromList is initialized through _genomeFile, which only
// works if _genomeFile is declared before _chromList in the header (members
// are initialized in declaration order) - confirm against complementFile.h.
ComplementFile::ComplementFile(ContextComplement *context)
: ToolBase(context),
  _genomeFile(context->getGenomeFile()),
  _currStartPos(0),
  _outputMgr(NULL),
  _chromList(_genomeFile->getChromList()),
  _currPosInGenomeList(-1)
{

}

// Destructor. Intentionally empty: nothing is released here.
ComplementFile::~ComplementFile() {
}

// Fetches input file 0 from the context and stores it in _frm, the
// merge manager that findNext() and cleanupHits() operate on.
//
// Always returns true.
//
// NOTE(review): the static_cast is unchecked; this assumes the context
// hands back a FileRecordMergeMgr for file 0 - confirm against the context.
bool ComplementFile::init()
{
	_frm = static_cast<FileRecordMergeMgr *>(upCast(_context)->getFile(0));
	return true;
}

// Pulls records from the merge manager into 'hits' until one with a
// non-NULL key is obtained.
//
// Returns true as soon as 'hits' holds a usable key record, or false once
// the input reports end-of-file before another usable record was found.
bool ComplementFile::findNext(RecordKeyVector &hits)
{
	for (;;) {
		// eof is tested before every read, exactly once per attempt.
		if (_frm->eof()) {
			return false;
		}

		_frm->getNextRecord(&hits);

		// A NULL key means this read produced nothing usable; try again.
		if (hits.getKey() != NULL) {
			return true;
		}
	}
}

// Emits the gap that precedes the key record of 'hits'.
//
// outputMgr is remembered in _outputMgr so that printRecord() can write
// through it. When the key record lies on a different chromosome than the
// one currently being processed, the previous chromosome is closed out and
// the genome list is advanced to the new chromosome before the gap is
// written. Records on chromosomes absent from the genome file are ignored.
void ComplementFile::processHits(RecordOutputMgr *outputMgr, RecordKeyVector &hits)
{
	_outputMgr = outputMgr;

	const Record *keyRec = hits.getKey();
	const QuickString &recChrom = keyRec->getChrName();

	const bool chromChanged = (_currChrom != recChrom);
	if (chromChanged) {
		// Finish the chromosome we are leaving.
		outPutLastRecordInPrevChrom();

		// Chromosome is not in the genome file: nothing to report for it.
		if (!fastForward(recChrom)) {
			return;
		}

		// Now positioned on a chromosome present in both the input and
		// the genome file; restart the running state for it.
		_outRecord.setChrName(recChrom);
		_currChrom = recChrom;
		_currStartPos = 0;
	}

	// The gap runs from the running start position up to where this
	// record begins; the next gap starts where this record ends.
	printRecord(keyRec->getStartPos());
	_currStartPos = keyRec->getEndPos();
}

// Hands 'hits' back to the merge manager (_frm) via deleteMergedRecord()
// once the record has been processed.
void ComplementFile::cleanupHits(RecordKeyVector &hits)
{
	_frm->deleteMergedRecord(hits);
}

// Called after the last input record to finish the output.
//
// First closes out the chromosome currently in progress, then calls
// fastForward() with an empty chromosome name: with no target to stop at,
// fastForward() emits a whole-chromosome record for every chromosome that
// remains in the genome list. Always returns true.
bool ComplementFile::finalizeCalculations() {
	outPutLastRecordInPrevChrom();
	fastForward("");
	return true;
}

// Emits the trailing gap of the chromosome currently stored in _outRecord:
// the interval from _currStartPos to the chromosome's size as given by the
// genome file.
//
// Does nothing when no chromosome has been started yet, or when the
// chromosome is already covered up to (or past) its end.
//
// After the tail is written, _currStartPos is moved to the chromosome end.
// This makes the call idempotent: processHits() invokes it on every record
// whose chromosome differs from _currChrom, and _currChrom is not updated
// when the new chromosome is missing from the genome file, so without this
// the same tail would be written again for each such record.
void ComplementFile::outPutLastRecordInPrevChrom()
{
	const QuickString &chrom = _outRecord.getChrName();

	//do nothing if triggered by first record in DB. At this point,
	//there was no prev chrom, so nothing is stored in the output Record yet.
	if (chrom.empty()) return;

	int maxChromSize = _genomeFile->getChromSize(chrom);
	if (_currStartPos >= maxChromSize) return; //chrom already covered and reported.

	printRecord(maxChromSize);

	//mark the chrom as fully reported so a repeated call hits the guard above.
	_currStartPos = maxChromSize;
}

// Advances through the genome's chromosome list, starting just after the
// last position reached, until 'newChrom' is found. Every chromosome passed
// over on the way is reported as one record spanning 0 to its full size.
//
// newChrom: chromosome to stop at. An empty name means "no target": all
//           remaining chromosomes are reported and true is returned.
//
// Returns false, without emitting anything, if newChrom is non-empty and
// not present in the genome file; returns true otherwise. If newChrom is in
// the genome file but is not found at or after the current list position,
// an error is written to cerr and the program exits with status 1.
bool ComplementFile::fastForward(const QuickString &newChrom) {
	const bool noTarget = newChrom.empty();
	if (!noTarget && !_genomeFile->hasChrom(newChrom)) {
		return false;
	}

	const int numChroms = (int)_chromList.size();
	int idx = _currPosInGenomeList + 1;
	for (; idx < numChroms && _chromList[idx] != newChrom; idx++) {
		// This chromosome has no input records: report it in full.
		_outRecord.setChrName(_chromList[idx]);
		_currStartPos = 0;
		printRecord(_genomeFile->getChromSize(_chromList[idx]));
	}

	if (noTarget) {
		return true;
	}

	if (idx == numChroms) {
		// Ran off the end of the list without meeting newChrom, so the
		// genome file and the input are not in the same order.
		cerr << "***** ERROR: genome file and input file are not sorted in same order. Exiting..." << endl;
		exit(1);
		//this is where we'd return false if we weren't exiting.
	}

	_currPosInGenomeList = idx;
	_currChrom = newChrom;
	return true;
}

// Writes one complement interval on the chromosome currently held in
// _outRecord, spanning _currStartPos up to endPos, followed by a newline.
//
// endPos: end coordinate of the interval to report.
//
// Nothing is written when the interval would be empty (endPos not greater
// than _currStartPos). This happens, for example, when the first feature on
// a chromosome starts at position 0: processHits() then asks for the gap
// from 0 to 0, which is not a real uncovered region.
//
// Requires _outputMgr to have been set (processHits() does this).
void ComplementFile::printRecord(int endPos)
{
	//an empty or inverted span is not a gap; skip it.
	if (endPos <= _currStartPos) return;

	//numeric and string forms of each coordinate are kept in sync.
	_outRecord.setStartPos(_currStartPos);
	QuickString startStr;
	startStr.append(_currStartPos);
	_outRecord.setStartPosStr(startStr);

	_outRecord.setEndPos(endPos);
	QuickString endStr;
	endStr.append(endPos);
	_outRecord.setEndPosStr(endStr);

	_outputMgr->printRecord(&_outRecord);
	_outputMgr->newline();
}