Commit b6c80bbc authored by Aaron's avatar Aaron
Browse files

Prettied up the code before pushing to github.

parent 537dd5fa
...@@ -13,7 +13,7 @@ ...@@ -13,7 +13,7 @@
#include "annotateBed.h" #include "annotateBed.h"
// build // build
BedAnnotate::BedAnnotate(const string &mainFile, const vector<string> &annoFileNames, BedAnnotate::BedAnnotate(const string &mainFile, const vector<string> &annoFileNames,
const vector<string> &annoTitles, bool forceStrand, bool reportCounts, bool reportBoth) : const vector<string> &annoTitles, bool forceStrand, bool reportCounts, bool reportBoth) :
_mainFile(mainFile), _mainFile(mainFile),
...@@ -24,36 +24,36 @@ BedAnnotate::BedAnnotate(const string &mainFile, const vector<string> &annoFileN ...@@ -24,36 +24,36 @@ BedAnnotate::BedAnnotate(const string &mainFile, const vector<string> &annoFileN
_reportBoth(reportBoth) _reportBoth(reportBoth)
{ {
_bed = new BedFile(_mainFile); _bed = new BedFile(_mainFile);
} }
// destroy and delete the open file pointers // destroy and delete the open file pointers
BedAnnotate::~BedAnnotate(void) { BedAnnotate::~BedAnnotate(void) {
delete _bed; delete _bed;
CloseAnnoFiles(); CloseAnnoFiles();
} }
void BedAnnotate::OpenAnnoFiles() { void BedAnnotate::OpenAnnoFiles() {
for (size_t i=0; i < _annoFileNames.size(); ++i) { for (size_t i=0; i < _annoFileNames.size(); ++i) {
BedFile *file = new BedFile(_annoFileNames[i]); BedFile *file = new BedFile(_annoFileNames[i]);
file->Open(); file->Open();
_annoFiles.push_back(file); _annoFiles.push_back(file);
} }
} }
void BedAnnotate::CloseAnnoFiles() { void BedAnnotate::CloseAnnoFiles() {
for (size_t i=0; i < _annoFiles.size(); ++i) { for (size_t i=0; i < _annoFiles.size(); ++i) {
BedFile *file = _annoFiles[i]; BedFile *file = _annoFiles[i];
delete file; delete file;
_annoFiles[i] = NULL; _annoFiles[i] = NULL;
} }
} }
void BedAnnotate::PrintHeader() { void BedAnnotate::PrintHeader() {
// print a hash to indicate header and then write a tab // print a hash to indicate header and then write a tab
// for each field in the main file. // for each field in the main file.
printf("#"); printf("#");
for (size_t i = 0; i < _bed->bedType; ++i) for (size_t i = 0; i < _bed->bedType; ++i)
...@@ -74,135 +74,135 @@ void BedAnnotate::PrintHeader() { ...@@ -74,135 +74,135 @@ void BedAnnotate::PrintHeader() {
void BedAnnotate::InitializeMainFile() { void BedAnnotate::InitializeMainFile() {
// process each chromosome // process each chromosome
masterBedCovListMap::iterator chromItr = _bed->bedCovListMap.begin(); masterBedCovListMap::iterator chromItr = _bed->bedCovListMap.begin();
masterBedCovListMap::iterator chromEnd = _bed->bedCovListMap.end(); masterBedCovListMap::iterator chromEnd = _bed->bedCovListMap.end();
for (; chromItr != chromEnd; ++chromItr) { for (; chromItr != chromEnd; ++chromItr) {
// for each chrom, process each bin // for each chrom, process each bin
binsToBedCovLists::iterator binItr = chromItr->second.begin(); binsToBedCovLists::iterator binItr = chromItr->second.begin();
binsToBedCovLists::iterator binEnd = chromItr->second.end(); binsToBedCovLists::iterator binEnd = chromItr->second.end();
for (; binItr != binEnd; ++binItr) { for (; binItr != binEnd; ++binItr) {
// initialize BEDCOVLIST in this chrom/bin // initialize BEDCOVLIST in this chrom/bin
vector<BEDCOVLIST>::iterator bedItr = binItr->second.begin(); vector<BEDCOVLIST>::iterator bedItr = binItr->second.begin();
vector<BEDCOVLIST>::iterator bedEnd = binItr->second.end(); vector<BEDCOVLIST>::iterator bedEnd = binItr->second.end();
for (; bedItr != bedEnd; ++bedItr) { for (; bedItr != bedEnd; ++bedItr) {
// initialize the depthMaps, counts, etc. for each anno file. // initialize the depthMaps, counts, etc. for each anno file.
for (size_t i = 0; i < _annoFiles.size(); ++i) { for (size_t i = 0; i < _annoFiles.size(); ++i) {
map<unsigned int, DEPTH> dummy; map<unsigned int, DEPTH> dummy;
bedItr->depthMapList.push_back(dummy); bedItr->depthMapList.push_back(dummy);
bedItr->counts.push_back(0); bedItr->counts.push_back(0);
bedItr->minOverlapStarts.push_back(INT_MAX); bedItr->minOverlapStarts.push_back(INT_MAX);
} }
} }
} }
} }
} }
void BedAnnotate::AnnotateBed() { void BedAnnotate::AnnotateBed() {
// load the "main" bed file into a map so // load the "main" bed file into a map so
// that we can easily compare each annoFile to it for overlaps // that we can easily compare each annoFile to it for overlaps
_bed->loadBedCovListFileIntoMap(); _bed->loadBedCovListFileIntoMap();
// open the annotations files for processing; // open the annotations files for processing;
OpenAnnoFiles(); OpenAnnoFiles();
// initialize counters, depths, etc. for the main file // initialize counters, depths, etc. for the main file
InitializeMainFile(); InitializeMainFile();
// annotate the main file with the coverage from the annotation files. // annotate the main file with the coverage from the annotation files.
for (size_t annoIndex = 0; annoIndex < _annoFiles.size(); ++annoIndex) { for (size_t annoIndex = 0; annoIndex < _annoFiles.size(); ++annoIndex) {
// grab the current annotation file. // grab the current annotation file.
BedFile *anno = _annoFiles[annoIndex]; BedFile *anno = _annoFiles[annoIndex];
int lineNum = 0; int lineNum = 0;
BED a, nullBed; BED a, nullBed;
BedLineStatus bedStatus; BedLineStatus bedStatus;
// process each entry in the current anno file // process each entry in the current anno file
while ((bedStatus = anno->GetNextBed(a, lineNum)) != BED_INVALID) { while ((bedStatus = anno->GetNextBed(a, lineNum)) != BED_INVALID) {
if (bedStatus == BED_VALID) { if (bedStatus == BED_VALID) {
_bed->countListHits(a, annoIndex, _forceStrand); _bed->countListHits(a, annoIndex, _forceStrand);
a = nullBed; a = nullBed;
} }
} }
} }
// report the annotations of the main file from the anno file. // report the annotations of the main file from the anno file.
ReportAnnotations(); ReportAnnotations();
// close the annotations files; // close the annotations files;
CloseAnnoFiles(); CloseAnnoFiles();
} }
void BedAnnotate::ReportAnnotations() { void BedAnnotate::ReportAnnotations() {
if (_annoTitles.size() > 0) { if (_annoTitles.size() > 0) {
PrintHeader(); PrintHeader();
} }
// process each chromosome // process each chromosome
masterBedCovListMap::const_iterator chromItr = _bed->bedCovListMap.begin(); masterBedCovListMap::const_iterator chromItr = _bed->bedCovListMap.begin();
masterBedCovListMap::const_iterator chromEnd = _bed->bedCovListMap.end(); masterBedCovListMap::const_iterator chromEnd = _bed->bedCovListMap.end();
for (; chromItr != chromEnd; ++chromItr) { for (; chromItr != chromEnd; ++chromItr) {
// for each chrom, process each bin // for each chrom, process each bin
binsToBedCovLists::const_iterator binItr = chromItr->second.begin(); binsToBedCovLists::const_iterator binItr = chromItr->second.begin();
binsToBedCovLists::const_iterator binEnd = chromItr->second.end(); binsToBedCovLists::const_iterator binEnd = chromItr->second.end();
for (; binItr != binEnd; ++binItr) { for (; binItr != binEnd; ++binItr) {
// for each chrom & bin, compute and report // for each chrom & bin, compute and report
// the observed coverage for each feature // the observed coverage for each feature
vector<BEDCOVLIST>::const_iterator bedItr = binItr->second.begin(); vector<BEDCOVLIST>::const_iterator bedItr = binItr->second.begin();
vector<BEDCOVLIST>::const_iterator bedEnd = binItr->second.end(); vector<BEDCOVLIST>::const_iterator bedEnd = binItr->second.end();
for (; bedItr != bedEnd; ++bedItr) { for (; bedItr != bedEnd; ++bedItr) {
// print the main BED entry. // print the main BED entry.
_bed->reportBedTab(*bedItr); _bed->reportBedTab(*bedItr);
// now report the coverage from each annotation file. // now report the coverage from each annotation file.
for (size_t i = 0; i < _annoFiles.size(); ++i) { for (size_t i = 0; i < _annoFiles.size(); ++i) {
unsigned int totalLength = 0; unsigned int totalLength = 0;
int zeroDepthCount = 0; // number of bases with zero depth int zeroDepthCount = 0; // number of bases with zero depth
int depth = 0; // tracks the depth at the current base int depth = 0; // tracks the depth at the current base
// the start is either the first base in the feature OR // the start is either the first base in the feature OR
// the leftmost position of an overlapping feature. e.g. (s = start): // the leftmost position of an overlapping feature. e.g. (s = start):
// A ---------- // A ----------
// B s ------------ // B s ------------
int start = min(bedItr->minOverlapStarts[i], bedItr->start); int start = min(bedItr->minOverlapStarts[i], bedItr->start);
map<unsigned int, DEPTH>::const_iterator depthItr; map<unsigned int, DEPTH>::const_iterator depthItr;
map<unsigned int, DEPTH>::const_iterator depthEnd; map<unsigned int, DEPTH>::const_iterator depthEnd;
// compute the coverage observed at each base in the feature marching from start to end. // compute the coverage observed at each base in the feature marching from start to end.
for (CHRPOS pos = start+1; pos <= bedItr->end; pos++) { for (CHRPOS pos = start+1; pos <= bedItr->end; pos++) {
// map pointer grabbing the starts and ends observed at this position // map pointer grabbing the starts and ends observed at this position
depthItr = bedItr->depthMapList[i].find(pos); depthItr = bedItr->depthMapList[i].find(pos);
depthEnd = bedItr->depthMapList[i].end(); depthEnd = bedItr->depthMapList[i].end();
// increment coverage if starts observed at this position. // increment coverage if starts observed at this position.
if (depthItr != depthEnd) if (depthItr != depthEnd)
depth += depthItr->second.starts; depth += depthItr->second.starts;
// update zero depth // update zero depth
if ((pos > bedItr->start) && (pos <= bedItr->end) && (depth == 0)) if ((pos > bedItr->start) && (pos <= bedItr->end) && (depth == 0))
zeroDepthCount++; zeroDepthCount++;
// decrement coverage if ends observed at this position. // decrement coverage if ends observed at this position.
if (depthItr != depthEnd) if (depthItr != depthEnd)
depth = depth - depthItr->second.ends; depth = depth - depthItr->second.ends;
} }
// Summarize the coverage for the current interval, // Summarize the coverage for the current interval,
CHRPOS length = bedItr->end - bedItr->start; CHRPOS length = bedItr->end - bedItr->start;
totalLength += length; totalLength += length;
int nonZeroBases = (length - zeroDepthCount); int nonZeroBases = (length - zeroDepthCount);
float fractCovered = (float) nonZeroBases / length; float fractCovered = (float) nonZeroBases / length;
if (_reportCounts == false && _reportBoth == false) if (_reportCounts == false && _reportBoth == false)
printf("%f\t", fractCovered); printf("%f\t", fractCovered);
else if (_reportCounts == true && _reportBoth == false) else if (_reportCounts == true && _reportBoth == false)
printf("%d\t", bedItr->counts[i]); printf("%d\t", bedItr->counts[i]);
else if (_reportCounts == false && _reportBoth == true) else if (_reportCounts == false && _reportBoth == true)
printf("%d\t%f\t", bedItr->counts[i], fractCovered); printf("%d\t%f\t", bedItr->counts[i], fractCovered);
} }
// print newline for next feature. // print newline for next feature.
printf("\n"); printf("\n");
} }
} }
} }
} }
...@@ -9,7 +9,7 @@ ...@@ -9,7 +9,7 @@
Licenced under the GNU General Public License 2.0 license. Licenced under the GNU General Public License 2.0 license.
******************************************************************************/ ******************************************************************************/
#ifndef ANNOTATEBED_H #ifndef ANNOTATEBED_H
#define ANNOTATEBED_H #define ANNOTATEBED_H
#include "bedFile.h" #include "bedFile.h"
...@@ -35,41 +35,41 @@ class BedAnnotate { ...@@ -35,41 +35,41 @@ class BedAnnotate {
public: public:
// constructor // constructor
BedAnnotate(const string &mainFile, const vector<string> &annoFileNames, BedAnnotate(const string &mainFile, const vector<string> &annoFileNames,
const vector<string> &annoTitles, bool forceStrand, bool reportCounts, bool reportBoth); const vector<string> &annoTitles, bool forceStrand, bool reportCounts, bool reportBoth);
// destructor
~BedAnnotate(void);
// annotate the master file with all of the annotation files.
void AnnotateBed();
// destructor
~BedAnnotate(void);
// annotate the master file with all of the annotation files.
void AnnotateBed();
private: private:
// input files. // input files.
string _mainFile; string _mainFile;
vector<string> _annoFileNames; vector<string> _annoFileNames;
vector<string> _annoTitles; vector<string> _annoTitles;
// instance of a bed file class. // instance of a bed file class.
BedFile *_bed; BedFile *_bed;
vector<BedFile*> _annoFiles; vector<BedFile*> _annoFiles;
// do we care about strandedness when counting coverage? // do we care about strandedness when counting coverage?
bool _forceStrand; bool _forceStrand;
bool _reportCounts; bool _reportCounts;
bool _reportBoth; bool _reportBoth;
// private function for reporting coverage information // private function for reporting coverage information
void ReportAnnotations(); void ReportAnnotations();
void OpenAnnoFiles(); void OpenAnnoFiles();
void CloseAnnoFiles(); void CloseAnnoFiles();
void PrintHeader(); void PrintHeader();
void InitializeMainFile(); void InitializeMainFile();
}; };
#endif /* ANNOTATEBED_H */ #endif /* ANNOTATEBED_H */
...@@ -25,135 +25,135 @@ void ShowHelp(void); ...@@ -25,135 +25,135 @@ void ShowHelp(void);
int main(int argc, char* argv[]) { int main(int argc, char* argv[]) {
// our configuration variables // our configuration variables
bool showHelp = false; bool showHelp = false;
// input file // input file
string mainFile; string mainFile;
// parm flags // parm flags
bool forceStrand = false; bool forceStrand = false;
bool haveBed = false; bool haveBed = false;
bool haveFiles = false; bool haveFiles = false;
bool haveTitles = false; bool haveTitles = false;
bool reportCounts = false; bool reportCounts = false;
bool reportBoth = false; bool reportBoth = false;
// list of annotation files / names // list of annotation files / names
vector<string> inputFiles; vector<string> inputFiles;
vector<string> inputTitles; vector<string> inputTitles;
// check to see if we should print out some help // check to see if we should print out some help
if(argc <= 1) showHelp = true; if(argc <= 1) showHelp = true;
for(int i = 1; i < argc; i++) { for(int i = 1; i < argc; i++) {
int parameterLength = (int)strlen(argv[i]); int parameterLength = (int)strlen(argv[i]);
if((PARAMETER_CHECK("-h", 2, parameterLength)) || if((PARAMETER_CHECK("-h", 2, parameterLength)) ||
(PARAMETER_CHECK("--help", 5, parameterLength))) { (PARAMETER_CHECK("--help", 5, parameterLength))) {
showHelp = true; showHelp = true;
} }
} }
if(showHelp) ShowHelp(); if(showHelp) ShowHelp();
// do some parsing (all of these parameters require 2 strings) // do some parsing (all of these parameters require 2 strings)
for(int i = 1; i < argc; i++) { for(int i = 1; i < argc; i++) {
int parameterLength = (int)strlen(argv[i]); int parameterLength = (int)strlen(argv[i]);
if(PARAMETER_CHECK("-i", 2, parameterLength)) { if(PARAMETER_CHECK("-i", 2, parameterLength)) {
if ((i+1) < argc) { if ((i+1) < argc) {
haveBed = true; haveBed = true;
mainFile = argv[i + 1]; mainFile = argv[i + 1];
i++; i++;
} }
} }
else if(PARAMETER_CHECK("-files", 6, parameterLength)) { else if(PARAMETER_CHECK("-files", 6, parameterLength)) {
if ((i+1) < argc) { if ((i+1) < argc) {
haveFiles = true; haveFiles = true;
i = i+1; i = i+1;
string file = argv[i]; string file = argv[i];
while (file[0] != '-' && i < argc) { while (file[0] != '-' && i < argc) {
inputFiles.push_back(file); inputFiles.push_back(file);
i++; i++;
if (i < argc) if (i < argc)
file = argv[i]; file = argv[i];
} }
i--; i--;
} }
} }
else if(PARAMETER_CHECK("-names", 6, parameterLength)) { else if(PARAMETER_CHECK("-names", 6, parameterLength)) {
if ((i+1) < argc) { if ((i+1) < argc) {
haveTitles = true; haveTitles = true;
i = i+1; i = i+1;
string title = argv[i]; string title = argv[i];
while (title[0] != '-' && i < argc) { while (title[0] != '-' && i < argc) {
inputTitles.push_back(title); inputTitles.push_back(title);
i++; i++;
if (i < argc) if (i < argc)
title = argv[i]; title = argv[i];
} }
i--; i--;
} }
} }
else if(PARAMETER_CHECK("-counts", 7, parameterLength)) { else if(PARAMETER_CHECK("-counts", 7, parameterLength)) {
reportCounts = true; reportCounts = true;
} }
else if(PARAMETER_CHECK("-both", 5, parameterLength)) { else if(PARAMETER_CHECK("-both", 5, parameterLength)) {
reportBoth = true; reportBoth = true;
} }
else if (PARAMETER_CHECK("-s", 2, parameterLength)) { else if (PARAMETER_CHECK("-s", 2, parameterLength)) {
forceStrand = true; forceStrand = true;
} }
else { else {
cerr << endl << "*****ERROR: Unrecognized parameter: " << argv[i] << " *****" << endl << endl; cerr << endl << "*****ERROR: Unrecognized parameter: " << argv[i] << " *****" << endl << endl;
showHelp = true; showHelp = true;
} }
} }
// make sure we have both input files // make sure we have both input files
if (!haveBed || !haveFiles) { if (!haveBed || !haveFiles) {
cerr << endl << "*****" << endl << "*****ERROR: Need -i and -files files. " << endl << "*****" << endl; cerr << endl << "*****" << endl << "*****ERROR: Need -i and -files files. " << endl << "*****" << endl;
showHelp = true; showHelp = true;
} }
if (!showHelp) { if (!showHelp) {
BedAnnotate *ba = new BedAnnotate(mainFile, inputFiles, inputTitles, forceStrand, reportCounts, reportBoth);