Commit b6c80bbc authored by Aaron's avatar Aaron
Browse files

Prettied up the code before pushing to github.

parent 537dd5fa
......@@ -13,7 +13,7 @@
#include "annotateBed.h"
// build
BedAnnotate::BedAnnotate(const string &mainFile, const vector<string> &annoFileNames,
BedAnnotate::BedAnnotate(const string &mainFile, const vector<string> &annoFileNames,
const vector<string> &annoTitles, bool forceStrand, bool reportCounts, bool reportBoth) :
_mainFile(mainFile),
......@@ -24,36 +24,36 @@ BedAnnotate::BedAnnotate(const string &mainFile, const vector<string> &annoFileN
_reportBoth(reportBoth)
{
_bed = new BedFile(_mainFile);
}
}
// destroy and delete the open file pointers
BedAnnotate::~BedAnnotate(void) {
delete _bed;
delete _bed;
CloseAnnoFiles();
}
void BedAnnotate::OpenAnnoFiles() {
for (size_t i=0; i < _annoFileNames.size(); ++i) {
BedFile *file = new BedFile(_annoFileNames[i]);
file->Open();
_annoFiles.push_back(file);
}
for (size_t i=0; i < _annoFileNames.size(); ++i) {
BedFile *file = new BedFile(_annoFileNames[i]);
file->Open();
_annoFiles.push_back(file);
}
}
void BedAnnotate::CloseAnnoFiles() {
for (size_t i=0; i < _annoFiles.size(); ++i) {
BedFile *file = _annoFiles[i];
delete file;
_annoFiles[i] = NULL;
}
for (size_t i=0; i < _annoFiles.size(); ++i) {
BedFile *file = _annoFiles[i];
delete file;
_annoFiles[i] = NULL;
}
}
void BedAnnotate::PrintHeader() {
// print a hash to indicate header and then write a tab
// print a hash to indicate header and then write a tab
// for each field in the main file.
printf("#");
for (size_t i = 0; i < _bed->bedType; ++i)
......@@ -74,135 +74,135 @@ void BedAnnotate::PrintHeader() {
void BedAnnotate::InitializeMainFile() {
// process each chromosome
masterBedCovListMap::iterator chromItr = _bed->bedCovListMap.begin();
masterBedCovListMap::iterator chromEnd = _bed->bedCovListMap.end();
for (; chromItr != chromEnd; ++chromItr) {
// for each chrom, process each bin
binsToBedCovLists::iterator binItr = chromItr->second.begin();
binsToBedCovLists::iterator binEnd = chromItr->second.end();
for (; binItr != binEnd; ++binItr) {
// process each chromosome
masterBedCovListMap::iterator chromItr = _bed->bedCovListMap.begin();
masterBedCovListMap::iterator chromEnd = _bed->bedCovListMap.end();
for (; chromItr != chromEnd; ++chromItr) {
// for each chrom, process each bin
binsToBedCovLists::iterator binItr = chromItr->second.begin();
binsToBedCovLists::iterator binEnd = chromItr->second.end();
for (; binItr != binEnd; ++binItr) {
// initialize BEDCOVLIST in this chrom/bin
vector<BEDCOVLIST>::iterator bedItr = binItr->second.begin();
vector<BEDCOVLIST>::iterator bedEnd = binItr->second.end();
for (; bedItr != bedEnd; ++bedItr) {
// initialize the depthMaps, counts, etc. for each anno file.
vector<BEDCOVLIST>::iterator bedItr = binItr->second.begin();
vector<BEDCOVLIST>::iterator bedEnd = binItr->second.end();
for (; bedItr != bedEnd; ++bedItr) {
// initialize the depthMaps, counts, etc. for each anno file.
for (size_t i = 0; i < _annoFiles.size(); ++i) {
map<unsigned int, DEPTH> dummy;
bedItr->depthMapList.push_back(dummy);
bedItr->counts.push_back(0);
bedItr->minOverlapStarts.push_back(INT_MAX);
}
}
}
}
}
}
}
void BedAnnotate::AnnotateBed() {
// load the "main" bed file into a map so
// that we can easily compare each annoFile to it for overlaps
_bed->loadBedCovListFileIntoMap();
// load the "main" bed file into a map so
// that we can easily compare each annoFile to it for overlaps
_bed->loadBedCovListFileIntoMap();
// open the annotations files for processing;
OpenAnnoFiles();
OpenAnnoFiles();
// initialize counters, depths, etc. for the main file
InitializeMainFile();
// annotate the main file with the coverage from the annotation files.
// annotate the main file with the coverage from the annotation files.
for (size_t annoIndex = 0; annoIndex < _annoFiles.size(); ++annoIndex) {
// grab the current annotation file.
BedFile *anno = _annoFiles[annoIndex];
BedFile *anno = _annoFiles[annoIndex];
int lineNum = 0;
BED a, nullBed;
BedLineStatus bedStatus;
// process each entry in the current anno file
while ((bedStatus = anno->GetNextBed(a, lineNum)) != BED_INVALID) {
if (bedStatus == BED_VALID) {
_bed->countListHits(a, annoIndex, _forceStrand);
a = nullBed;
}
}
}
// report the annotations of the main file from the anno file.
ReportAnnotations();
// close the annotations files;
CloseAnnoFiles();
BED a, nullBed;
BedLineStatus bedStatus;
// process each entry in the current anno file
while ((bedStatus = anno->GetNextBed(a, lineNum)) != BED_INVALID) {
if (bedStatus == BED_VALID) {
_bed->countListHits(a, annoIndex, _forceStrand);
a = nullBed;
}
}
}
// report the annotations of the main file from the anno file.
ReportAnnotations();
// close the annotations files;
CloseAnnoFiles();
}
void BedAnnotate::ReportAnnotations() {
if (_annoTitles.size() > 0) {
PrintHeader();
}
// process each chromosome
masterBedCovListMap::const_iterator chromItr = _bed->bedCovListMap.begin();
masterBedCovListMap::const_iterator chromEnd = _bed->bedCovListMap.end();
for (; chromItr != chromEnd; ++chromItr) {
// for each chrom, process each bin
binsToBedCovLists::const_iterator binItr = chromItr->second.begin();
binsToBedCovLists::const_iterator binEnd = chromItr->second.end();
for (; binItr != binEnd; ++binItr) {
// for each chrom & bin, compute and report
// process each chromosome
masterBedCovListMap::const_iterator chromItr = _bed->bedCovListMap.begin();
masterBedCovListMap::const_iterator chromEnd = _bed->bedCovListMap.end();
for (; chromItr != chromEnd; ++chromItr) {
// for each chrom, process each bin
binsToBedCovLists::const_iterator binItr = chromItr->second.begin();
binsToBedCovLists::const_iterator binEnd = chromItr->second.end();
for (; binItr != binEnd; ++binItr) {
// for each chrom & bin, compute and report
// the observed coverage for each feature
vector<BEDCOVLIST>::const_iterator bedItr = binItr->second.begin();
vector<BEDCOVLIST>::const_iterator bedEnd = binItr->second.end();
for (; bedItr != bedEnd; ++bedItr) {
// print the main BED entry.
_bed->reportBedTab(*bedItr);
vector<BEDCOVLIST>::const_iterator bedItr = binItr->second.begin();
vector<BEDCOVLIST>::const_iterator bedEnd = binItr->second.end();
for (; bedItr != bedEnd; ++bedItr) {
// print the main BED entry.
_bed->reportBedTab(*bedItr);
// now report the coverage from each annotation file.
for (size_t i = 0; i < _annoFiles.size(); ++i) {
unsigned int totalLength = 0;
int zeroDepthCount = 0; // number of bases with zero depth
int depth = 0; // tracks the depth at the current base
// the start is either the first base in the feature OR
// the leftmost position of an overlapping feature. e.g. (s = start):
// A ----------
// B s ------------
int start = min(bedItr->minOverlapStarts[i], bedItr->start);
map<unsigned int, DEPTH>::const_iterator depthItr;
map<unsigned int, DEPTH>::const_iterator depthEnd;
// compute the coverage observed at each base in the feature marching from start to end.
for (CHRPOS pos = start+1; pos <= bedItr->end; pos++) {
// map pointer grabbing the starts and ends observed at this position
depthItr = bedItr->depthMapList[i].find(pos);
unsigned int totalLength = 0;
int zeroDepthCount = 0; // number of bases with zero depth
int depth = 0; // tracks the depth at the current base
// the start is either the first base in the feature OR
// the leftmost position of an overlapping feature. e.g. (s = start):
// A ----------
// B s ------------
int start = min(bedItr->minOverlapStarts[i], bedItr->start);
map<unsigned int, DEPTH>::const_iterator depthItr;
map<unsigned int, DEPTH>::const_iterator depthEnd;
// compute the coverage observed at each base in the feature marching from start to end.
for (CHRPOS pos = start+1; pos <= bedItr->end; pos++) {
// map pointer grabbing the starts and ends observed at this position
depthItr = bedItr->depthMapList[i].find(pos);
depthEnd = bedItr->depthMapList[i].end();
// increment coverage if starts observed at this position.
if (depthItr != depthEnd)
depth += depthItr->second.starts;
// update zero depth
if ((pos > bedItr->start) && (pos <= bedItr->end) && (depth == 0))
zeroDepthCount++;
// decrement coverage if ends observed at this position.
if (depthItr != depthEnd)
depth = depth - depthItr->second.ends;
}
// Summarize the coverage for the current interval,
CHRPOS length = bedItr->end - bedItr->start;
totalLength += length;
int nonZeroBases = (length - zeroDepthCount);
float fractCovered = (float) nonZeroBases / length;
if (_reportCounts == false && _reportBoth == false)
printf("%f\t", fractCovered);
else if (_reportCounts == true && _reportBoth == false)
printf("%d\t", bedItr->counts[i]);
// increment coverage if starts observed at this position.
if (depthItr != depthEnd)
depth += depthItr->second.starts;
// update zero depth
if ((pos > bedItr->start) && (pos <= bedItr->end) && (depth == 0))
zeroDepthCount++;
// decrement coverage if ends observed at this position.
if (depthItr != depthEnd)
depth = depth - depthItr->second.ends;
}
// Summarize the coverage for the current interval,
CHRPOS length = bedItr->end - bedItr->start;
totalLength += length;
int nonZeroBases = (length - zeroDepthCount);
float fractCovered = (float) nonZeroBases / length;
if (_reportCounts == false && _reportBoth == false)
printf("%f\t", fractCovered);
else if (_reportCounts == true && _reportBoth == false)
printf("%d\t", bedItr->counts[i]);
else if (_reportCounts == false && _reportBoth == true)
printf("%d\t%f\t", bedItr->counts[i], fractCovered);
}
// print newline for next feature.
printf("%d\t%f\t", bedItr->counts[i], fractCovered);
}
// print newline for next feature.
printf("\n");
}
}
}
}
}
}
}
......@@ -9,7 +9,7 @@
Licenced under the GNU General Public License 2.0 license.
******************************************************************************/
#ifndef ANNOTATEBED_H
#ifndef ANNOTATEBED_H
#define ANNOTATEBED_H
#include "bedFile.h"
......@@ -35,41 +35,41 @@ class BedAnnotate {
public:
// constructor
BedAnnotate(const string &mainFile, const vector<string> &annoFileNames,
const vector<string> &annoTitles, bool forceStrand, bool reportCounts, bool reportBoth);
// constructor
BedAnnotate(const string &mainFile, const vector<string> &annoFileNames,
const vector<string> &annoTitles, bool forceStrand, bool reportCounts, bool reportBoth);
// destructor
~BedAnnotate(void);
// annotate the master file with all of the annotation files.
void AnnotateBed();
// destructor
~BedAnnotate(void);
// annotate the master file with all of the annotation files.
void AnnotateBed();
private:
// input files.
string _mainFile;
// input files.
string _mainFile;
vector<string> _annoFileNames;
vector<string> _annoTitles;
// instance of a bed file class.
// instance of a bed file class.
BedFile *_bed;
vector<BedFile*> _annoFiles;
// do we care about strandedness when counting coverage?
bool _forceStrand;
// do we care about strandedness when counting coverage?
bool _forceStrand;
bool _reportCounts;
bool _reportBoth;
// private function for reporting coverage information
void ReportAnnotations();
// private function for reporting coverage information
void ReportAnnotations();
void OpenAnnoFiles();
void CloseAnnoFiles();
void PrintHeader();
void InitializeMainFile();
};
#endif /* ANNOTATEBED_H */
......@@ -25,135 +25,135 @@ void ShowHelp(void);
int main(int argc, char* argv[]) {
// our configuration variables
bool showHelp = false;
// input file
string mainFile;
// parm flags
bool forceStrand = false;
bool haveBed = false;
bool haveFiles = false;
bool haveTitles = false;
// our configuration variables
bool showHelp = false;
// input file
string mainFile;
// parm flags
bool forceStrand = false;
bool haveBed = false;
bool haveFiles = false;
bool haveTitles = false;
bool reportCounts = false;
bool reportBoth = false;
bool reportBoth = false;
// list of annotation files / names
vector<string> inputFiles;
vector<string> inputTitles;
// check to see if we should print out some help
if(argc <= 1) showHelp = true;
for(int i = 1; i < argc; i++) {
int parameterLength = (int)strlen(argv[i]);
if((PARAMETER_CHECK("-h", 2, parameterLength)) ||
(PARAMETER_CHECK("--help", 5, parameterLength))) {
showHelp = true;
}
}
if(showHelp) ShowHelp();
// do some parsing (all of these parameters require 2 strings)
for(int i = 1; i < argc; i++) {
int parameterLength = (int)strlen(argv[i]);
if(PARAMETER_CHECK("-i", 2, parameterLength)) {
if ((i+1) < argc) {
haveBed = true;
mainFile = argv[i + 1];
i++;
}
}
else if(PARAMETER_CHECK("-files", 6, parameterLength)) {
if ((i+1) < argc) {
haveFiles = true;
vector<string> inputFiles;
vector<string> inputTitles;
// check to see if we should print out some help
if(argc <= 1) showHelp = true;
for(int i = 1; i < argc; i++) {
int parameterLength = (int)strlen(argv[i]);
if((PARAMETER_CHECK("-h", 2, parameterLength)) ||
(PARAMETER_CHECK("--help", 5, parameterLength))) {
showHelp = true;
}
}
if(showHelp) ShowHelp();
// do some parsing (all of these parameters require 2 strings)
for(int i = 1; i < argc; i++) {
int parameterLength = (int)strlen(argv[i]);
if(PARAMETER_CHECK("-i", 2, parameterLength)) {
if ((i+1) < argc) {
haveBed = true;
mainFile = argv[i + 1];
i++;
}
}
else if(PARAMETER_CHECK("-files", 6, parameterLength)) {
if ((i+1) < argc) {
haveFiles = true;
i = i+1;
string file = argv[i];
while (file[0] != '-' && i < argc) {
while (file[0] != '-' && i < argc) {
inputFiles.push_back(file);
i++;
if (i < argc)
file = argv[i];
}
}
i--;
}
}
else if(PARAMETER_CHECK("-names", 6, parameterLength)) {
if ((i+1) < argc) {
haveTitles = true;
}
}
else if(PARAMETER_CHECK("-names", 6, parameterLength)) {
if ((i+1) < argc) {
haveTitles = true;
i = i+1;
string title = argv[i];
while (title[0] != '-' && i < argc) {
while (title[0] != '-' && i < argc) {
inputTitles.push_back(title);
i++;
if (i < argc)
title = argv[i];
}
}
i--;
}
}
else if(PARAMETER_CHECK("-counts", 7, parameterLength)) {
reportCounts = true;
}
else if(PARAMETER_CHECK("-both", 5, parameterLength)) {
reportBoth = true;
}
else if (PARAMETER_CHECK("-s", 2, parameterLength)) {
forceStrand = true;
}
else {
cerr << endl << "*****ERROR: Unrecognized parameter: " << argv[i] << " *****" << endl << endl;
showHelp = true;
}
}
// make sure we have both input files
if (!haveBed || !haveFiles) {
cerr << endl << "*****" << endl << "*****ERROR: Need -i and -files files. " << endl << "*****" << endl;
showHelp = true;
}
if (!showHelp) {
BedAnnotate *ba = new BedAnnotate(mainFile, inputFiles, inputTitles, forceStrand, reportCounts, reportBoth);
}
}
else if(PARAMETER_CHECK("-counts", 7, parameterLength)) {
reportCounts = true;
}
else if(PARAMETER_CHECK("-both", 5, parameterLength)) {
reportBoth = true;
}
else if (PARAMETER_CHECK("-s", 2, parameterLength)) {
forceStrand = true;
}
else {
cerr << endl << "*****ERROR: Unrecognized parameter: " << argv[i] << " *****" << endl << endl;
showHelp = true;
}
}
// make sure we have both input files
if (!haveBed || !haveFiles) {
cerr << endl << "*****" << endl << "*****ERROR: Need -i and -files files. " << endl << "*****" << endl;
showHelp = true;
}
if (!showHelp) {
BedAnnotate *ba = new BedAnnotate(mainFile, inputFiles, inputTitles, forceStrand, reportCounts, reportBoth);
ba->AnnotateBed();
delete ba;
return 0;
}
else {
ShowHelp();
}
delete ba;
return 0;
}
else {
ShowHelp();
}
}
void ShowHelp(void) {
cerr << endl << "Program: " << PROGRAM_NAME << " (v" << VERSION << ")" << endl;
cerr << "Author: Aaron Quinlan (aaronquinlan@gmail.com)" << endl;
cerr << "Summary: Annotates the depth & breadth of coverage of features from multiple files" << endl;
cerr << "\t on the intervals in -i." << endl << endl;
cerr << "Usage: " << PROGRAM_NAME << " [OPTIONS] -i <bed/gff/vcf> -files FILE1 FILE2 .. FILEn" << endl << endl;
cerr << "Options: " << endl;
cerr << "\t-names\t" << "A list of names (one / file) to describe each file in -i." << endl;
cerr << "\t\tThese names will be printed as a header line." << endl << endl;
cerr << "\t-counts\t" << "Report the count of features in each file that overlap -i." << endl;
cerr << "\t\t- Default is to report the fraction of -i covered by each file." << endl << endl;
cerr << "\t-both\t" << "Report the counts followed by the % coverage." << endl;
cerr << "\t\t- Default is to report the fraction of -i covered by each file." << endl << endl;
cerr << "\t-s\t" << "Force strandedness. That is, only include hits in A that" << endl;
cerr << "\t\toverlap B on the same strand." << endl;
cerr << "\t\t- By default, hits are included without respect to strand." << endl << endl;
exit(1);
cerr << endl << "Program: " << PROGRAM_NAME << " (v" << VERSION << ")" << endl;
cerr << "Author: Aaron Quinlan (aaronquinlan@gmail.com)" << endl;
cerr << "Summary: Annotates the depth & breadth of coverage of features from multiple files" << endl;