Skip to content
Snippets Groups Projects
Commit 98532d54 authored by Aaron's avatar Aaron
Browse files

Added BEDGRAPH functionality to genomeCoverageBed. From Gordon Assaf, CSHL.

parent c619fea6
No related branches found
No related tags found
No related merge requests found
...@@ -15,15 +15,16 @@ ...@@ -15,15 +15,16 @@
/* /*
Constructor Constructor
*/ */
BedCoverage::BedCoverage(string &bedFile, string &genomeFile, bool &eachBase, bool &startSites, int &max) { BedCoverage::BedCoverage(string &bedFile, string &genomeFile, bool &eachBase, bool &startSites, bool &bedGraph, int &max) {
this->bedFile = bedFile; this->bedFile = bedFile;
this->genomeFile = genomeFile; this->genomeFile = genomeFile;
this->eachBase = eachBase; this->eachBase = eachBase;
this->startSites = startSites; this->startSites = startSites;
this->max = max; this->bedGraph = bedGraph;
this->max = max;
this->bed = new BedFile(bedFile); this->bed = new BedFile(bedFile);
} }
...@@ -139,14 +140,14 @@ void BedCoverage::CoverageBeds(istream &bedInput) { ...@@ -139,14 +140,14 @@ void BedCoverage::CoverageBeds(istream &bedInput) {
// process the results of the last chromosome. // process the results of the last chromosome.
ReportChromCoverage(chromCov, chromSizes[currChrom], currChrom, chromDepthHist); ReportChromCoverage(chromCov, chromSizes[currChrom], currChrom, chromDepthHist);
if (! this->eachBase) { if (this->eachBase == false && this->bedGraph == false) {
ReportGenomeCoverage(chromSizes, chromDepthHist); ReportGenomeCoverage(chromSizes, chromDepthHist);
} }
} }
void BedCoverage::ReportChromCoverage(vector<DEPTH> &chromCov, int &chromSize, string &chrom, chromHistMap &chromDepthHist) { void BedCoverage::ReportChromCoverage(const vector<DEPTH> &chromCov, int &chromSize, string &chrom, chromHistMap &chromDepthHist) {
if (this->eachBase) { if (this->eachBase) {
int depth = 0; // initialize the depth int depth = 0; // initialize the depth
...@@ -159,6 +160,9 @@ void BedCoverage::ReportChromCoverage(vector<DEPTH> &chromCov, int &chromSize, s ...@@ -159,6 +160,9 @@ void BedCoverage::ReportChromCoverage(vector<DEPTH> &chromCov, int &chromSize, s
depth = depth - chromCov[pos].ends; depth = depth - chromCov[pos].ends;
} }
} }
else if (this->bedGraph) {
ReportChromCoverageBedGraph(chromCov, chromSize, chrom);
}
else { else {
int depth = 0; // initialize the depth int depth = 0; // initialize the depth
...@@ -232,6 +236,46 @@ void BedCoverage::ReportGenomeCoverage(map<string, int> &chromSizes, chromHistMa ...@@ -232,6 +236,46 @@ void BedCoverage::ReportGenomeCoverage(map<string, int> &chromSizes, chromHistMa
} }
void BedCoverage::ReportChromCoverageBedGraph(const vector<DEPTH> &chromCov, int &chromSize, string &chrom) {
int depth = 0; // initialize the depth
int lastStart = -1 ;
int lastDepth = -1 ;
for (int pos = 0; pos < chromSize; pos++) {
depth += chromCov[pos].starts;
if (depth == 0 && lastDepth != -1) {
// We've found a new block of zero coverage, so report
// the previous block of non-zero coverage.
cout << chrom << "\t" << lastStart << "\t" << pos << "\t" << lastDepth << endl;
lastDepth = -1;
lastStart = -1;
}
else if (depth > 0 && depth != lastDepth) {
// Coverage depth has changed, print the last interval coverage (if any)
if (lastDepth != -1) {
cout << chrom << "\t" << lastStart << "\t" << pos << "\t" << lastDepth << endl;
}
//Set current position as the new interval start + depth
lastDepth = depth;
lastStart = pos;
}
// Default: the depth has not changed, so we will not print anything.
// Proceed until the depth changes.
// Update depth
depth = depth - chromCov[pos].ends;
}
//Print information about the last position
if (lastDepth != -1) {
cout << chrom << "\t" << lastStart << "\t" << chromSize << "\t" << lastDepth << endl;
}
}
void BedCoverage::DetermineBedInput() { void BedCoverage::DetermineBedInput() {
if (bed->bedFile != "stdin") { // process a file if (bed->bedFile != "stdin") { // process a file
ifstream beds(bed->bedFile.c_str(), ios::in); ifstream beds(bed->bedFile.c_str(), ios::in);
......
...@@ -34,14 +34,18 @@ class BedCoverage { ...@@ -34,14 +34,18 @@ class BedCoverage {
public: public:
// constructor // constructor
BedCoverage(string &, string &, bool &, bool &, int &); BedCoverage(string &bedFile, string &genomeFile, bool &eachBase, bool &startSites, bool &bedGraph, int &max);
// destructor // destructor
~BedCoverage(void); ~BedCoverage(void);
void CoverageBeds(istream &bedInput); void CoverageBeds(istream &bedInput);
void ReportChromCoverage(vector<DEPTH> &, int &, string &, chromHistMap&);
void ReportGenomeCoverage(map<string, int> &, chromHistMap&); void ReportChromCoverage(const vector<DEPTH> &, int &chromSize, string &chrom, chromHistMap&);
void ReportGenomeCoverage(map<string, int> &chromSizes, chromHistMap &chromDepthHist);
void ReportChromCoverageBedGraph(const vector<DEPTH> &chromCov, int &chromSize, string &chrom);
void DetermineBedInput(); void DetermineBedInput();
...@@ -51,10 +55,10 @@ private: ...@@ -51,10 +55,10 @@ private:
string genomeFile; string genomeFile;
bool eachBase; bool eachBase;
bool startSites; bool startSites;
bool bedGraph;
int max; int max;
// The BED file from which to compute coverage. // The BED file from which to compute coverage.
BedFile *bed; BedFile *bed;
chromDepthMap chromCov; chromDepthMap chromCov;
}; };
...@@ -34,10 +34,11 @@ int main(int argc, char* argv[]) { ...@@ -34,10 +34,11 @@ int main(int argc, char* argv[]) {
string genomeFile; string genomeFile;
int max = 999999999; int max = 999999999;
bool haveBed = false; bool haveBed = false;
bool haveGenome = false; bool haveGenome = false;
bool startSites = false; bool startSites = false;
bool eachBase = false; bool bedGraph = false;
bool eachBase = false;
// check to see if we should print out some help // check to see if we should print out some help
if(argc <= 1) showHelp = true; if(argc <= 1) showHelp = true;
...@@ -75,6 +76,9 @@ int main(int argc, char* argv[]) { ...@@ -75,6 +76,9 @@ int main(int argc, char* argv[]) {
else if(PARAMETER_CHECK("-d", 2, parameterLength)) { else if(PARAMETER_CHECK("-d", 2, parameterLength)) {
eachBase = true; eachBase = true;
} }
else if(PARAMETER_CHECK("-bg", 3, parameterLength)) {
bedGraph = true;
}
else if(PARAMETER_CHECK("-max", 4, parameterLength)) { else if(PARAMETER_CHECK("-max", 4, parameterLength)) {
if ((i+1) < argc) { if ((i+1) < argc) {
max = atoi(argv[i + 1]); max = atoi(argv[i + 1]);
...@@ -92,10 +96,14 @@ int main(int argc, char* argv[]) { ...@@ -92,10 +96,14 @@ int main(int argc, char* argv[]) {
cerr << endl << "*****" << endl << "*****ERROR: Need both a BED (-i) and a genome (-g) file. " << endl << "*****" << endl; cerr << endl << "*****" << endl << "*****ERROR: Need both a BED (-i) and a genome (-g) file. " << endl << "*****" << endl;
showHelp = true; showHelp = true;
} }
if (bedGraph && eachBase) {
cerr << endl << "*****" << endl << "*****ERROR: Use -d or -bedgraph, not both" << endl << "*****" << endl;
showHelp = true;
}
if (!showHelp) { if (!showHelp) {
BedCoverage *bc = new BedCoverage(bedFile, genomeFile, eachBase, startSites, max); BedCoverage *bc = new BedCoverage(bedFile, genomeFile, eachBase, startSites, bedGraph, max);
bc->DetermineBedInput(); bc->DetermineBedInput();
return 0; return 0;
...@@ -119,8 +127,12 @@ void ShowHelp(void) { ...@@ -119,8 +127,12 @@ void ShowHelp(void) {
cerr << "\t-d\t" << "Report the depth at each genome position." << endl; cerr << "\t-d\t" << "Report the depth at each genome position." << endl;
cerr << "\t\tDefault behavior is to report a histogram." << endl << endl; cerr << "\t\tDefault behavior is to report a histogram." << endl << endl;
cerr << "\t-bg\t" << "Report depth in BedGraph format. For details, see:" << endl;
cerr << "\t\tgenome.ucsc.edu/goldenPath/help/bedgraph.html" << endl << endl;
cerr << "\t-max\t" << "Combine all positions with a depth >= max into" << endl; cerr << "\t-max\t" << "Combine all positions with a depth >= max into" << endl;
cerr << "\t\ta single bin in the histogram." << endl; cerr << "\t\ta single bin in the histogram. Irrelevant" << endl;
cerr << "\t\tfor -d and -bedGraph" << endl;
cerr << "\t\t- (INTEGER)" << endl << endl; cerr << "\t\t- (INTEGER)" << endl << endl;
cerr << "Notes: " << endl; cerr << "Notes: " << endl;
...@@ -140,7 +152,7 @@ void ShowHelp(void) { ...@@ -140,7 +152,7 @@ void ShowHelp(void) {
cerr << "\tOne can use the UCSC Genome Browser's MySQL database to extract" << endl; cerr << "\tOne can use the UCSC Genome Browser's MySQL database to extract" << endl;
cerr << "\tchromosome sizes. For example, H. sapiens:" << endl << endl; cerr << "\tchromosome sizes. For example, H. sapiens:" << endl << endl;
cerr << "\tmysql --user=genome --host=genome-mysql.cse.ucsc.edu -A -e /" << endl; cerr << "\tmysql --user=genome --host=genome-mysql.cse.ucsc.edu -A -e /" << endl;
cerr << "\t\"select chrom, size from hg19.chromInfo\" > hg19.genome" << endl << endl; cerr << "\t\"select chrom, size from hg19.chromInfo\" | awk 'NR>1' > hg19.genome" << endl << endl;
// end the program here // end the program here
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment