From f82d2d225ac99287a5be11dd7f1e5447c43087fe Mon Sep 17 00:00:00 2001
From: Aaron <aaronquinlan@gmail.com>
Date: Thu, 17 Nov 2011 15:53:00 -0500
Subject: [PATCH] Added intervals option to tagBam.

---
 src/tagBam/tagBam.cpp     | 36 +++++++++++++++++++++++++++---------
 src/tagBam/tagBam.h       |  3 ++-
 src/tagBam/tagBamMain.cpp | 30 ++++++++++++++++++++++++++----
 3 files changed, 55 insertions(+), 14 deletions(-)

diff --git a/src/tagBam/tagBam.cpp b/src/tagBam/tagBam.cpp
index f8d1d7ea..d7474e04 100644
--- a/src/tagBam/tagBam.cpp
+++ b/src/tagBam/tagBam.cpp
@@ -15,7 +15,8 @@
 // build
 TagBam::TagBam(const string &bamFile, const vector<string> &annoFileNames,
             const vector<string> &annoLables, const string &tag,
-            bool useNames, bool useScores, bool sameStrand, bool diffStrand, float overlapFraction):
+            bool useNames, bool useScores, bool useIntervals,
+            bool sameStrand, bool diffStrand, float overlapFraction):

     _bamFile(bamFile),
     _annoFileNames(annoFileNames),
@@ -23,6 +24,7 @@ TagBam::TagBam(const string &bamFile, const vector<string> &annoFileNames,
     _tag(tag),
     _useNames(useNames),
     _useScores(useScores),
+    _useIntervals(useIntervals),
     _sameStrand(sameStrand),
     _diffStrand(diffStrand),
     _overlapFraction(overlapFraction)
@@ -94,7 +96,7 @@ void TagBam::Tag() {
                 // grab the current annotation file.
                 BedFile *anno = _annoFiles[i];

-                if (!_useNames && !_useScores) {
+                if (!_useNames && !_useScores && !_useIntervals) {
                     // add the label for this annotation file to tag if there is overlap
                     if (anno->FindOneOrMoreOverlapsPerBin(a.chrom, a.start, a.end, a.strand, _sameStrand, _diffStrand, _overlapFraction))
                     {
@@ -102,7 +104,7 @@ void TagBam::Tag() {
                     }
                 }
                 // use the score field
-                else if (!_useNames && _useScores) {
+                else if (!_useNames && _useScores && !_useIntervals) {
                     anno->FindOverlapsPerBin(a.chrom, a.start, a.end, a.strand, hits, _sameStrand, _diffStrand);
                     for (size_t i = 0; i < hits.size(); ++i) {
                         annotations << hits[i].score;
@@ -112,11 +114,27 @@ void TagBam::Tag() {
                     hits.clear();
                 }
                 // use the name field from the annotation files to populate tag
-                else if (_useNames && !_useScores) {
+                else if (_useNames && !_useScores && !_useIntervals) {
                     anno->FindOverlapsPerBin(a.chrom, a.start, a.end, a.strand, hits, _sameStrand, _diffStrand);
-                    for (size_t i = 0; i < hits.size(); ++i) {
-                        annotations << hits[i].name;
-                        if (i < hits.size() - 1) annotations << ",";
+                    for (size_t j = 0; j < hits.size(); ++j) {
+                        annotations << hits[j].name;
+                        if (j < hits.size() - 1) annotations << ",";
+                    }
+                    if (hits.size() > 0) annotations << ";";
+                    hits.clear();
+                }
+                // use the full interval (file label, coordinates, name, score, strand) to populate tag
+                else if (!_useNames && !_useScores && _useIntervals) {
+                    anno->FindOverlapsPerBin(a.chrom, a.start, a.end, a.strand, hits, _sameStrand, _diffStrand);
+                    for (size_t j = 0; j < hits.size(); ++j) {
+                        annotations << _annoLabels[i] << ":" <<
+                                        hits[j].chrom << ":" <<
+                                        hits[j].start << "-" <<
+                                        hits[j].end << "," <<
+                                        hits[j].name << "," <<
+                                        hits[j].score << "," <<
+                                        hits[j].strand;
+                        if (j < hits.size() - 1) annotations << ",";
                     }
                     if (hits.size() > 0) annotations << ";";
                     hits.clear();
@@ -126,11 +144,11 @@ void TagBam::Tag() {
             if (annotations.str().size() > 0) {
                 al.AddTag(_tag, "Z", annotations.str().substr(0, annotations.str().size() - 1)); // get rid of the last ";"
             }
-            writer.SaveAlignment(al);
         }
+        writer.SaveAlignment(al);
     }
     reader.Close();
-
+    writer.Close();
     // close the annotations files;
     CloseAnnoFiles();
 }
diff --git a/src/tagBam/tagBam.h b/src/tagBam/tagBam.h
index 689dd0ac..082f38b7 100644
--- a/src/tagBam/tagBam.h
+++ b/src/tagBam/tagBam.h
@@ -41,7 +41,7 @@ public:
     // constructor
     TagBam(const string &bamFile, const vector<string> &annoFileNames,
            const vector<string> &annoLabels, const string &tag,
-           bool useNames, bool useScores, bool sameStrand,
+           bool useNames, bool useScores, bool useIntervals, bool sameStrand,
            bool diffStrand, float overlapFraction);

     // destructor
@@ -66,6 +66,7 @@ private:
     // should we use the name field from the annotation files?
     bool _useNames;
     bool _useScores;
+    bool _useIntervals;

     // do we care about strandedness when tagging?
     bool _sameStrand;
diff --git a/src/tagBam/tagBamMain.cpp b/src/tagBam/tagBamMain.cpp
index 6bef5734..b4c750dc 100644
--- a/src/tagBam/tagBamMain.cpp
+++ b/src/tagBam/tagBamMain.cpp
@@ -38,6 +38,7 @@ int main(int argc, char* argv[]) {
     bool haveFraction = false;
     bool useNames = false;
     bool useScores = false;
+    bool useIntervals = false;
     bool sameStrand = false;
     bool diffStrand = false;
     bool haveBam = false;
@@ -109,6 +110,9 @@ int main(int argc, char* argv[]) {
             else if (PARAMETER_CHECK("-scores", 7, parameterLength)) {
                 useScores = true;
             }
+            else if (PARAMETER_CHECK("-intervals", 10, parameterLength)) {
+                useIntervals = true;
+            }
             else if (PARAMETER_CHECK("-s", 2, parameterLength)) {
                 sameStrand = true;
             }
@@ -156,13 +160,29 @@ int main(int argc, char* argv[]) {
         cerr << endl << "*****" << endl << "*****ERROR: Use -scores or -names, not both. " << endl << "*****" << endl;
         showHelp = true;
     }
+    if (useScores && useIntervals) {
+        cerr << endl << "*****" << endl << "*****ERROR: Use -scores or -intervals, not both. " << endl << "*****" << endl;
+        showHelp = true;
+    }
+    if (useNames && useIntervals) {
+        cerr << endl << "*****" << endl << "*****ERROR: Use -names or -intervals, not both. " << endl << "*****" << endl;
+        showHelp = true;
+    }
+    if (!haveLabels && useIntervals) {
+        cerr << endl << "*****" << endl << "*****ERROR: Supply -labels when using -intervals. " << endl << "*****" << endl;
+        showHelp = true;
+    }
     if (haveTag && tag.size() > 2) {
         cerr << endl << "*****" << endl << "*****ERROR: Custom tags should be at most two characters per the SAM specification. " << endl << "*****" << endl;
         showHelp = true;
     }
+

     if (!showHelp) {
-        TagBam *ba = new TagBam(bamFile, inputFiles, inputLabels, tag, useNames, useScores, sameStrand, diffStrand, overlapFraction);
+        TagBam *ba = new TagBam(bamFile, inputFiles, inputLabels,
+                                tag, useNames, useScores,
+                                useIntervals, sameStrand, diffStrand,
+                                overlapFraction);
         ba->Tag();
         delete ba;
         return 0;
@@ -201,9 +221,11 @@ void ShowHelp(void) {
     cerr << "\t-names\t" << "Use the name field from the annotation files to populate tags." << endl;
     cerr << "\t\tBy default, the -labels values are used." << endl << endl;

-    cerr << "\t-scores\t" << "A list of 1-based columns for each annotation file" << endl;
-    cerr << "\t\tin which a color can be found." << endl << endl;
-
+    cerr << "\t-scores\t" << "Use the score field from the annotation files to populate tags." << endl;
+    cerr << "\t\tBy default, the -labels values are used." << endl << endl;
+
+    cerr << "\t-intervals\t" << "Use the full interval (including name, score, and strand) to populate tags." << endl;
+    cerr << "\t\t\tRequires the -labels option to identify from which file the interval came." << endl << endl;

     exit(1);
 }
-- 
GitLab