From f2433c5c78ac5eab63a871635de38cf69a582f5a Mon Sep 17 00:00:00 2001 From: Aaron <aaronquinlan@gmail.com> Date: Mon, 5 Sep 2011 11:02:49 -0400 Subject: [PATCH] Added -s, -S, and -names to tagBam. --- src/tagBam/tagBam.cpp | 33 +++++++++++++++++++++++---------- src/tagBam/tagBam.h | 11 +++++++---- src/tagBam/tagBamMain.cpp | 39 +++++++++++++++++++++++++++++++++------ 3 files changed, 63 insertions(+), 20 deletions(-) diff --git a/src/tagBam/tagBam.cpp b/src/tagBam/tagBam.cpp index 56174357..d06cf9ed 100644 --- a/src/tagBam/tagBam.cpp +++ b/src/tagBam/tagBam.cpp @@ -15,13 +15,15 @@ // build TagBam::TagBam(const string &bamFile, const vector<string> &annoFileNames, const vector<string> &annoLables, const string &tag, - bool forceStrand, float overlapFraction) : + bool useNames, bool sameStrand, bool diffStrand, float overlapFraction): _bamFile(bamFile), _annoFileNames(annoFileNames), _annoLabels(annoLables), _tag(tag), - _forceStrand(forceStrand), + _useNames(useNames), + _sameStrand(sameStrand), + _diffStrand(diffStrand), _overlapFraction(overlapFraction) {} @@ -50,10 +52,6 @@ void TagBam::CloseAnnoFiles() { } } -bool TagBam::FindOneOrMoreOverlap(const BED &a, BedFile *bedFile) { - return bedFile->FindOneOrMoreOverlapsPerBin(a.chrom, a.start, a.end, a.strand, - _forceStrand, _overlapFraction); -} void TagBam::Tag() { @@ -77,14 +75,15 @@ void TagBam::Tag() { // rip through the BAM file and test for overlaps with each annotation file. BamAlignment al; + vector<BED> hits; while (reader.GetNextAlignment(al)) { if (al.IsMapped() == true) { BED a; a.chrom = refs.at(al.RefID).RefName; a.start = al.Position; a.end = al.GetEndPosition(false, false); - if (al.IsReverseStrand()) a.strand = "-"; a.strand = "+"; + if (al.IsReverseStrand()) a.strand = "-"; ostringstream annotations; // annotate the BAM file based on overlaps with the annotation files. @@ -92,9 +91,23 @@ void TagBam::Tag() { { // grab the current annotation file. BedFile *anno = _annoFiles[i]; - // add the label for this annotation file to tag if there is overlap - if (FindOneOrMoreOverlap(a, anno)) { - annotations << _annoLabels[i] << ";"; + + if (!_useNames) { + // add the label for this annotation file to tag if there is overlap + if (anno->FindOneOrMoreOverlapsPerBin(a.chrom, a.start, a.end, a.strand, _sameStrand, _diffStrand, _overlapFraction)) + { + annotations << _annoLabels[i] << ";"; + } + } + // use the name field from the annotation files to populate tag + else { + anno->FindOverlapsPerBin(a.chrom, a.start, a.end, a.strand, hits, _sameStrand, _diffStrand); + for (size_t i = 0; i < hits.size(); ++i) { + annotations << hits[i].name; + if (i < hits.size() - 1) annotations << ","; + } + if (hits.size() > 0) annotations << ";"; + hits.clear(); } } // were there any overlaps with which to make a tag? diff --git a/src/tagBam/tagBam.h b/src/tagBam/tagBam.h index 821d0044..10ade9ad 100644 --- a/src/tagBam/tagBam.h +++ b/src/tagBam/tagBam.h @@ -41,7 +41,7 @@ public: // constructor TagBam(const string &bamFile, const vector<string> &annoFileNames, const vector<string> &annoLabels, const string &tag, - bool forceStrand, float overlapFraction); + bool useNames, bool sameStrand, bool diffStrand, float overlapFraction); // destructor ~TagBam(void); @@ -61,8 +61,12 @@ private: BedFile *_bed; vector<BedFile*> _annoFiles; + // should we use the name field from the annotation files? + bool _useNames; + // do we care about strandedness when tagging? - bool _forceStrand; + bool _sameStrand; + bool _diffStrand; float _overlapFraction; // private function for reporting coverage information @@ -71,7 +75,6 @@ private: void OpenAnnoFiles(); void CloseAnnoFiles(); - - bool FindOneOrMoreOverlap(const BED &a, BedFile *bedFile); + }; #endif /* TAGBAM_H */ diff --git a/src/tagBam/tagBamMain.cpp b/src/tagBam/tagBamMain.cpp index d2c1a5b5..acd9c70e 100644 --- a/src/tagBam/tagBamMain.cpp +++ b/src/tagBam/tagBamMain.cpp @@ -36,7 +36,9 @@ int main(int argc, char* argv[]) { // parm flags bool haveTag = false; bool haveFraction = false; - bool forceStrand = false; + bool useNames = false; + bool sameStrand = false; + bool diffStrand = false; bool haveBam = false; bool haveFiles = false; bool haveLabels = false; @@ -99,8 +101,14 @@ int main(int argc, char* argv[]) { i--; } } + else if (PARAMETER_CHECK("-names", 6, parameterLength)) { + useNames = true; + } else if (PARAMETER_CHECK("-s", 2, parameterLength)) { - forceStrand = true; + sameStrand = true; + } + else if (PARAMETER_CHECK("-S", 2, parameterLength)) { + diffStrand = true; } else if(PARAMETER_CHECK("-f", 2, parameterLength)) { if ((i+1) < argc) { @@ -123,8 +131,20 @@ int main(int argc, char* argv[]) { } // make sure we have both input files - if (!haveBam || !haveFiles || !haveLabels) { - cerr << endl << "*****" << endl << "*****ERROR: Need -i, -files, and -labels. " << endl << "*****" << endl; + if (!haveBam || !haveFiles) { + cerr << endl << "*****" << endl << "*****ERROR: Need -i, -files" << endl << "*****" << endl; + showHelp = true; + } + if (!useNames && !haveLabels) { + cerr << endl << "*****" << endl << "*****ERROR: Need -labels or -names" << endl << "*****" << endl; + showHelp = true; + } + if (sameStrand && diffStrand) { + cerr << endl << "*****" << endl << "*****ERROR: Use -s or -S, not both. " << endl << "*****" << endl; + showHelp = true; + } + if (haveLabels && useNames) { + cerr << endl << "*****" << endl << "*****ERROR: Use -labels or -names, not both. " << endl << "*****" << endl; showHelp = true; } if (haveTag && tag.size() > 2) { @@ -133,7 +153,7 @@ int main(int argc, char* argv[]) { } if (!showHelp) { - TagBam *ba = new TagBam(bamFile, inputFiles, inputLabels, tag, forceStrand, overlapFraction); + TagBam *ba = new TagBam(bamFile, inputFiles, inputLabels, tag, useNames, sameStrand, diffStrand, overlapFraction); ba->Tag(); delete ba; return 0; @@ -156,7 +176,10 @@ void ShowHelp(void) { cerr << "Options: " << endl; - cerr << "\t-s\t" << "Force strandedness. That is, only tag alignments that have the same" << endl; + cerr << "\t-s\t" << "Require overlaps on the same strand. That is, only tag alignments that have the same" << endl; + cerr << "\t\tstrand as a feature in the annotation file(s)." << endl << endl; + + cerr << "\t-S\t" << "Require overlaps on the opposite strand. That is, only tag alignments that have the opposite" << endl; cerr << "\t\tstrand as a feature in the annotation file(s)." << endl << endl; cerr << "\t-f\t" << "Minimum overlap required as a fraction of the alignment." << endl; @@ -166,5 +189,9 @@ void ShowHelp(void) { cerr << "\t-tag\t" << "Dictate what the tag should be. Default is YB." << endl; cerr << "\t\t- STRING (two characters, e.g., YK)" << endl << endl; + cerr << "\t-names\t" << "Use the name field from the annotation files to populate tags." << endl; + cerr << "\t\tBy default, the -labels values are used." << endl << endl; + + exit(1); } -- GitLab