From f2433c5c78ac5eab63a871635de38cf69a582f5a Mon Sep 17 00:00:00 2001
From: Aaron <aaronquinlan@gmail.com>
Date: Mon, 5 Sep 2011 11:02:49 -0400
Subject: [PATCH] Added -s, -S, and -names to tagBam.

---
 src/tagBam/tagBam.cpp     | 33 +++++++++++++++++++++++----------
 src/tagBam/tagBam.h       | 11 +++++++----
 src/tagBam/tagBamMain.cpp | 39 +++++++++++++++++++++++++++++++++------
 3 files changed, 63 insertions(+), 20 deletions(-)

diff --git a/src/tagBam/tagBam.cpp b/src/tagBam/tagBam.cpp
index 56174357..d06cf9ed 100644
--- a/src/tagBam/tagBam.cpp
+++ b/src/tagBam/tagBam.cpp
@@ -15,13 +15,15 @@
 // build
 TagBam::TagBam(const string &bamFile, const vector<string> &annoFileNames,
             const vector<string> &annoLables, const string &tag,
-            bool forceStrand, float overlapFraction) :
+            bool useNames, bool sameStrand, bool diffStrand, float overlapFraction):
 
     _bamFile(bamFile),
     _annoFileNames(annoFileNames),
     _annoLabels(annoLables),
     _tag(tag),
-    _forceStrand(forceStrand),
+    _useNames(useNames),
+    _sameStrand(sameStrand),
+    _diffStrand(diffStrand),
     _overlapFraction(overlapFraction)
 {}
 
@@ -50,10 +52,6 @@ void TagBam::CloseAnnoFiles() {
     }
 }
 
-bool TagBam::FindOneOrMoreOverlap(const BED &a, BedFile *bedFile) {
-    return bedFile->FindOneOrMoreOverlapsPerBin(a.chrom, a.start, a.end, a.strand,
-                                                _forceStrand, _overlapFraction);
-}
 
 void TagBam::Tag() {
 
@@ -77,14 +75,15 @@ void TagBam::Tag() {
 
     // rip through the BAM file and test for overlaps with each annotation file.
     BamAlignment al;
+    vector<BED> hits;
     while (reader.GetNextAlignment(al)) {
         if (al.IsMapped() == true) {
             BED a;
             a.chrom = refs.at(al.RefID).RefName;
             a.start = al.Position;
             a.end   = al.GetEndPosition(false, false);
-            if (al.IsReverseStrand()) a.strand = "-";
             a.strand = "+";
+            if (al.IsReverseStrand()) a.strand = "-";
             
             ostringstream annotations;
             // annotate the BAM file based on overlaps with the annotation files.
@@ -92,9 +91,23 @@ void TagBam::Tag() {
             {
                 // grab the current annotation file.
                 BedFile *anno = _annoFiles[i];
-                // add the label for this annotation file to tag if there is overlap
-                if (FindOneOrMoreOverlap(a, anno)) {
-                    annotations << _annoLabels[i] << ";";
+                
+                if (!_useNames) {
+                    // add the label for this annotation file to tag if there is overlap
+                    if (anno->FindOneOrMoreOverlapsPerBin(a.chrom, a.start, a.end, a.strand, _sameStrand, _diffStrand, _overlapFraction)) 
+                    {
+                        annotations << _annoLabels[i] << ";";
+                    }
+                }
+                // otherwise, tag with the name field of each overlapping annotation (comma-separated).
+                else {
+                    // NOTE(review): unlike the label branch, _overlapFraction is not passed here, so -f looks unused with -names; confirm.
+                    anno->FindOverlapsPerBin(a.chrom, a.start, a.end, a.strand, hits, _sameStrand, _diffStrand);
+                    // index named "j" so it does not shadow the enclosing annotation-file index "i".
+                    for (size_t j = 0; j < hits.size(); ++j)
+                        annotations << (j > 0 ? "," : "") << hits[j].name;
+                    if (!hits.empty()) annotations << ";";
+                    hits.clear();  // hits is declared outside the read loop and reused, so empty it for the next query
                 }
             }
             // were there any overlaps with which to make a tag?
diff --git a/src/tagBam/tagBam.h b/src/tagBam/tagBam.h
index 821d0044..10ade9ad 100644
--- a/src/tagBam/tagBam.h
+++ b/src/tagBam/tagBam.h
@@ -41,7 +41,7 @@ public:
     // constructor
     TagBam(const string &bamFile, const vector<string> &annoFileNames,
                 const vector<string> &annoLabels, const string &tag,
-                bool forceStrand, float overlapFraction);
+                bool useNames, bool sameStrand, bool diffStrand, float overlapFraction);
 
     // destructor
     ~TagBam(void);
@@ -61,8 +61,12 @@ private:
     BedFile *_bed;
     vector<BedFile*> _annoFiles;
 
+    // should we use the name field from the annotation files?
+    bool _useNames;
+    
     // do we care about strandedness when tagging?
-    bool _forceStrand;
+    bool _sameStrand;   // set by -s: require overlaps on the same strand (mutually exclusive with -S)
+    bool _diffStrand;   // set by -S: require overlaps on the opposite strand (mutually exclusive with -s)
     float _overlapFraction;
 
     // private function for reporting coverage information
@@ -71,7 +75,6 @@ private:
     void OpenAnnoFiles();
 
     void CloseAnnoFiles();
-    
-    bool FindOneOrMoreOverlap(const BED &a, BedFile *bedFile);
+
 };
 #endif /* TAGBAM_H */
diff --git a/src/tagBam/tagBamMain.cpp b/src/tagBam/tagBamMain.cpp
index d2c1a5b5..acd9c70e 100644
--- a/src/tagBam/tagBamMain.cpp
+++ b/src/tagBam/tagBamMain.cpp
@@ -36,7 +36,9 @@ int main(int argc, char* argv[]) {
     // parm flags
     bool haveTag        = false;
     bool haveFraction   = false;
-    bool forceStrand    = false;
+    bool useNames       = false;
+    bool sameStrand     = false;
+    bool diffStrand     = false;
     bool haveBam        = false;
     bool haveFiles      = false;
     bool haveLabels     = false;
@@ -99,8 +101,14 @@ int main(int argc, char* argv[]) {
                 i--;
             }
         }
+        else if (PARAMETER_CHECK("-names", 6, parameterLength)) {
+            useNames = true;
+        }
         else if (PARAMETER_CHECK("-s", 2, parameterLength)) {
-            forceStrand = true;
+            sameStrand = true;
+        }
+        else if (PARAMETER_CHECK("-S", 2, parameterLength)) {
+            diffStrand = true;
         }
         else if(PARAMETER_CHECK("-f", 2, parameterLength)) {
             if ((i+1) < argc) {
@@ -123,8 +131,20 @@ int main(int argc, char* argv[]) {
     }
 
     // make sure we have both input files
-    if (!haveBam || !haveFiles || !haveLabels) {
-        cerr << endl << "*****" << endl << "*****ERROR: Need -i, -files, and -labels. " << endl << "*****" << endl;
+    if (!haveBam || !haveFiles) {
+        cerr << endl << "*****" << endl << "*****ERROR: Need -i, -files" << endl << "*****" << endl;
+        showHelp = true;
+    }
+    if (!useNames && !haveLabels) {
+        cerr << endl << "*****" << endl << "*****ERROR: Need -labels or -names" << endl << "*****" << endl;
+        showHelp = true;
+    }
+    if (sameStrand && diffStrand) {
+        cerr << endl << "*****" << endl << "*****ERROR: Use -s or -S, not both. " << endl << "*****" << endl;
+        showHelp = true;
+    }
+    if (haveLabels && useNames) {
+        cerr << endl << "*****" << endl << "*****ERROR: Use -labels or -names, not both. " << endl << "*****" << endl;
         showHelp = true;
     }
     if (haveTag && tag.size() > 2) {
@@ -133,7 +153,7 @@ int main(int argc, char* argv[]) {
     }
 
     if (!showHelp) {
-        TagBam *ba = new TagBam(bamFile, inputFiles, inputLabels, tag, forceStrand, overlapFraction);
+        TagBam *ba = new TagBam(bamFile, inputFiles, inputLabels, tag, useNames, sameStrand, diffStrand, overlapFraction);
         ba->Tag();
         delete ba;
         return 0;
@@ -156,7 +176,10 @@ void ShowHelp(void) {
 
     cerr << "Options: " << endl;
 
-    cerr << "\t-s\t"            << "Force strandedness.  That is, only tag alignments that have the same" << endl;
+    cerr << "\t-s\t"            << "Require overlaps on the same strand.  That is, only tag alignments that have the same" << endl;
+    cerr                        << "\t\tstrand as a feature in the annotation file(s)." << endl << endl;
+
+    cerr << "\t-S\t"            << "Require overlaps on the opposite strand.  That is, only tag alignments that have the opposite" << endl;
     cerr                        << "\t\tstrand as a feature in the annotation file(s)." << endl << endl;
 
     cerr << "\t-f\t"            << "Minimum overlap required as a fraction of the alignment." << endl;
@@ -166,5 +189,9 @@ void ShowHelp(void) {
     cerr << "\t-tag\t"          << "Dictate what the tag should be. Default is YB." << endl;
     cerr                        << "\t\t- STRING (two characters, e.g., YK)" << endl << endl;
     
+    cerr << "\t-names\t"        << "Use the name field from the annotation files to populate tags." << endl;
+    cerr                        << "\t\tBy default, the -labels values are used." << endl << endl;
+    
+    
     exit(1);
 }
-- 
GitLab