From 96e2ac601c1392b6d62d1ff2c0978e6860ff7f2d Mon Sep 17 00:00:00 2001
From: Aaron <aaronquinlan@gmail.com>
Date: Thu, 3 Mar 2011 10:57:34 -0500
Subject: [PATCH] Optionally set masking character (-mc) in maskFastaFromBed. 
 Thanks to Can Alkan.

---
 src/maskFastaFromBed/maskFastaFromBed.cpp     | 24 ++++++++----------
 src/maskFastaFromBed/maskFastaFromBed.h       |  4 ++-
 src/maskFastaFromBed/maskFastaFromBedMain.cpp | 25 +++++++++++++++----
 3 files changed, 33 insertions(+), 20 deletions(-)

diff --git a/src/maskFastaFromBed/maskFastaFromBed.cpp b/src/maskFastaFromBed/maskFastaFromBed.cpp
index 654ff6db..2bab1226 100644
--- a/src/maskFastaFromBed/maskFastaFromBed.cpp
+++ b/src/maskFastaFromBed/maskFastaFromBed.cpp
@@ -13,21 +13,17 @@
 #include "maskFastaFromBed.h"
 
 
-MaskFastaFromBed::MaskFastaFromBed(string &fastaInFile, string &bedFile, string &fastaOutFile, bool &softMask) {
-
-    _softMask = false;
-    if (softMask) {
-        _softMask = true;
-    }
-
-    _fastaInFile = fastaInFile;
-    _bedFile = bedFile;
+MaskFastaFromBed::MaskFastaFromBed(const string &fastaInFile,  const string &bedFile, 
+                                   const string &fastaOutFile, bool softMask, char maskChar) {
+    _softMask     = softMask;
+    _fastaInFile  = fastaInFile;
+    _bedFile      = bedFile;
     _fastaOutFile = fastaOutFile;
-
-    _bed = new BedFile(_bedFile);
+    _maskChar     = maskChar;
+    _bed          = new BedFile(_bedFile);
 
     _bed->loadBedFileIntoMapNoBin();
-
+    // start masking.
     MaskFasta();
 }
 
@@ -102,7 +98,7 @@ void MaskFastaFromBed::MaskFasta() {
                         currDNA.replace(start, length, replacement);
                     }
                     else {
-                        string hardmask(length, 'N');
+                        string hardmask(length, _maskChar);
                         currDNA.replace(start, length, hardmask);
                     }
                 }
@@ -133,7 +129,7 @@ void MaskFastaFromBed::MaskFasta() {
                 currDNA.replace(start, length, replacement);
             }
             else {
-                string hardmask(length, 'N');
+                string hardmask(length, _maskChar);
                 currDNA.replace(start, length, hardmask);
             }
         }
diff --git a/src/maskFastaFromBed/maskFastaFromBed.h b/src/maskFastaFromBed/maskFastaFromBed.h
index 1b7a3590..4cf68c84 100644
--- a/src/maskFastaFromBed/maskFastaFromBed.h
+++ b/src/maskFastaFromBed/maskFastaFromBed.h
@@ -28,7 +28,8 @@ class MaskFastaFromBed {
 public:
 
     // constructor
-    MaskFastaFromBed(string &fastaInFile, string &bedFile, string &fastaOutFile, bool &softMask);
+    MaskFastaFromBed(const string &fastaInFile,  const string &bedFile, 
+                     const string &fastaOutFile, bool softMask, char maskChar);
 
     // destructor
     ~MaskFastaFromBed(void);
@@ -41,6 +42,7 @@ private:
     string _fastaInFile;
     string _bedFile;
     string _fastaOutFile;
+    char   _maskChar;     // typically 'N', but users can choose something else, e.g., 'X'
 
     // instance of a bed file class.
     BedFile *_bed;
diff --git a/src/maskFastaFromBed/maskFastaFromBedMain.cpp b/src/maskFastaFromBed/maskFastaFromBedMain.cpp
index 0978436d..7fce56bb 100644
--- a/src/maskFastaFromBed/maskFastaFromBedMain.cpp
+++ b/src/maskFastaFromBed/maskFastaFromBedMain.cpp
@@ -36,11 +36,12 @@ int main(int argc, char* argv[]) {
     // output files
     string fastaOutFile;
 
-    // checks for existence of parameters
-    bool haveFastaIn = false;
-    bool haveBed = false;
+    // defaults for parameters
+    bool haveFastaIn  = false;
+    bool haveBed      = false;
     bool haveFastaOut = false;
-    bool softMask = false;
+    bool softMask     = false;
+    char maskChar     = 'N';
 
     // check to see if we should print out some help
     if(argc <= 1) showHelp = true;
@@ -85,6 +86,19 @@ int main(int argc, char* argv[]) {
         else if(PARAMETER_CHECK("-soft", 5, parameterLength)) {
             softMask = true;
         }
+        else if(PARAMETER_CHECK("-mc", 3, parameterLength)) {
+            if ((i+1) < argc) {
+                string mask = argv[i + 1];
+                if (mask.size() > 1) {
+                    cerr << "*****ERROR: The mask character (-mc) should be a single character.*****" << endl << endl;
+                    showHelp = true;
+                }
+                else {
+                    maskChar = mask[0];
+                }
+                i++;
+            }
+        }
         else {
             cerr << "*****ERROR: Unrecognized parameter: " << argv[i] << " *****" << endl << endl;
             showHelp = true;
@@ -97,7 +111,7 @@ int main(int argc, char* argv[]) {
 
     if (!showHelp) {
 
-        MaskFastaFromBed *maskFasta = new MaskFastaFromBed(fastaInFile, bedFile, fastaOutFile, softMask);
+        MaskFastaFromBed *maskFasta = new MaskFastaFromBed(fastaInFile, bedFile, fastaOutFile, softMask, maskChar);
         delete maskFasta;
         return 0;
     }
@@ -124,6 +138,7 @@ void ShowHelp(void) {
     cerr << "\t-fo\tOutput FASTA file" << endl;
     cerr << "\t-soft\tEnforce \"soft\" masking.  That is, instead of masking with Ns," << endl;
     cerr << "\t\tmask with lower-case bases." << endl;
+    cerr << "\t-mc\tReplace masking character.  That is, instead of masking with Ns, use another character." << endl;
 
     // end the program here
     exit(1);
-- 
GitLab