Speed up the tokenizer by about 15% by replacing the std::string find/substr loop with strtok.

59d77134 · Aaron · 45952e64 · 59d77134
Commit 59d77134 authored 13 years ago by Aaron
--- a/src/utils/lineFileUtilities/lineFileUtilities.h
+++ b/src/utils/lineFileUtilities/lineFileUtilities.h
@@ -3,15 +3,11 @@
 #include <vector>
 #include <string>
-#include <algorithm>
+#include <cstring>
 #include <sstream>
 using namespace std;
-// split a line from a file into a vector of strings.  token = "\t"
-//void Tokenize(const string &str, vector<string>& tokens, const string &delimiter = "\t");
-//void Tokenize(const string &str, vector<int>& tokens,    const string &delimiter = "\t");
 // templated function to convert objects to strings
 template <typename T>
 inline
@@ -21,37 +17,33 @@ std::string ToString(const T & value) {
    return ss.str();
 }
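+// Tokenize str into a vector of strings, splitting on any character in delimiter; empty tokens are skipped (uses strtok, so not reentrant).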
 inline
-void Tokenize(const string &str, vector<string> &tokens, const string &delimiter = "\t") {
+void Tokenize(const string &str, vector<string> &elems, const string &delimiter = "\t") 
-    // Skip delimiters at beginning.
+{
-    string::size_type lastPos = str.find_first_not_of(delimiter, 0);
+    char* tok;
-    // Find first "non-delimiter".
+    char cchars [str.size()+1];
-    string::size_type pos     = str.find_first_of(delimiter, lastPos);
+    char* cstr = &cchars[0];
+    strcpy(cstr, str.c_str());
-    while (string::npos != pos || string::npos != lastPos) {
+    tok = strtok(cstr, delimiter.c_str());
-        // Found a token, add it to the vector.
+    while (tok != NULL) {
-        tokens.push_back(str.substr(lastPos, pos - lastPos));
+        elems.push_back(tok);
-        // Skip delimiters.  Note the "not_of"
+        tok = strtok(NULL, delimiter.c_str());
-        lastPos = str.find_first_not_of(delimiter, pos);
-        // Find next "non-delimiter"
-        pos = str.find_first_of(delimiter, lastPos);
    }
 }
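+// Tokenize str into a vector of integers, splitting on any character in delimiter; each token is converted with atoi (uses strtok, so not reentrant).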
 inline
-void Tokenize(const string &str, vector<int> &tokens, const string &delimiter = "\t") {
+void Tokenize(const string &str, vector<int> &elems, const string &delimiter = "\t") 
-    // Skip delimiters at beginning.
+{
-    string::size_type lastPos = str.find_first_not_of(delimiter, 0);
+    char* tok;
-    // Find first "non-delimiter".
+    char cchars [str.size()+1];
-    string::size_type pos     = str.find_first_of(delimiter, lastPos);
+    char* cstr = &cchars[0];
+    strcpy(cstr, str.c_str());
-    while (string::npos != pos || string::npos != lastPos) {
+    tok = strtok(cstr, delimiter.c_str());
-        // Found a token, add it to the vector.
+    while (tok != NULL) {
-        tokens.push_back(atoi(str.substr(lastPos, pos - lastPos).c_str()));
+        elems.push_back(atoi(tok));
-        // Skip delimiters.  Note the "not_of"
+        tok = strtok(NULL, delimiter.c_str());
-        lastPos = str.find_first_not_of(delimiter, pos);
-        // Find next "non-delimiter"
-        pos = str.find_first_of(delimiter, lastPos);
    }
 }