Skip to content
Snippets Groups Projects
Commit 59d77134 authored by Aaron
Browse files

15% speedup for the tokenizer.

parent 45952e64
No related branches found
No related tags found
No related merge requests found
...@@ -3,15 +3,11 @@ ...@@ -3,15 +3,11 @@
#include <vector> #include <vector>
#include <string> #include <string>
#include <algorithm> #include <cstring>
#include <sstream> #include <sstream>
using namespace std; using namespace std;
// split a line from a file into a vector of strings. token = "\t"
//void Tokenize(const string &str, vector<string>& tokens, const string &delimiter = "\t");
//void Tokenize(const string &str, vector<int>& tokens, const string &delimiter = "\t");
// templated function to convert objects to strings // templated function to convert objects to strings
template <typename T> template <typename T>
inline inline
...@@ -21,37 +17,33 @@ std::string ToString(const T & value) { ...@@ -21,37 +17,33 @@ std::string ToString(const T & value) {
return ss.str(); return ss.str();
} }
// tokenize into a list of strings.
inline inline
void Tokenize(const string &str, vector<string> &tokens, const string &delimiter = "\t") { void Tokenize(const string &str, vector<string> &elems, const string &delimiter = "\t")
// Skip delimiters at beginning. {
string::size_type lastPos = str.find_first_not_of(delimiter, 0); char* tok;
// Find first "non-delimiter". char cchars [str.size()+1];
string::size_type pos = str.find_first_of(delimiter, lastPos); char* cstr = &cchars[0];
strcpy(cstr, str.c_str());
while (string::npos != pos || string::npos != lastPos) { tok = strtok(cstr, delimiter.c_str());
// Found a token, add it to the vector. while (tok != NULL) {
tokens.push_back(str.substr(lastPos, pos - lastPos)); elems.push_back(tok);
// Skip delimiters. Note the "not_of" tok = strtok(NULL, delimiter.c_str());
lastPos = str.find_first_not_of(delimiter, pos);
// Find next "non-delimiter"
pos = str.find_first_of(delimiter, lastPos);
} }
} }
// tokenize into a list of integers
inline inline
void Tokenize(const string &str, vector<int> &tokens, const string &delimiter = "\t") { void Tokenize(const string &str, vector<int> &elems, const string &delimiter = "\t")
// Skip delimiters at beginning. {
string::size_type lastPos = str.find_first_not_of(delimiter, 0); char* tok;
// Find first "non-delimiter". char cchars [str.size()+1];
string::size_type pos = str.find_first_of(delimiter, lastPos); char* cstr = &cchars[0];
strcpy(cstr, str.c_str());
while (string::npos != pos || string::npos != lastPos) { tok = strtok(cstr, delimiter.c_str());
// Found a token, add it to the vector. while (tok != NULL) {
tokens.push_back(atoi(str.substr(lastPos, pos - lastPos).c_str())); elems.push_back(atoi(tok));
// Skip delimiters. Note the "not_of" tok = strtok(NULL, delimiter.c_str());
lastPos = str.find_first_not_of(delimiter, pos);
// Find next "non-delimiter"
pos = str.find_first_of(delimiter, lastPos);
} }
} }
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment