Commit 0e3768eb authored by Aaron
Browse files

Allow input files to have consecutive tabs (i.e., empty fields)

parent cb4efc54
......@@ -50,8 +50,7 @@ SUBDIRS = $(SRC_DIR)/annotateBed \
$(SRC_DIR)/windowBed \
$(SRC_DIR)/windowMaker
UTIL_SUBDIRS = $(SRC_DIR)/utils/lineFileUtilities \
$(SRC_DIR)/utils/bedFile \
UTIL_SUBDIRS = $(SRC_DIR)/utils/bedFile \
$(SRC_DIR)/utils/bedGraphFile \
$(SRC_DIR)/utils/chromsweep \
$(SRC_DIR)/utils/gzstream \
......
......@@ -254,8 +254,8 @@ void ConvertBedToBam(const BED &bed, BamAlignment &bam, map<string, int, std::le
unsigned int blockCount = atoi(bed.fields[9].c_str());
vector<int> blockSizes, blockStarts;
Tokenize(bed.fields[10], blockSizes, ",");
Tokenize(bed.fields[11], blockStarts, ",");
Tokenize(bed.fields[10], blockSizes, ',');
Tokenize(bed.fields[11], blockStarts, ',');
// make sure this is a well-formed BED12 entry.
if (blockSizes.size() != blockCount) {
......
......@@ -88,7 +88,7 @@ int getoverlap_main(int argc, char* argv[]) {
// Split the column string sent by the user into discrete column numbers
// A comma separated string is expected.
vector<string> posColumns;
Tokenize(columns, posColumns, ",");
Tokenize(columns, posColumns, ',');
if (posColumns.size() != 4) {
cerr << endl << "*****" << endl << "*****ERROR: Please specify 4, comma-separated position columns. " << endl << "*****" << endl;
......
......@@ -161,7 +161,7 @@ int groupby_main(int argc, char* argv[]) {
// split the opsString into discrete operations and make sure they are all valid.
vector<string> ops;
opsString.erase(remove_if(opsString.begin(),opsString.end(),::isspace),opsString.end());
Tokenize(opsString, ops, ",");
Tokenize(opsString, ops, ',');
for( size_t i = 0; i < ops.size(); i++ ) {
if ((ops[i] != "sum") && (ops[i] != "max") && (ops[i] != "min") && (ops[i] != "mean") &&
(ops[i] != "mode") && (ops[i] != "median") && (ops[i] != "antimode") && (ops[i] != "stdev") &&
......@@ -177,10 +177,10 @@ int groupby_main(int argc, char* argv[]) {
// Split the column string sent by the user into discrete column numbers
// A comma separated string is expected.
vector<int> groupColumnsInt;
Tokenize(groupColumnsString, groupColumnsInt, ",");
Tokenize(groupColumnsString, groupColumnsInt, ',');
vector<int> opColumnsInt;
Tokenize(opsColumnString, opColumnsInt, ",");
Tokenize(opsColumnString, opColumnsInt, ',');
// sanity check the group columns
for(size_t i = 0; i < groupColumnsInt.size(); ++i) {
......
......@@ -91,8 +91,8 @@ void GetBedBlocks(const BED &bed, bedVector &bedBlocks) {
vector<int> sizes;
vector<int> starts;
Tokenize(blockSizes, sizes, ",");
Tokenize(blockStarts, starts, ",");
Tokenize(blockSizes, sizes, ',');
Tokenize(blockStarts, starts, ',');
if ( sizes.size() != (size_t) blockCount || starts.size() != (size_t) blockCount ) {
cerr << "Input error: found interval with block-counts not matching starts/sizes on line." << endl;
......
# Build rules for the lineFileUtilities utility: compiles the sources listed
# below into objects under OBJ_DIR and provides a `clean` target.
# Output and helper directories, given relative to this Makefile's directory.
OBJ_DIR = ../../../obj/
BIN_DIR = ../../../bin/
UTILITIES_DIR = ../../utils/
# -------------------
# define our includes
# -------------------
# No extra include flags are set for this module.
INCLUDES =
# ----------------------------------
# define our source and object files
# ----------------------------------
# NOTE(review): SOURCES also lists the header; the `.cpp=.o` substitution below
# only rewrites names ending in .cpp, so lineFileUtilities.h is carried into
# OBJECTS (and BUILT_OBJECTS) unchanged — confirm this is intended.
SOURCES= lineFileUtilities.cpp lineFileUtilities.h
OBJECTS= $(SOURCES:.cpp=.o)
# No external objects are listed, so EXT_OBJECTS expands to nothing.
_EXT_OBJECTS=
EXT_OBJECTS=$(patsubst %,$(OBJ_DIR)/%,$(_EXT_OBJECTS))
# Prefix every object name with the object directory.
BUILT_OBJECTS= $(patsubst %,$(OBJ_DIR)/%,$(OBJECTS))
# First rule in the file: each built object depends on all SOURCES and is
# compiled from the .cpp file named after the target's stem ($(*F)).
$(BUILT_OBJECTS): $(SOURCES)
@echo " * compiling" $(*F).cpp
@$(CXX) -c -o $@ $(*F).cpp $(LDFLAGS) $(CXXFLAGS) $(INCLUDES)
# Rule for external objects: runs a sub-make in the directory named by
# $(INCLUDES). Both EXT_OBJECTS and INCLUDES are empty in this file.
$(EXT_OBJECTS):
@$(MAKE) --no-print-directory -C $(INCLUDES)
# Remove everything under the object and binary directories.
clean:
@echo "Cleaning up."
@rm -f $(OBJ_DIR)/* $(BIN_DIR)/*
.PHONY: clean
\ No newline at end of file
//
// lineFileUtilities.cpp
// BEDTools
//
// Created by Aaron Quinlan Spring 2009.
// Copyright 2009 Aaron Quinlan. All rights reserved.
//
// Summary: Contains common functions for processing text files.
//
#include <sstream>
#include <iostream>
#include "lineFileUtilities.h"
......@@ -6,6 +6,7 @@
#include <cstring>
#include <cstdlib>
#include <sstream>
#include <iostream>
using namespace std;
......@@ -20,31 +21,28 @@ std::string ToString(const T & value) {
// Tokenize into a list of strings.
//
// Splits `str` on `delimiter` and appends every field to `elems`.
//
//   str        text to split
//   elems      output vector; fields are appended, existing contents are kept
//   delimiter  single separator character (defaults to TAB)
//
// Consecutive delimiters produce empty fields ("a\t\tb" -> "a", "", "b").
// The earlier strtok()-based code collapsed such runs and therefore dropped
// empty columns; it also copied the input into a stack array sized by the
// input length. Both are avoided here.
//
// std::getline semantics apply at the edges: an empty `str` appends nothing,
// and a delimiter at the very end of `str` does not add a final empty field.
inline
void Tokenize(const std::string &str, std::vector<std::string> &elems, char delimiter = '\t')
{
    // http://stackoverflow.com/questions/236129/how-to-split-a-string-in-c/236803#236803
    // NOTE: this approach intentionally allows consecutive delimiters
    std::stringstream ss(str);
    std::string item;
    while (std::getline(ss, item, delimiter)) {
        elems.push_back(item);
    }
}
// Tokenize into a list of integers.
//
// Splits `str` on `delimiter`, converts every field with atoi(), and appends
// the result to `elems`.
//
//   str        text to split
//   elems      output vector; values are appended, existing contents are kept
//   delimiter  single separator character (defaults to TAB)
//
// Consecutive delimiters produce empty fields, which atoi() converts to 0
// ("1,,3" -> 1, 0, 3). The earlier strtok()-based code collapsed such runs,
// which shifted the positions of the values that followed.
//
// std::getline semantics apply at the edges: an empty `str` appends nothing,
// and a delimiter at the very end of `str` does not add a final value.
inline
void Tokenize(const std::string &str, std::vector<int> &elems, char delimiter = '\t')
{
    // http://stackoverflow.com/questions/236129/how-to-split-a-string-in-c/236803#236803
    // NOTE: this approach intentionally allows consecutive delimiters
    std::stringstream ss(str);
    std::string item;
    while (std::getline(ss, item, delimiter)) {
        elems.push_back(std::atoi(item.c_str()));
    }
}
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment