Commit 0e3768eb authored by Aaron
Browse files

Allow input files to have consecutive tabs (i.e., empty fields)

parent cb4efc54
...@@ -50,8 +50,7 @@ SUBDIRS = $(SRC_DIR)/annotateBed \ ...@@ -50,8 +50,7 @@ SUBDIRS = $(SRC_DIR)/annotateBed \
$(SRC_DIR)/windowBed \ $(SRC_DIR)/windowBed \
$(SRC_DIR)/windowMaker $(SRC_DIR)/windowMaker
UTIL_SUBDIRS = $(SRC_DIR)/utils/lineFileUtilities \ UTIL_SUBDIRS = $(SRC_DIR)/utils/bedFile \
$(SRC_DIR)/utils/bedFile \
$(SRC_DIR)/utils/bedGraphFile \ $(SRC_DIR)/utils/bedGraphFile \
$(SRC_DIR)/utils/chromsweep \ $(SRC_DIR)/utils/chromsweep \
$(SRC_DIR)/utils/gzstream \ $(SRC_DIR)/utils/gzstream \
......
...@@ -254,8 +254,8 @@ void ConvertBedToBam(const BED &bed, BamAlignment &bam, map<string, int, std::le ...@@ -254,8 +254,8 @@ void ConvertBedToBam(const BED &bed, BamAlignment &bam, map<string, int, std::le
unsigned int blockCount = atoi(bed.fields[9].c_str()); unsigned int blockCount = atoi(bed.fields[9].c_str());
vector<int> blockSizes, blockStarts; vector<int> blockSizes, blockStarts;
Tokenize(bed.fields[10], blockSizes, ","); Tokenize(bed.fields[10], blockSizes, ',');
Tokenize(bed.fields[11], blockStarts, ","); Tokenize(bed.fields[11], blockStarts, ',');
// make sure this is a well-formed BED12 entry. // make sure this is a well-formed BED12 entry.
if (blockSizes.size() != blockCount) { if (blockSizes.size() != blockCount) {
......
...@@ -88,7 +88,7 @@ int getoverlap_main(int argc, char* argv[]) { ...@@ -88,7 +88,7 @@ int getoverlap_main(int argc, char* argv[]) {
// Split the column string sent by the user into discrete column numbers // Split the column string sent by the user into discrete column numbers
// A comma separated string is expected. // A comma separated string is expected.
vector<string> posColumns; vector<string> posColumns;
Tokenize(columns, posColumns, ","); Tokenize(columns, posColumns, ',');
if (posColumns.size() != 4) { if (posColumns.size() != 4) {
cerr << endl << "*****" << endl << "*****ERROR: Please specify 4, comma-separated position columns. " << endl << "*****" << endl; cerr << endl << "*****" << endl << "*****ERROR: Please specify 4, comma-separated position columns. " << endl << "*****" << endl;
......
...@@ -161,7 +161,7 @@ int groupby_main(int argc, char* argv[]) { ...@@ -161,7 +161,7 @@ int groupby_main(int argc, char* argv[]) {
// split the opsString into discrete operations and make sure they are all valid. // split the opsString into discrete operations and make sure they are all valid.
vector<string> ops; vector<string> ops;
opsString.erase(remove_if(opsString.begin(),opsString.end(),::isspace),opsString.end()); opsString.erase(remove_if(opsString.begin(),opsString.end(),::isspace),opsString.end());
Tokenize(opsString, ops, ","); Tokenize(opsString, ops, ',');
for( size_t i = 0; i < ops.size(); i++ ) { for( size_t i = 0; i < ops.size(); i++ ) {
if ((ops[i] != "sum") && (ops[i] != "max") && (ops[i] != "min") && (ops[i] != "mean") && if ((ops[i] != "sum") && (ops[i] != "max") && (ops[i] != "min") && (ops[i] != "mean") &&
(ops[i] != "mode") && (ops[i] != "median") && (ops[i] != "antimode") && (ops[i] != "stdev") && (ops[i] != "mode") && (ops[i] != "median") && (ops[i] != "antimode") && (ops[i] != "stdev") &&
...@@ -177,10 +177,10 @@ int groupby_main(int argc, char* argv[]) { ...@@ -177,10 +177,10 @@ int groupby_main(int argc, char* argv[]) {
// Split the column string sent by the user into discrete column numbers // Split the column string sent by the user into discrete column numbers
// A comma separated string is expected. // A comma separated string is expected.
vector<int> groupColumnsInt; vector<int> groupColumnsInt;
Tokenize(groupColumnsString, groupColumnsInt, ","); Tokenize(groupColumnsString, groupColumnsInt, ',');
vector<int> opColumnsInt; vector<int> opColumnsInt;
Tokenize(opsColumnString, opColumnsInt, ","); Tokenize(opsColumnString, opColumnsInt, ',');
// sanity check the group columns // sanity check the group columns
for(size_t i = 0; i < groupColumnsInt.size(); ++i) { for(size_t i = 0; i < groupColumnsInt.size(); ++i) {
......
...@@ -91,8 +91,8 @@ void GetBedBlocks(const BED &bed, bedVector &bedBlocks) { ...@@ -91,8 +91,8 @@ void GetBedBlocks(const BED &bed, bedVector &bedBlocks) {
vector<int> sizes; vector<int> sizes;
vector<int> starts; vector<int> starts;
Tokenize(blockSizes, sizes, ","); Tokenize(blockSizes, sizes, ',');
Tokenize(blockStarts, starts, ","); Tokenize(blockStarts, starts, ',');
if ( sizes.size() != (size_t) blockCount || starts.size() != (size_t) blockCount ) { if ( sizes.size() != (size_t) blockCount || starts.size() != (size_t) blockCount ) {
cerr << "Input error: found interval with block-counts not matching starts/sizes on line." << endl; cerr << "Input error: found interval with block-counts not matching starts/sizes on line." << endl;
......
# Makefile for the lineFileUtilities utility: compiles lineFileUtilities.cpp
# into an object file placed under $(OBJ_DIR).
#
# Output and helper directories, given relative to this directory.
# NOTE(review): OBJ_DIR and BIN_DIR already end in '/', and the patsubst
# calls below append another '/', so generated paths contain "//".
OBJ_DIR = ../../../obj/
BIN_DIR = ../../../bin/
UTILITIES_DIR = ../../utils/
# -------------------
# define our includes
# -------------------
# Left empty: no extra include flags are passed to the compiler here.
INCLUDES =
# ----------------------------------
# define our source and object files
# ----------------------------------
# NOTE(review): SOURCES lists the header as well as the .cpp file. The
# $(SOURCES:.cpp=.o) substitution only rewrites names ending in .cpp, so
# lineFileUtilities.h is carried over unchanged into OBJECTS (and from there
# into BUILT_OBJECTS).
SOURCES= lineFileUtilities.cpp lineFileUtilities.h
OBJECTS= $(SOURCES:.cpp=.o)
# No external objects are named, so EXT_OBJECTS expands to nothing.
_EXT_OBJECTS=
EXT_OBJECTS=$(patsubst %,$(OBJ_DIR)/%,$(_EXT_OBJECTS))
# Every entry of OBJECTS prefixed with "$(OBJ_DIR)/".
BUILT_OBJECTS= $(patsubst %,$(OBJ_DIR)/%,$(OBJECTS))
# First rule in the file. Each built object depends on everything in SOURCES;
# the recipe compiles "$(*F).cpp" (file part of the target's stem plus .cpp)
# and writes the result to the target path ($@).
# NOTE(review): $(LDFLAGS) is passed even though -c performs no link step.
$(BUILT_OBJECTS): $(SOURCES)
@echo " * compiling" $(*F).cpp
@$(CXX) -c -o $@ $(*F).cpp $(LDFLAGS) $(CXXFLAGS) $(INCLUDES)
# Rule for external objects: runs a sub-make with -C $(INCLUDES).
# NOTE(review): EXT_OBJECTS is empty here, so this rule names no targets;
# INCLUDES is also empty, so -C would be given no directory -- confirm this
# is intended boilerplate.
$(EXT_OBJECTS):
@$(MAKE) --no-print-directory -C $(INCLUDES)
# clean: deletes the files matched by $(OBJ_DIR)/* and $(BIN_DIR)/*.
# NOTE(review): these globs are not limited to this module's outputs;
# presumably the obj/ and bin/ directories are shared by the whole build.
clean:
@echo "Cleaning up."
@rm -f $(OBJ_DIR)/* $(BIN_DIR)/*
# clean does not produce a file named "clean", so declare it phony.
.PHONY: clean
\ No newline at end of file
//
// lineFileUtilities.cpp
// BEDTools
//
// Created by Aaron Quinlan Spring 2009.
// Copyright 2009 Aaron Quinlan. All rights reserved.
//
// Summary: Contains common functions for processing text files.
//
#include <sstream>
#include <iostream>
#include "lineFileUtilities.h"
...@@ -6,6 +6,7 @@ ...@@ -6,6 +6,7 @@
#include <cstring> #include <cstring>
#include <cstdlib> #include <cstdlib>
#include <sstream> #include <sstream>
#include <iostream>
using namespace std; using namespace std;
...@@ -20,31 +21,28 @@ std::string ToString(const T & value) { ...@@ -20,31 +21,28 @@ std::string ToString(const T & value) {
// tokenize into a list of strings. // tokenize into a list of strings.
inline inline
void Tokenize(const string &str, vector<string> &elems, const string &delimiter = "\t") void Tokenize(const string &str, vector<string> &elems, char delimiter = '\t')
{ {
char* tok; // http://stackoverflow.com/questions/236129/how-to-split-a-string-in-c/236803#236803
char cchars [str.size()+1]; // NOTE: this approach intentionally allows consecutive delimiters
char* cstr = &cchars[0]; std::stringstream ss(str);
strcpy(cstr, str.c_str()); std::string item;
tok = strtok(cstr, delimiter.c_str()); while(getline(ss, item, delimiter)) {
while (tok != NULL) { elems.push_back(item);
elems.push_back(tok);
tok = strtok(NULL, delimiter.c_str());
} }
} }
// tokenize into a list of integers // tokenize into a list of integers
inline inline
void Tokenize(const string &str, vector<int> &elems, const string &delimiter = "\t") void Tokenize(const string &str, vector<int> &elems, char delimiter = '\t')
{ {
char* tok;
char cchars [str.size()+1]; // http://stackoverflow.com/questions/236129/how-to-split-a-string-in-c/236803#236803
char* cstr = &cchars[0]; // NOTE: this approach intentionally allows consecutive delimiters
strcpy(cstr, str.c_str()); std::stringstream ss(str);
tok = strtok(cstr, delimiter.c_str()); std::string item;
while (tok != NULL) { while(getline(ss, item, delimiter)) {
elems.push_back(atoi(tok)); elems.push_back(atoi(item.c_str()));
tok = strtok(NULL, delimiter.c_str());
} }
} }
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment