Commit dff512dc authored by Assaf Gordon
Browse files

WindowMaker: add output ID option.

Allows user to name each window based on window number, source interval name, or both.
parent df95985d
......@@ -11,11 +11,12 @@ Licenced under the GNU General Public License 2.0 license.
******************************************************************************/
#include "windowMaker.h"
WindowMaker::WindowMaker(string &fileName, INPUT_FILE_TYPE input_file_type, uint32_t size, uint32_t step)
WindowMaker::WindowMaker(string &fileName, ID_METHOD id_method, INPUT_FILE_TYPE input_file_type, uint32_t size, uint32_t step)
: _size(size)
, _step(step)
, _count(0)
, _window_method(FIXED_WINDOW_SIZE)
, _id_method(id_method)
{
if (input_file_type==GENOME_FILE)
MakeWindowsFromGenome(fileName);
......@@ -23,11 +24,12 @@ WindowMaker::WindowMaker(string &fileName, INPUT_FILE_TYPE input_file_type, uint
MakeWindowsFromBED(fileName);
}
WindowMaker::WindowMaker(string &fileName, INPUT_FILE_TYPE input_file_type, uint32_t count)
WindowMaker::WindowMaker(string &fileName, ID_METHOD id_method, INPUT_FILE_TYPE input_file_type, uint32_t count)
: _size(0)
, _step(0)
, _count(count)
, _window_method(FIXED_WINDOW_COUNT)
, _id_method(id_method)
{
if (input_file_type==GENOME_FILE)
MakeWindowsFromGenome(fileName);
......@@ -75,12 +77,14 @@ void WindowMaker::MakeBEDWindow(const BED& interval)
}
void WindowMaker::MakeFixedSizeWindow(const BED& interval) {
for (uint32_t start = interval.start; start <= interval.end; start += _step) {
uint32_t i=1;
for (uint32_t start = interval.start; start <= interval.end; start += _step, ++i) {
string name = GenerateID(interval,i);
if ((start + _size) <= interval.end) {
cout << interval.chrom << "\t" << start << "\t" << start + _size << endl;
cout << interval.chrom << "\t" << start << "\t" << start + _size << name << endl;
}
else if (start < interval.end) {
cout << interval.chrom << "\t" << start << "\t" << interval.end << endl;
cout << interval.chrom << "\t" << start << "\t" << interval.end << name << endl;
}
}
}
......@@ -91,8 +95,28 @@ void WindowMaker::MakeFixedCountWindow(const BED& interval) {
if (window_size==0 || interval_size==0)
return;
for (uint32_t start = interval.start; start <= interval.end; start += window_size) {
uint32_t i=1;
for (uint32_t start = interval.start; start <= interval.end; start += window_size, ++i) {
string name = GenerateID(interval,i);
uint32_t end = min(start + window_size,interval.end);
cout << interval.chrom << "\t" << start << "\t" << end << endl;
cout << interval.chrom << "\t" << start << "\t" << end << name << endl;
}
}
\ No newline at end of file
}
/**
 * Build the optional name column that is appended to an output window.
 *
 * The returned text already begins with a TAB, so it can be streamed
 * directly after the window's end coordinate. It is empty when no ID
 * was requested, which keeps the default 3-column output unchanged.
 *
 * @param interval      Source interval the window belongs to; only its
 *                      `name` field is read here.
 * @param window_index  Number of the window within `interval`, as
 *                      supplied by the caller.
 * @return "\t<id>" according to _id_method, or an empty string for
 *         ID_NONE and any unrecognised method value.
 */
string WindowMaker::GenerateID(const BED& interval, uint32_t window_index) const {
    stringstream s;
    switch (_id_method) {
    case ID_SOURCE_ID:
        s << "\t" << interval.name;
        break;
    case ID_WINDOW_NUMBER:
        s << "\t" << window_index;
        break;
    case ID_SOURCE_ID_WINDOW_NUMBER:
        s << "\t" << interval.name << "_" << window_index;
        break;  // explicit: do not rely on falling into the no-op cases below
    case ID_NONE:
    default:
        // No name column requested (or unknown method): emit nothing.
        break;
    }
    return s.str();
}
......@@ -29,10 +29,16 @@ public:
FIXED_WINDOW_SIZE,
FIXED_WINDOW_COUNT
};
// Selects how the optional name column of each output window is built.
// Values are spelled out explicitly; they match the implicit numbering.
enum ID_METHOD {
    ID_NONE                    = 0,  // no name column is added
    ID_WINDOW_NUMBER           = 1,  // name is the window number
    ID_SOURCE_ID               = 2,  // name is the source interval's name
    ID_SOURCE_ID_WINDOW_NUMBER = 3   // name is "<source name>_<window number>"
};
// constructor
WindowMaker(string &fileName, INPUT_FILE_TYPE input_file_type, uint32_t count);
WindowMaker(string &fileName, INPUT_FILE_TYPE input_file_type, uint32_t size, uint32_t step);
WindowMaker(string &fileName, ID_METHOD id_method, INPUT_FILE_TYPE input_file_type, uint32_t count);
WindowMaker(string &fileName, ID_METHOD id_method, INPUT_FILE_TYPE input_file_type, uint32_t size, uint32_t step);
// destructor
~WindowMaker(void);
......@@ -45,9 +51,12 @@ private:
uint32_t _step;
uint32_t _count;
WINDOW_METHOD _window_method;
ID_METHOD _id_method;
void MakeBEDWindow(const BED& interval);
void MakeFixedSizeWindow(const BED& interval);
void MakeFixedCountWindow(const BED& interval);
};
\ No newline at end of file
string GenerateID(const BED& interval, uint32_t window_index) const;
};
......@@ -32,6 +32,7 @@ int windowmaker_main(int argc, char* argv[]) {
// input files
string inputFile;
WindowMaker::INPUT_FILE_TYPE inputFileType = WindowMaker::GENOME_FILE;
WindowMaker::ID_METHOD idMethod = WindowMaker::ID_NONE;
// parms
uint32_t size = 0;
......@@ -96,6 +97,21 @@ int windowmaker_main(int argc, char* argv[]) {
i++;
}
}
else if(PARAMETER_CHECK("-i", 2, parameterLength)) {
    // -i <method>: choose how the name column of each window is generated.
    if ((i+1) < argc) {
        if (strcmp(argv[i+1],"winnum")==0)
            idMethod = WindowMaker::ID_WINDOW_NUMBER;
        else if (strcmp(argv[i+1],"srcwinnum")==0)
            idMethod = WindowMaker::ID_SOURCE_ID_WINDOW_NUMBER;
        else if (strcmp(argv[i+1],"src")==0)
            idMethod = WindowMaker::ID_SOURCE_ID;
        else {
            // List every accepted value, including "src", so the message
            // agrees with the branches above.
            cerr << endl << "*****ERROR: Invalid ID method (" << argv[i+1] << "). Possible values are: src, winnum, srcwinnum" << endl << endl ;
            showHelp = true;
        }
        // Skip the option's value so it is not parsed as a parameter itself.
        i++;
    }
}
else {
cerr << endl << "*****ERROR: Unrecognized parameter: " << argv[i] << " *****" << endl << endl;
showHelp = true;
......@@ -122,9 +138,9 @@ int windowmaker_main(int argc, char* argv[]) {
if (!showHelp) {
WindowMaker *wm = NULL;
if (haveCount)
wm = new WindowMaker(inputFile, inputFileType, count);
wm = new WindowMaker(inputFile, idMethod, inputFileType, count);
if (haveSize)
wm = new WindowMaker(inputFile, inputFileType, size, step);
wm = new WindowMaker(inputFile, idMethod, inputFileType, size, step);
delete wm;
}
else {
......@@ -169,6 +185,15 @@ void windowmaker_help(void) {
cerr << "\t\tto fixed number of windows (i.e. same number of windows, with" << endl;
cerr << "\t\tvarying window sizes)." << endl << endl;
cerr << "ID Naming Options: " << endl;
cerr << "\t-i src|winnum|srcwinnum" << endl;
cerr << "\t\tThe default output is 3 columns: chrom, start, end ." << endl;
cerr << "\t\tWith this option, a name column will be added." << endl;
cerr << "\t\t \"-i src\" - use the source interval's name." << endl;
cerr << "\t\t \"-i winnum\" - use the window number as the ID (e.g. 1,2,3,4...)." << endl;
cerr << "\t\t \"-i srcwinnum\" - use the source interval's name with the window number." << endl;
cerr << "\t\tSee below for usage examples." << endl << endl;
cerr << "Notes: " << endl;
cerr << "\t(1) The genome file should tab delimited and structured as follows:" << endl;
cerr << "\t <chromName><TAB><chromSize>" << endl << endl;
......@@ -229,6 +254,41 @@ void windowmaker_help(void) {
cerr << " ..." << endl;
cerr << endl;
cerr << " # Add a name column, based on the window number: "<< endl;
cerr << " $ cat input.bed" << endl;
cerr << " chr5 60000 70000 AAA" << endl;
cerr << " chr5 73000 90000 BBB" << endl;
cerr << " chr5 100000 101000 CCC" << endl;
cerr << " $ " << PROGRAM_NAME << " -b input.bed -n 3 -i winnum" << endl;
cerr << " chr5 60000 63334 1" << endl;
cerr << " chr5 63334 66668 2" << endl;
cerr << " chr5 66668 70000 3" << endl;
cerr << " chr5 73000 78667 1" << endl;
cerr << " chr5 78667 84334 2" << endl;
cerr << " chr5 84334 90000 3" << endl;
cerr << " chr5 100000 100334 1" << endl;
cerr << " chr5 100334 100668 2" << endl;
cerr << " chr5 100668 101000 3" << endl;
cerr << " ..." << endl;
cerr << endl;
cerr << " # Add a name column, based on the source ID + window number: "<< endl;
cerr << " $ cat input.bed" << endl;
cerr << " chr5 60000 70000 AAA" << endl;
cerr << " chr5 73000 90000 BBB" << endl;
cerr << " chr5 100000 101000 CCC" << endl;
cerr << " $ " << PROGRAM_NAME << " -b input.bed -n 3 -i srcwinnum" << endl;
cerr << " chr5 60000 63334 AAA_1" << endl;
cerr << " chr5 63334 66668 AAA_2" << endl;
cerr << " chr5 66668 70000 AAA_3" << endl;
cerr << " chr5 73000 78667 BBB_1" << endl;
cerr << " chr5 78667 84334 BBB_2" << endl;
cerr << " chr5 84334 90000 BBB_3" << endl;
cerr << " chr5 100000 100334 CCC_1" << endl;
cerr << " chr5 100334 100668 CCC_2" << endl;
cerr << " chr5 100668 101000 CCC_3" << endl;
cerr << " ..." << endl;
cerr << endl;
......@@ -237,4 +297,5 @@ void windowmaker_help(void) {
exit(1);
}
\ No newline at end of file
}
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment