From e88f6c388d1b9e67804f998b1423ab9fae6fbdb0 Mon Sep 17 00:00:00 2001 From: Aaron <aaronquinlan@gmail.com> Date: Fri, 12 Mar 2010 10:34:46 -0500 Subject: [PATCH] Fixed yet another bug in the -nms option in mergeBed. Now using a map to track the unique names of the features in a merged block. The consequence is that names will not be reported in the order they were added: each distinct name is reported once, in sorted (map key) order. --- src/mergeBed/mergeBed.cpp | 74 ++++++++++++++++++++------------------- src/mergeBed/mergeBed.h | 2 ++ 2 files changed, 40 insertions(+), 36 deletions(-) diff --git a/src/mergeBed/mergeBed.cpp b/src/mergeBed/mergeBed.cpp index e1ca8e99..b577e466 100755 --- a/src/mergeBed/mergeBed.cpp +++ b/src/mergeBed/mergeBed.cpp @@ -12,6 +12,18 @@ #include "lineFileUtilities.h" #include "mergeBed.h" + +void ReportMergedNames(const map<string, bool> &names) { + unsigned int n = 0; + map<string, bool>::const_iterator nameItr = names.begin(); + map<string, bool>::const_iterator nameEnd = names.end(); + for (; nameItr != nameEnd; ++nameItr) { + if (n < (names.size() - 1)) {cout << nameItr->first << ";";} + else {cout << nameItr->first;} + n++; + } +} + // =============== // = Constructor = // =============== @@ -52,37 +64,40 @@ void BedMerge::MergeBed() { int minStart = INT_MAX; int maxEnd = 0; bool OIP = false; // OIP = Overlap In Progress. Lame, I realize. - unsigned int prev = 0; + int prev = -1; unsigned int curr = 0; int mergeCount = 1; - vector<string> names; + map<string, bool> names; // loop through the BED entries for this chromosome // and look for overlaps - for (curr = 1; curr < bedList.size(); ++curr) { + for (curr = 0; curr < bedList.size(); ++curr) { + // make sure prev points to an actual element + if (prev < 0) { + prev = curr; + continue; + } + // Is there an overlap between the current and previous entries? 
if ( overlaps(bedList[prev].start, bedList[prev].end, bedList[curr].start, bedList[curr].end) >= this->maxDistance) { - OIP = true; mergeCount++; minStart = min(bedList[prev].start, min(minStart, bedList[curr].start)); maxEnd = max(bedList[prev].end, max(maxEnd, bedList[curr].end)); - - //names.push_back(bedList[prev].name); - names.push_back(bedList[curr].name); + + names[bedList[prev].name] = true; + names[bedList[curr].name] = true; } else if ( overlaps(minStart, maxEnd, bedList[curr].start, bedList[curr].end) >= this->maxDistance) { mergeCount++; minStart = min(minStart, bedList[curr].start); maxEnd = max(maxEnd, bedList[curr].end); - - names.push_back(bedList[curr].name); + names[bedList[curr].name] = true; } else { - // was there an overlap befor the current entry broke it? if (OIP) { if (this->numEntries) { @@ -90,10 +105,7 @@ void BedMerge::MergeBed() { } else if (this->reportNames) { cout << bedList[prev].chrom << "\t" << minStart << "\t" << maxEnd << "\t"; - for (unsigned int n = 0; n < names.size(); ++n) { - if (n < (names.size() - 1)) {cout << names[n] << ";";} - else {cout << names[n];} - } + ReportMergedNames(names); cout << endl; } else { @@ -117,8 +129,9 @@ void BedMerge::MergeBed() { mergeCount = 1; minStart = INT_MAX; maxEnd = 0; + names.clear(); - names.push_back(bedList[prev].name); + names[bedList[curr].name] = true; } prev = curr; } @@ -130,10 +143,7 @@ void BedMerge::MergeBed() { } else if (this->reportNames) { cout << bedList[prev].chrom << "\t" << minStart << "\t" << maxEnd << "\t"; - for (unsigned int n = 0; n < names.size(); ++n) { - if (n < (names.size() - 1)) {cout << names[n] << ";";} - else {cout << names[n];} - } + ReportMergedNames(names); cout << endl; } else { @@ -185,7 +195,7 @@ void BedMerge::MergeBedStranded() { unsigned int curr = 0; int mergeCount = 1; int numOnStrand = 0; - vector<string> names; + map<string, bool> names; // loop through the BED entries for this chromosome // and look for overlaps @@ -211,23 +221,21 @@ void 
BedMerge::MergeBedStranded() { } if ( overlaps(bedList[prev].start, bedList[prev].end, - bedList[curr].start, bedList[curr].end) >= this->maxDistance) { - + bedList[curr].start, bedList[curr].end) >= this->maxDistance) { OIP = true; mergeCount++; minStart = min(bedList[prev].start, min(minStart, bedList[curr].start)); maxEnd = max(bedList[prev].end, max(maxEnd, bedList[curr].end)); - names.push_back(bedList[curr].name); + names[bedList[prev].name] = true; + names[bedList[curr].name] = true; } else if ( overlaps(minStart, maxEnd, bedList[curr].start, bedList[curr].end) >= this->maxDistance) { - mergeCount++; minStart = min(minStart, bedList[curr].start); maxEnd = max(maxEnd, bedList[curr].end); - - names.push_back(bedList[curr].name); + names[bedList[curr].name] = true; } else { @@ -238,10 +246,7 @@ void BedMerge::MergeBedStranded() { } else if (this->reportNames) { cout << bedList[prev].chrom << "\t" << minStart << "\t" << maxEnd << "\t"; - for (unsigned int n = 0; n < names.size(); ++n) { - if (n < (names.size() - 1)) {cout << names[n] << ";";} - else {cout << names[n];} - } + ReportMergedNames(names); cout << "\t" << strands[s] << endl; } else { @@ -267,8 +272,8 @@ void BedMerge::MergeBedStranded() { maxEnd = 0; names.clear(); - // add the name of the - names.push_back(bedList[curr].name); + // add the name of the current element in prep for the next block + names[bedList[curr].name] = true; } prev = curr; } @@ -280,10 +285,7 @@ void BedMerge::MergeBedStranded() { } else if (this->reportNames) { cout << bedList[prev].chrom << "\t" << minStart << "\t" << maxEnd << "\t"; - for (unsigned int n = 0; n < names.size(); ++n) { - if (n < (names.size() - 1)) {cout << names[n] << ";";} - else {cout << names[n];} - } + ReportMergedNames(names); cout << "\t" << strands[s] << endl; } else { diff --git a/src/mergeBed/mergeBed.h b/src/mergeBed/mergeBed.h index d6330985..336c5d55 100755 --- a/src/mergeBed/mergeBed.h +++ b/src/mergeBed/mergeBed.h @@ -19,6 +19,8 @@ using namespace 
std; +void ReportMergedNames(const map<string, bool> &names); + //************************************************ // Class methods and elements //************************************************ -- GitLab