From 4667a7ab7deb5c494bfaac31861f9fd9eb3325ee Mon Sep 17 00:00:00 2001 From: Aaron <aaronquinlan@gmail.com> Date: Fri, 4 Nov 2011 11:23:51 -0400 Subject: [PATCH] -novo option in bamToBed --- src/bamToBed/bamToBed.cpp | 103 ++++++++++++++++++---------- src/utils/chromsweep/chromsweep.cpp | 1 + 2 files changed, 68 insertions(+), 36 deletions(-) diff --git a/src/bamToBed/bamToBed.cpp b/src/bamToBed/bamToBed.cpp index 04818cf6..f23ddd30 100644 --- a/src/bamToBed/bamToBed.cpp +++ b/src/bamToBed/bamToBed.cpp @@ -35,11 +35,15 @@ using namespace std; // function declarations void ShowHelp(void); -void ConvertBamToBed(const string &bamFile, const bool &useEditDistance, const string &bamTag, - const bool &writeBed12, const bool &obeySplits, const string &color, const bool &useCigar); +void ConvertBamToBed(const string &bamFile, bool useEditDistance, const string &bamTag, + bool writeBed12, bool obeySplits, const string &color, + bool useCigar, bool useNovoalign); + void ConvertBamToBedpe(const string &bamFile, const bool &useEditDistance); -void PrintBed(const BamAlignment &bam, const RefVector &refs, bool useEditDistance, const string &bamTag, bool obeySplits, bool useCigar); +void PrintBed(const BamAlignment &bam, const RefVector &refs, bool useEditDistance, + const string &bamTag, bool obeySplits, bool useCigar, bool useNovoalign); + void PrintBed12(const BamAlignment &bam, const RefVector &refs, bool useEditDistance, const string &bamTag, string color = "255,0,0"); void PrintBedPE(const BamAlignment &bam1, const BamAlignment &bam2, const RefVector &refs, bool useEditDistance); @@ -70,6 +74,7 @@ int main(int argc, char* argv[]) { bool useAlignmentScore = false; bool useCigar = false; bool obeySplits = false; + bool useNovoalign = false; // custom for Quinlan/Hall research // check to see if we should print out some help @@ -113,6 +118,9 @@ int main(int argc, char* argv[]) { else if(PARAMETER_CHECK("-as", 3, parameterLength)) { useAlignmentScore = true; } + else if(PARAMETER_CHECK("-novo", 5, parameterLength)) { + useNovoalign = true; + } else if(PARAMETER_CHECK("-color", 6, parameterLength)) { if ((i+1) < argc) { haveColor = true; @@ -161,7 +169,7 @@ int main(int argc, char* argv[]) { // if there are no problems, let's convert BAM to BED or BEDPE if (!showHelp) { if (writeBedPE == false) - ConvertBamToBed(bamFile, useEditDistance, tag, writeBed12, obeySplits, color, useCigar); // BED or "blocked BED" + ConvertBamToBed(bamFile, useEditDistance, tag, writeBed12, obeySplits, color, useCigar, useNovoalign); // BED or "blocked BED" else ConvertBamToBedpe(bamFile, useEditDistance); // BEDPE } @@ -213,8 +221,9 @@ void ShowHelp(void) { } -void ConvertBamToBed(const string &bamFile, const bool &useEditDistance, const string &bamTag, - const bool &writeBed12, const bool &obeySplits, const string &color, const bool &useCigar) { +void ConvertBamToBed(const string &bamFile, bool useEditDistance, const string &bamTag, + bool writeBed12, bool obeySplits, const string &color, + bool useCigar, bool useNovoalign) { // open the BAM file BamReader reader; reader.Open(bamFile); @@ -228,7 +237,7 @@ void ConvertBamToBed(const string &bamFile, const bool &useEditDistance, const s while (reader.GetNextAlignment(bam)) { if (bam.IsMapped() == true) { if (writeBed12 == false) // BED - PrintBed(bam, refs, useEditDistance, bamTag, obeySplits, useCigar); + PrintBed(bam, refs, useEditDistance, bamTag, obeySplits, useCigar, useNovoalign); else //"blocked" BED PrintBed12(bam, refs, useEditDistance, bamTag, color); } @@ -332,7 +341,8 @@ string BuildCigarString(const vector<CigarOp> &cigar) { } -void PrintBed(const BamAlignment &bam, const RefVector &refs, bool useEditDistance, const string &bamTag, bool obeySplits, bool useCigar) { +void PrintBed(const BamAlignment &bam, const RefVector &refs, bool useEditDistance, + const string &bamTag, bool obeySplits, bool useCigar, bool useNovoalign) { // set the strand string strand = "+"; @@ -348,41 +358,62 @@ void PrintBed(const BamAlignment &bam, const RefVector &refs, bool useEditDista // report the entire BAM footprint as a single BED entry if (obeySplits == false) { - // report the alignment in BED6 format. - if (useEditDistance == false && bamTag == "") { - printf("%s\t%d\t%d\t\%s\t%d\t%s", refs.at(bam.RefID).RefName.c_str(), bam.Position, - alignmentEnd, name.c_str(), bam.MapQuality, strand.c_str()); - } - else if (useEditDistance == true && bamTag == "") { - uint32_t editDistance; - if (bam.GetTag("NM", editDistance)) { - printf("%s\t%d\t%d\t\%s\t%u\t%s", refs.at(bam.RefID).RefName.c_str(), bam.Position, - alignmentEnd, name.c_str(), editDistance, strand.c_str()); + + if (!useNovoalign) { + // report the alignment in BED6 format. + if (useEditDistance == false && bamTag == "") { + printf("%s\t%d\t%d\t\%s\t%d\t%s", refs.at(bam.RefID).RefName.c_str(), bam.Position, + alignmentEnd, name.c_str(), bam.MapQuality, strand.c_str()); } - else { - cerr << "The edit distance tag (NM) was not found in the BAM file. Please disable -ed. Exiting\n"; - exit(1); + else if (useEditDistance == true && bamTag == "") { + uint32_t editDistance; + if (bam.GetTag("NM", editDistance)) { + printf("%s\t%d\t%d\t\%s\t%u\t%s", refs.at(bam.RefID).RefName.c_str(), bam.Position, + alignmentEnd, name.c_str(), editDistance, strand.c_str()); + } + else { + cerr << "The edit distance tag (NM) was not found in the BAM file. Please disable -ed. Exiting\n"; + exit(1); + } } - } - else if (useEditDistance == false && bamTag != "") { - int32_t tagValue; - if (bam.GetTag(bamTag, tagValue)) { - printf("%s\t%d\t%d\t\%s\t%d\t%s", refs.at(bam.RefID).RefName.c_str(), bam.Position, - alignmentEnd, name.c_str(), tagValue, strand.c_str()); + else if (useEditDistance == false && bamTag != "") { + int32_t tagValue; + if (bam.GetTag(bamTag, tagValue)) { + printf("%s\t%d\t%d\t\%s\t%d\t%s", refs.at(bam.RefID).RefName.c_str(), bam.Position, + alignmentEnd, name.c_str(), tagValue, strand.c_str()); + } + else { + cerr << "The requested tag (" << bamTag << ") was not found in the BAM file. Exiting\n"; + exit(1); + } + } + + // does the user want CIGAR as well? + if (useCigar == false) { + printf("\n"); } else { - cerr << "The requested tag (" << bamTag << ") was not found in the BAM file. Exiting\n"; - exit(1); + string cigar = BuildCigarString(bam.CigarData); + printf("\t%s\n", cigar.c_str()); } } - - // does the user want CIGAR as well? - if (useCigar == false) { - printf("\n"); - } else { - string cigar = BuildCigarString(bam.CigarData); - printf("\t%s\n", cigar.c_str()); + // special BED format for Hydra using Novoalign. + uint32_t editDistance; + uint32_t numMappings; + + if (!bam.GetTag("NM", editDistance)) { + cerr << "Unable to extract NM. Verify that your BAM was generated by Novoalign." << endl; + exit(1); + } + if (!bam.GetTag("ZN", numMappings)) { + // if ZN is missing, this means just one alignment was found. + numMappings = 1; + } + printf("%s\t%d\t%d\t\%s\t%u_%u_%u\t%s\n", + refs.at(bam.RefID).RefName.c_str(), bam.Position, + alignmentEnd, name.c_str(), + bam.MapQuality, editDistance, numMappings, strand.c_str()); } } // Report each chunk of the BAM alignment as a discrete BED entry diff --git a/src/utils/chromsweep/chromsweep.cpp b/src/utils/chromsweep/chromsweep.cpp index e4dcf560..0ae73b59 100644 --- a/src/utils/chromsweep/chromsweep.cpp +++ b/src/utils/chromsweep/chromsweep.cpp @@ -125,6 +125,7 @@ bool ChromSweep::ChromChange() } } + bool ChromSweep::IsValidHit(const BED &query, const BED &db) { // do we have an overlap in the DB? if (overlaps(query.start, query.end, db.start, db.end) > 0) { -- GitLab