From 10e9012200ed5a956559cb6e6b842c25fc073823 Mon Sep 17 00:00:00 2001 From: Aaron <aaronquinlan@gmail.com> Date: Mon, 7 Nov 2011 12:47:05 -0500 Subject: [PATCH] bwa mode --- src/bamToBed/bamToBed.cpp | 43 +++++++++++++++++++++++++++++++-------- 1 file changed, 35 insertions(+), 8 deletions(-) diff --git a/src/bamToBed/bamToBed.cpp b/src/bamToBed/bamToBed.cpp index f23ddd30..b825c524 100644 --- a/src/bamToBed/bamToBed.cpp +++ b/src/bamToBed/bamToBed.cpp @@ -37,12 +37,12 @@ void ShowHelp(void); void ConvertBamToBed(const string &bamFile, bool useEditDistance, const string &bamTag, bool writeBed12, bool obeySplits, const string &color, - bool useCigar, bool useNovoalign); + bool useCigar, bool useNovoalign, bool useBWA); void ConvertBamToBedpe(const string &bamFile, const bool &useEditDistance); void PrintBed(const BamAlignment &bam, const RefVector &refs, bool useEditDistance, - const string &bamTag, bool obeySplits, bool useCigar, bool useNovoalign); + const string &bamTag, bool obeySplits, bool useCigar, bool useNovoalign, bool useBWA); void PrintBed12(const BamAlignment &bam, const RefVector &refs, bool useEditDistance, const string &bamTag, string color = "255,0,0"); void PrintBedPE(const BamAlignment &bam1, const BamAlignment &bam2, @@ -75,6 +75,7 @@ int main(int argc, char* argv[]) { bool useCigar = false; bool obeySplits = false; bool useNovoalign = false; // custom for Quinlan/Hall research + bool useBWA = false; // custom for Quinlan/Hall research // check to see if we should print out some help @@ -121,6 +122,9 @@ int main(int argc, char* argv[]) { else if(PARAMETER_CHECK("-novo", 5, parameterLength)) { useNovoalign = true; } + else if(PARAMETER_CHECK("-bwa", 4, parameterLength)) { + useBWA = true; + } else if(PARAMETER_CHECK("-color", 6, parameterLength)) { if ((i+1) < argc) { haveColor = true; @@ -169,7 +173,7 @@ int main(int argc, char* argv[]) { // if there are no problems, let's convert BAM to BED or BEDPE if (!showHelp) { if (writeBedPE == false) - ConvertBamToBed(bamFile, useEditDistance, tag, writeBed12, obeySplits, color, useCigar, useNovoalign); // BED or "blocked BED" + ConvertBamToBed(bamFile, useEditDistance, tag, writeBed12, obeySplits, color, useCigar, useNovoalign, useBWA); // BED or "blocked BED" else ConvertBamToBedpe(bamFile, useEditDistance); // BEDPE } @@ -223,7 +227,7 @@ void ShowHelp(void) { void ConvertBamToBed(const string &bamFile, bool useEditDistance, const string &bamTag, bool writeBed12, bool obeySplits, const string &color, - bool useCigar, bool useNovoalign) { + bool useCigar, bool useNovoalign, bool useBWA) { // open the BAM file BamReader reader; reader.Open(bamFile); @@ -237,7 +241,7 @@ void ConvertBamToBed(const string &bamFile, bool useEditDistance, const string & while (reader.GetNextAlignment(bam)) { if (bam.IsMapped() == true) { if (writeBed12 == false) // BED - PrintBed(bam, refs, useEditDistance, bamTag, obeySplits, useCigar, useNovoalign); + PrintBed(bam, refs, useEditDistance, bamTag, obeySplits, useCigar, useNovoalign, useBWA); else //"blocked" BED PrintBed12(bam, refs, useEditDistance, bamTag, color); } @@ -342,7 +346,7 @@ string BuildCigarString(const vector<CigarOp> &cigar) { void PrintBed(const BamAlignment &bam, const RefVector &refs, bool useEditDistance, - const string &bamTag, bool obeySplits, bool useCigar, bool useNovoalign) { + const string &bamTag, bool obeySplits, bool useCigar, bool useNovoalign, bool useBWA) { // set the strand string strand = "+"; @@ -359,7 +363,7 @@ void PrintBed(const BamAlignment &bam, const RefVector &refs, bool useEditDista // report the entire BAM footprint as a single BED entry if (obeySplits == false) { - if (!useNovoalign) { + if (!useNovoalign && !useBWA) { // report the alignment in BED6 format. if (useEditDistance == false && bamTag == "") { printf("%s\t%d\t%d\t\%s\t%d\t%s", refs.at(bam.RefID).RefName.c_str(), bam.Position, @@ -397,7 +401,7 @@ void PrintBed(const BamAlignment &bam, const RefVector &refs, bool useEditDista printf("\t%s\n", cigar.c_str()); } } - else { + else if (useNovoalign && !useBWA) { // special BED format for Hydra using Novoalign. uint32_t editDistance; uint32_t numMappings; @@ -415,6 +419,29 @@ void PrintBed(const BamAlignment &bam, const RefVector &refs, bool useEditDista alignmentEnd, name.c_str(), bam.MapQuality, editDistance, numMappings, strand.c_str()); } + else if (!useNovoalign && useBWA) { + // special BED format for Hydra using Novoalign. + uint32_t editDistance; + uint32_t numMappings; + uint32_t x0 = 1; + uint32_t x1 = 0; + + if (!bam.GetTag("NM", editDistance)) { + cerr << "Unable to extract NM. Verify that your BAM was generated by Novoalign." << endl; + exit(1); + } + if (!bam.GetTag("X0", x0) && !bam.GetTag("X1", x1)) { + // if ZN is missing, this means just one alignment was found. + numMappings = 1; + } + else { + numMappings = x0 + x1; + } + printf("%s\t%d\t%d\t\%s\t%u_%u_%u\t%s\n", + refs.at(bam.RefID).RefName.c_str(), bam.Position, + alignmentEnd, name.c_str(), + bam.MapQuality, editDistance, numMappings, strand.c_str()); + } } // Report each chunk of the BAM alignment as a discrete BED entry // For example 10M100N10M would be reported as two seprate BED entries of length 10 -- GitLab