Skip to content
Snippets Groups Projects
Code owners
Assign users and groups as approvers for specific file changes. Learn more.
multiBamCov.cpp 4.11 KiB
/*****************************************************************************
  multiBamCov.cpp

  (c) 2009 - Aaron Quinlan
  Hall Laboratory
  Department of Biochemistry and Molecular Genetics
  University of Virginia
  aaronquinlan@gmail.com

  Licenced under the GNU General Public License 2.0 license.
******************************************************************************/
#include "lineFileUtilities.h"
#include "multiBamCov.h"
#include "api/BamMultiReader.h"


/*
    Constructor
*/
MultiCovBam::MultiCovBam(const vector<string> &bam_files, const string bed_file, 
                         int minQual, bool properOnly,
                         bool keepDuplicates, bool keepFailedQC)
:
_bam_files(bam_files),
_bed_file(bed_file),
_minQual(minQual),
_properOnly(properOnly),
_keepDuplicates(keepDuplicates),
_keepFailedQC(keepFailedQC)
{
	_bed = new BedFile(_bed_file);
    LoadBamFileMap();
}


/*
    Destructor
*/
MultiCovBam::~MultiCovBam(void) 
{}



void MultiCovBam::CollectCoverage()
{
    BamMultiReader reader;
    
    if ( !reader.Open(_bam_files) )
    {
        cerr << "Could not open input BAM files." << endl;
        exit(1);
    }
    else
    {
        // attempt to find index files
        reader.LocateIndexes();

        // if index data available for all BAM files, we can use SetRegion
        if ( reader.HasIndexes() ) {
            BED bed, nullBed;
            int lineNum = 0;
            BedLineStatus bedStatus;

            _bed->Open();
            // loop through each BED entry, jump to it, 
            // and collect coverage from each BAM
            while ((bedStatus = _bed->GetNextBed(bed, lineNum)) != BED_INVALID)
            {
                if (bedStatus == BED_VALID)
                {
                    // initialize counts for each file to 0
                    vector<int> counts(_bam_files.size(), 0);
                    // get the BAM refId for this chrom.
                    int refId = reader.GetReferenceID(bed.chrom);
                    // set up a BamRegion to which to attempt to jump
                    BamRegion region(refId, (int)bed.start, refId, (int)bed.end);
                    
                    // everything checks out, just iterate through specified region, counting alignments
                    if ( (refId != -1) && (reader.SetRegion(region)) ) {
                        BamAlignment al;
                        while ( reader.GetNextAlignment(al) )
                        {
                            bool duplicate = al.IsDuplicate();
                            bool failedQC  = al.IsFailedQC();
                            if (_keepDuplicates) duplicate = false;
                            if (_keepFailedQC)    failedQC = false;
                            // map qual must exceed minimum
                            if ((al.MapQuality >= _minQual) && (!duplicate) && (!failedQC)) {
                                // ignore if not properly paired and we actually care.
                                if (_properOnly && !al.IsProperPair())
                                    continue;

                                // lookup the offset of the file name and tabulate 
                                //coverage for the appropriate file
                                counts[bamFileMap[al.Filename]]++;
                            }
                        }
                    }
                    // report the cov at this interval for each file and reset
                    _bed->reportBedTab(bed);
                    ReportCounts(counts);
                    bed = nullBed;
                }
            }
            _bed->Close();
        }
        else {
            cerr << "Could not find indexes." << endl;
            reader.Close();
            exit(1);
        }
    }
}


void MultiCovBam::LoadBamFileMap(void) 
{
    for (size_t i = 0; i < _bam_files.size(); ++i)
    {
        bamFileMap[_bam_files[i]] = i;
    }
}

void MultiCovBam::ReportCounts(const vector<int> &counts) 
{
    for (size_t i = 0; i < counts.size(); ++i)
    {
        if (i < counts.size() - 1)
            cout << counts[i] << "\t";
        else
            cout << counts[i];
    }
    cout << endl;
}