Skip to content
Snippets Groups Projects
Code owners
Assign users and groups as approvers for specific file changes. Learn more.
bedFilePE.cpp 7.69 KiB
// 
//  bedFilePE.cpp
//  BEDTools
//  
//  Created by Aaron Quinlan Spring 2009.
//  Copyright 2009 Aaron Quinlan. All rights reserved.
//
//  Summary:  Contains common functions for finding BED overlaps.
//
//  Acknowledgments: Much of the code herein is taken from Jim Kent's
//                   BED processing code.  I am grateful for his elegant
//					 genome binning algorithm and therefore use it extensively.


#include "bedFilePE.h"

//***********************************************
// Common functions
//***********************************************

// Constructor: initialises the bedFile member from the string passed in.
BedFilePE::BedFilePE(string &bedFile)
	: bedFile(bedFile) {
}

// Destructor: the body is empty, nothing is released explicitly here.
BedFilePE::~BedFilePE() {
}


/*
	reportBedPETab
	
	Writes the _original_ BED entry for A.
	Works for BEDPE only.
*/
void BedFilePE::reportBedPETab(const BEDPE &a) {

	if (this->bedType == 6) {
		printf("%s\t%d\t%d\t%s\t%d\t%d\t", a.chrom1.c_str(), a.start1, a.end1,
		 									a.chrom2.c_str(), a.start2, a.end2);
	}
	else if (this->bedType == 7) {
		printf("%s\t%d\t%d\t%s\t%d\t%d\t%s\t", a.chrom1.c_str(), a.start1, a.end1,
		 									a.chrom2.c_str(), a.start2, a.end2,
											a.name.c_str());
	}	
	else if (this->bedType == 8) {
		printf("%s\t%d\t%d\t%s\t%d\t%d\t%s\t%d\t", a.chrom1.c_str(), a.start1, a.end1,
		 									a.chrom2.c_str(), a.start2, a.end2,
											a.name.c_str(), a.score);
	}
	else if (this->bedType == 10) {
		printf("%s\t%d\t%d\t%s\t%d\t%d\t%s\t%d\t%s\t%s\t", a.chrom1.c_str(), a.start1, a.end1,
		 									a.chrom2.c_str(), a.start2, a.end2,
											a.name.c_str(), a.score, a.strand1.c_str(), a.strand2.c_str());
	}
}



/*
	reportBedPENewLine
	
	Writes the _original_ BED entry for A.
	Works for BEDPE only.
*/
void BedFilePE::reportBedPENewLine(const BEDPE &a) {

	if (this->bedType == 6) {
		printf("%s\t%d\t%d\t%s\t%d\t%d\n", a.chrom1.c_str(), a.start1, a.end1,
		 									a.chrom2.c_str(), a.start2, a.end2);
	}
	else if (this->bedType == 7) {
		printf("%s\t%d\t%d\t%s\t%d\t%d\t%s\n", a.chrom1.c_str(), a.start1, a.end1,
		 									a.chrom2.c_str(), a.start2, a.end2,
											a.name.c_str());
	}	
	else if (this->bedType == 8) {
		printf("%s\t%d\t%d\t%s\t%d\t%d\t%s\t%d\n", a.chrom1.c_str(), a.start1, a.end1,
		 									a.chrom2.c_str(), a.start2, a.end2,
											a.name.c_str(), a.score);
	}
	else if (this->bedType == 10) {
		printf("%s\t%d\t%d\t%s\t%d\t%d\t%s\t%d\t%s\t%s\n", a.chrom1.c_str(), a.start1, a.end1,
		 									a.chrom2.c_str(), a.start2, a.end2,
											a.name.c_str(), a.score, a.strand1.c_str(), a.strand2.c_str());
	}
}



bool BedFilePE::parseBedPELine (BEDPE &bed, const vector<string> &lineVector, const int &lineNum) {

	if ((lineNum == 1) && (lineVector.size() >= 3)) {

		this->bedType = lineVector.size();

		if (this->bedType == 6) {
			bed.chrom1 = lineVector[0];
			bed.start1 = atoi(lineVector[1].c_str());
			bed.end1 = atoi(lineVector[2].c_str());
						
			bed.chrom2 = lineVector[3];
			bed.start2 = atoi(lineVector[4].c_str());
			bed.end2 = atoi(lineVector[5].c_str());

			return true;
		}
		else if (this->bedType == 7) {
			bed.chrom1 = lineVector[0];
			bed.start1 = atoi(lineVector[1].c_str());
			bed.end1 = atoi(lineVector[2].c_str());
						
			bed.chrom2 = lineVector[3];
			bed.start2 = atoi(lineVector[4].c_str());
			bed.end2 = atoi(lineVector[5].c_str());

			bed.name = lineVector[6];
			return true;
		}	
		else if (this->bedType == 8) {
			bed.chrom1 = lineVector[0];
			bed.start1 = atoi(lineVector[1].c_str());
			bed.end1 = atoi(lineVector[2].c_str());
						
			bed.chrom2 = lineVector[3];
			bed.start2 = atoi(lineVector[4].c_str());
			bed.end2 = atoi(lineVector[5].c_str());

			bed.name = lineVector[6];
			bed.score = atoi(lineVector[7].c_str());
			return true;
		}	
		else if (this->bedType == 10) {
			bed.chrom1 = lineVector[0];
			bed.start1 = atoi(lineVector[1].c_str());
			bed.end1 = atoi(lineVector[2].c_str());
						
			bed.chrom2 = lineVector[3];
			bed.start2 = atoi(lineVector[4].c_str());
			bed.end2 = atoi(lineVector[5].c_str());

			bed.name = lineVector[6];
			bed.score = atoi(lineVector[7].c_str());

			bed.strand1 = lineVector[8];
			bed.strand2 = lineVector[9];

			return true;
		}
		else {
			cerr << "Unexpected number of fields: " << lineNum << ".  Verify that your files are TAB-delimited and that your BEDPE file has 6,7,8 or 10 fields.  Exiting..." << endl;
			exit(1);
		}
		
		if (bed.start1 > bed.end1) {
			cerr << "Error: malformed BED entry at line " << lineNum << ". Start1 was greater than End1. Ignoring it and moving on." << endl;
			return false;
		}
		else if (bed.start2 > bed.end2) {
			cerr << "Error: malformed BED entry at line " << lineNum << ". Start2 was greater than End2. Ignoring it and moving on." << endl;
			return false;
		}
		else if ( (bed.start1 < 0) || (bed.end1 < 0) || (bed.start2 < 0) || (bed.end2 < 0) ) {
			cerr << "Error: malformed BED entry at line " << lineNum << ". Coordinate <= 0. Ignoring it and moving on." << endl;
			return false;
		}
	}
	else if ( (lineNum > 1) && (lineVector.size() == this->bedType)) {

		if (this->bedType == 6) {
			bed.chrom1 = lineVector[0];
			bed.start1 = atoi(lineVector[1].c_str());
			bed.end1 = atoi(lineVector[2].c_str());
						
			bed.chrom2 = lineVector[3];
			bed.start2 = atoi(lineVector[4].c_str());
			bed.end2 = atoi(lineVector[5].c_str());

			return true;
		}
		else if (this->bedType == 7) {
			bed.chrom1 = lineVector[0];
			bed.start1 = atoi(lineVector[1].c_str());
			bed.end1 = atoi(lineVector[2].c_str());
						
			bed.chrom2 = lineVector[3];
			bed.start2 = atoi(lineVector[4].c_str());
			bed.end2 = atoi(lineVector[5].c_str());

			bed.name = lineVector[6];
			return true;
		}	
		else if (this->bedType == 8) {
			bed.chrom1 = lineVector[0];
			bed.start1 = atoi(lineVector[1].c_str());
			bed.end1 = atoi(lineVector[2].c_str());
						
			bed.chrom2 = lineVector[3];
			bed.start2 = atoi(lineVector[4].c_str());
			bed.end2 = atoi(lineVector[5].c_str());

			bed.name = lineVector[6];
			bed.score = atoi(lineVector[7].c_str());
			return true;
		}	
		else if (this->bedType == 10) {
			bed.chrom1 = lineVector[0];
			bed.start1 = atoi(lineVector[1].c_str());
			bed.end1 = atoi(lineVector[2].c_str());
						
			bed.chrom2 = lineVector[3];
			bed.start2 = atoi(lineVector[4].c_str());
			bed.end2 = atoi(lineVector[5].c_str());

			bed.name = lineVector[6];
			bed.score = atoi(lineVector[7].c_str());

			bed.strand1 = lineVector[8];
			bed.strand2 = lineVector[9];

			return true;
		}
		else {
			cerr << "Unexpected number of fields: " << lineNum << ".  Verify that your files are TAB-delimited and that your BEDPE file has 6,7,8 or 10 fields.  Exiting..." << endl;
			exit(1);
		}

		if (bed.start1 > bed.end1) {
			cerr << "Error: malformed BED entry at line " << lineNum << ". Start1 was greater than End1. Ignoring it and moving on." << endl;
			return false;
		}
		else if (bed.start2 > bed.end2) {
			cerr << "Error: malformed BED entry at line " << lineNum << ". Start2 was greater than End2. Ignoring it and moving on." << endl;
			return false;
		}
		else if ( (bed.start1 < 0) || (bed.end1 < 0) || (bed.start2 < 0) || (bed.end2 < 0) ) {
			cerr << "Error: malformed BED entry at line " << lineNum << ". Coordinate <= 0. Ignoring it and moving on." << endl;
			return false;
		}
	}
	else if (lineVector.size() == 1) {
		cerr << "Only one BED field detected: " << lineNum << ".  Verify that your files are TAB-delimited.  Exiting..." << endl;
		exit(1);		
	}
	else if (lineVector.size() != this->bedType) {
		cerr << "Differing number of BED fields encountered at line: " << lineNum << ".  Exiting..." << endl;
		exit(1);
	}
	else if (lineVector.size() < 8) {
		cerr << "TAB delimited BED file with 10 fields (chrom1, start1, end1, strand1, chrom2, start2, end2, strand2) is required.  Exiting..." << endl;
		exit(1);
	}
	return false;
}