Skip to content
Snippets Groups Projects
Commit aa3d3959 authored by Aaron's avatar Aaron
Browse files

Update to fastaFromBed and intersectBed.

	1. Added the -wo option to intersectBed.
	2. Added the -s option to fastaFromBed.
parent e88f6c38
No related branches found
No related tags found
No related merge requests found
...@@ -12,7 +12,8 @@ ...@@ -12,7 +12,8 @@
#include "lineFileUtilities.h" #include "lineFileUtilities.h"
#include "fastaFromBed.h" #include "fastaFromBed.h"
Bed2Fa::Bed2Fa(bool &useName, string &dbFile, string &bedFile, string &fastaOutFile, bool &useFasta) { Bed2Fa::Bed2Fa(bool &useName, string &dbFile, string &bedFile, string &fastaOutFile,
bool &useFasta, bool &useStrand) {
if (useName) { if (useName) {
this->useName = true; this->useName = true;
...@@ -22,7 +23,8 @@ Bed2Fa::Bed2Fa(bool &useName, string &dbFile, string &bedFile, string &fastaOutF ...@@ -22,7 +23,8 @@ Bed2Fa::Bed2Fa(bool &useName, string &dbFile, string &bedFile, string &fastaOutF
this->bedFile = bedFile; this->bedFile = bedFile;
this->fastaOutFile = fastaOutFile; this->fastaOutFile = fastaOutFile;
this->useFasta = useFasta; this->useFasta = useFasta;
this->useStrand = useStrand;
this->bed = new BedFile(this->bedFile); this->bed = new BedFile(this->bedFile);
} }
...@@ -78,17 +80,35 @@ void Bed2Fa::ExtractDNA() { ...@@ -78,17 +80,35 @@ void Bed2Fa::ExtractDNA() {
unsigned int start = bedList[i].start; unsigned int start = bedList[i].start;
unsigned int end = bedList[i].end; unsigned int end = bedList[i].end;
if ( (start <= currDNA.size()) && (end <= currDNA.size()) ) { if ( (start <= currDNA.size()) && (end <= currDNA.size()) ) {
string dna = currDNA.substr(bedList[i].start, ((bedList[i].end - bedList[i].start))); string dna = currDNA.substr(bedList[i].start, ((bedList[i].end - bedList[i].start)));
// revcomp if necessary. Thanks to Thomas Doktor.
if ((this->useStrand == true) && (bedList[i].strand == "-")) {
reverseComplement(dna);
}
if (!(this->useName)) { if (!(this->useName)) {
if (this->useFasta == true) { if (this->useFasta == true) {
faOut << ">" << currChrom << ":" if (this->useStrand == true) {
<< bedList[i].start << "-" << bedList[i].end << endl << dna << endl; faOut << ">" << currChrom << ":" << bedList[i].start << "-"
<< bedList[i].end << "(" << bedList[i].strand << ")" << endl << dna << endl;
}
else {
faOut << ">" << currChrom << ":" << bedList[i].start << "-"
<< bedList[i].end << endl << dna << endl;
}
} }
else { else {
faOut << currChrom << ":" << bedList[i].start << "-" << bedList[i].end if (this->useStrand == true) {
<< "\t" << dna << endl; faOut << currChrom << ":" << bedList[i].start << "-"
<< bedList[i].end << "(" << bedList[i].strand << ")"
<< "\t" << dna << endl;
}
else {
faOut << currChrom << ":" << bedList[i].start << "-" << bedList[i].end
<< "\t" << dna << endl;
}
} }
} }
else { else {
...@@ -120,18 +140,36 @@ void Bed2Fa::ExtractDNA() { ...@@ -120,18 +140,36 @@ void Bed2Fa::ExtractDNA() {
unsigned int start = bedList[i].start; unsigned int start = bedList[i].start;
unsigned int end = bedList[i].end; unsigned int end = bedList[i].end;
if ( (start <= currDNA.size()) && (end <= currDNA.size()) ) { if ( (start <= currDNA.size()) && (end <= currDNA.size()) ) {
string dna = currDNA.substr(bedList[i].start, ((bedList[i].end - bedList[i].start))); string dna = currDNA.substr(bedList[i].start, ((bedList[i].end - bedList[i].start)));
// revcomp if necessary. Thanks to Thomas Doktor.
if ((this->useStrand == true) && (bedList[i].strand == "-")) {
reverseComplement(dna);
}
if (!(this->useName)) { if (!(this->useName)) {
if (this->useFasta == true) { if (this->useFasta == true) {
faOut << ">" << currChrom << ":" if (this->useStrand == true) {
<< bedList[i].start << "-" << bedList[i].end << endl << dna << endl; faOut << ">" << currChrom << ":" << bedList[i].start << "-"
<< bedList[i].end << "(" << bedList[i].strand << ")" << endl << dna << endl;
}
else {
faOut << ">" << currChrom << ":" << bedList[i].start << "-"
<< bedList[i].end << endl << dna << endl;
}
} }
else { else {
faOut << currChrom << ":" << bedList[i].start << "-" << bedList[i].end if (this->useStrand == true) {
<< "\t" << dna << endl; faOut << currChrom << ":" << bedList[i].start << "-"
<< bedList[i].end << "(" << bedList[i].strand << ")"
<< "\t" << dna << endl;
}
else {
faOut << currChrom << ":" << bedList[i].start << "-" << bedList[i].end
<< "\t" << dna << endl;
}
} }
} }
else { else {
...@@ -142,10 +180,10 @@ void Bed2Fa::ExtractDNA() { ...@@ -142,10 +180,10 @@ void Bed2Fa::ExtractDNA() {
faOut << bedList[i].name << "\t" << dna << endl; faOut << bedList[i].name << "\t" << dna << endl;
} }
} }
} }
else cerr << "Feature (" << bedList[i].chrom << ":" << start << "-" << end << ") beyond " else cerr << "Feature (" << bedList[i].chrom << ":" << start << "-" << end << ") beyond "
<< currChrom << " size (" << currDNA.size() << " bp). Skipping." << endl; << currChrom << " size (" << currDNA.size() << " bp). Skipping." << endl;
} }
currDNA = ""; currDNA = "";
} }
......
...@@ -28,7 +28,8 @@ class Bed2Fa { ...@@ -28,7 +28,8 @@ class Bed2Fa {
public: public:
// constructor // constructor
Bed2Fa(bool &, string &, string &, string &, bool &); Bed2Fa(bool &useName, string &dbFile, string &bedFile, string &fastaOutFile,
bool &useFasta, bool &useStrand);
// destructor // destructor
~Bed2Fa(void); ~Bed2Fa(void);
...@@ -42,6 +43,7 @@ private: ...@@ -42,6 +43,7 @@ private:
string bedFile; string bedFile;
string fastaOutFile; string fastaOutFile;
bool useFasta; bool useFasta;
bool useStrand;
// instance of a bed file class. // instance of a bed file class.
BedFile *bed; BedFile *bed;
......
...@@ -42,6 +42,7 @@ int main(int argc, char* argv[]) { ...@@ -42,6 +42,7 @@ int main(int argc, char* argv[]) {
bool haveFastaOut = false; bool haveFastaOut = false;
bool useNameOnly = false; bool useNameOnly = false;
bool useFasta = true; bool useFasta = true;
bool useStrand = false;
// check to see if we should print out some help // check to see if we should print out some help
if(argc <= 1) showHelp = true; if(argc <= 1) showHelp = true;
...@@ -89,6 +90,9 @@ int main(int argc, char* argv[]) { ...@@ -89,6 +90,9 @@ int main(int argc, char* argv[]) {
else if(PARAMETER_CHECK("-tab", 4, parameterLength)) { else if(PARAMETER_CHECK("-tab", 4, parameterLength)) {
useFasta = false; useFasta = false;
} }
else if(PARAMETER_CHECK("-s", 2, parameterLength)) {
useStrand = true;
}
else { else {
cerr << "*****ERROR: Unrecognized parameter: " << argv[i] << " *****" << endl << endl; cerr << "*****ERROR: Unrecognized parameter: " << argv[i] << " *****" << endl << endl;
showHelp = true; showHelp = true;
...@@ -101,7 +105,7 @@ int main(int argc, char* argv[]) { ...@@ -101,7 +105,7 @@ int main(int argc, char* argv[]) {
if (!showHelp) { if (!showHelp) {
Bed2Fa *b2f = new Bed2Fa(useNameOnly, fastaDbFile, bedFile, fastaOutFile, useFasta); Bed2Fa *b2f = new Bed2Fa(useNameOnly, fastaDbFile, bedFile, fastaOutFile, useFasta, useStrand);
b2f->ExtractDNA(); b2f->ExtractDNA();
return 0; return 0;
} }
...@@ -125,8 +129,13 @@ void ShowHelp(void) { ...@@ -125,8 +129,13 @@ void ShowHelp(void) {
cerr << "\t-bed\tBED file of ranges to extract from -fi" << endl; cerr << "\t-bed\tBED file of ranges to extract from -fi" << endl;
cerr << "\t-fo\tOutput file (can be FASTA or TAB-delimited)" << endl; cerr << "\t-fo\tOutput file (can be FASTA or TAB-delimited)" << endl;
cerr << "\t-name\tUse the BED name field (#4) for the FASTA header" << endl; cerr << "\t-name\tUse the BED name field (#4) for the FASTA header" << endl;
cerr << "\t-tab\tWrite output in TAB delimited format." << endl; cerr << "\t-tab\tWrite output in TAB delimited format." << endl;
cerr << "\t\tDefault is FASTA format." << endl; cerr << "\t\t- Default is FASTA format." << endl << endl;
cerr << "\t-s\tForce strandedness. If the feature occupies the antisense strand," << endl;
cerr << "\t\tthe sequence will be reverse complemented." << endl;
cerr << "\t\t- By default, strand information is ignored." << endl << endl;
......
...@@ -17,23 +17,25 @@ ...@@ -17,23 +17,25 @@
Constructor Constructor
*/ */
BedIntersect::BedIntersect(string bedAFile, string bedBFile, bool anyHit, BedIntersect::BedIntersect(string bedAFile, string bedBFile, bool anyHit,
bool writeA, bool writeB, float overlapFraction, bool writeA, bool writeB, bool writeOverlap, float overlapFraction,
bool noHit, bool writeCount, bool forceStrand, bool reciprocal, bool noHit, bool writeCount, bool forceStrand, bool reciprocal,
bool bamInput, bool bamOutput) { bool bamInput, bool bamOutput) {
this->bedAFile = bedAFile; this->bedAFile = bedAFile;
this->bedBFile = bedBFile; this->bedBFile = bedBFile;
this->anyHit = anyHit; this->anyHit = anyHit;
this->noHit = noHit; this->noHit = noHit;
this->writeA = writeA; this->writeA = writeA;
this->writeB = writeB; this->writeB = writeB;
this->writeCount = writeCount; this->writeOverlap = writeOverlap;
this->writeCount = writeCount;
this->overlapFraction = overlapFraction; this->overlapFraction = overlapFraction;
this->forceStrand = forceStrand; this->forceStrand = forceStrand;
this->reciprocal = reciprocal; this->reciprocal = reciprocal;
this->bamInput = bamInput; this->bamInput = bamInput;
this->bamOutput = bamOutput; this->bamOutput = bamOutput;
// create new BED file objects for A and B
this->bedA = new BedFile(bedAFile); this->bedA = new BedFile(bedAFile);
this->bedB = new BedFile(bedBFile); this->bedB = new BedFile(bedBFile);
} }
...@@ -80,7 +82,7 @@ bool BedIntersect::FindOverlaps(const BED &a, vector<BED> &hits) { ...@@ -80,7 +82,7 @@ bool BedIntersect::FindOverlaps(const BED &a, vector<BED> &hits) {
hitsFound = true; hitsFound = true;
numOverlaps++; // we have another hit for A numOverlaps++; // we have another hit for A
/*
if (!writeB && printable) { if (!writeB && printable) {
if (writeA) bedA->reportBedNewLine(a); if (writeA) bedA->reportBedNewLine(a);
else bedA->reportBedRangeNewLine(a,s,e); else bedA->reportBedRangeNewLine(a,s,e);
...@@ -95,6 +97,30 @@ bool BedIntersect::FindOverlaps(const BED &a, vector<BED> &hits) { ...@@ -95,6 +97,30 @@ bool BedIntersect::FindOverlaps(const BED &a, vector<BED> &hits) {
bedB->reportBedNewLine(*h); bedB->reportBedNewLine(*h);
} }
} }
*/
// new
if (printable == true) {
if (writeA == false && writeB == false && writeOverlap == false) {
bedA->reportBedRangeNewLine(a,s,e);
}
else if (writeA == true && writeB == true) {
bedA->reportBedTab(a);
bedB->reportBedNewLine(*h);
}
else if (writeA == true) {
bedA->reportBedNewLine(a);
}
else if (writeB == true) {
bedA->reportBedRangeTab(a,s,e);
bedB->reportBedNewLine(*h);
}
else if (writeOverlap == true) {
bedA->reportBedTab(a);
bedB->reportBedTab(*h);
printf("%d\n", overlapBases);
}
}
} }
else { // the user wants there to be sufficient reciprocal overlap else { // the user wants there to be sufficient reciprocal overlap
int bLength = (h->end - h->start); int bLength = (h->end - h->start);
......
...@@ -34,7 +34,7 @@ public: ...@@ -34,7 +34,7 @@ public:
// constructor // constructor
BedIntersect(string bedAFile, string bedBFile, bool anyHit, BedIntersect(string bedAFile, string bedBFile, bool anyHit,
bool writeA, bool writeB, float overlapFraction, bool writeA, bool writeB, bool writeOverlap, float overlapFraction,
bool noHit, bool writeCount, bool forceStrand, bool reciprocal, bool noHit, bool writeCount, bool forceStrand, bool reciprocal,
bool bamInput, bool bamOutput); bool bamInput, bool bamOutput);
...@@ -63,6 +63,7 @@ private: ...@@ -63,6 +63,7 @@ private:
bool writeA; bool writeA;
bool writeB; bool writeB;
bool writeCount; bool writeCount;
bool writeOverlap;
bool forceStrand; bool forceStrand;
bool reciprocal; bool reciprocal;
float overlapFraction; float overlapFraction;
......
...@@ -36,18 +36,19 @@ int main(int argc, char* argv[]) { ...@@ -36,18 +36,19 @@ int main(int argc, char* argv[]) {
// input arguments // input arguments
float overlapFraction = 1E-9; float overlapFraction = 1E-9;
bool haveBedA = false; bool haveBedA = false;
bool haveBedB = false; bool haveBedB = false;
bool noHit = false; bool noHit = false;
bool anyHit = false; bool anyHit = false;
bool writeA = false; bool writeA = false;
bool writeB = false; bool writeB = false;
bool writeCount = false; bool writeCount = false;
bool haveFraction = false; bool writeOverlap = false;
bool haveFraction = false;
bool reciprocalFraction = false; bool reciprocalFraction = false;
bool forceStrand = false; bool forceStrand = false;
bool inputIsBam = false; bool inputIsBam = false;
bool outputIsBam = true; bool outputIsBam = true;
// check to see if we should print out some help // check to see if we should print out some help
if(argc <= 1) showHelp = true; if(argc <= 1) showHelp = true;
...@@ -110,6 +111,9 @@ int main(int argc, char* argv[]) { ...@@ -110,6 +111,9 @@ int main(int argc, char* argv[]) {
else if(PARAMETER_CHECK("-wb", 3, parameterLength)) { else if(PARAMETER_CHECK("-wb", 3, parameterLength)) {
writeB = true; writeB = true;
} }
else if(PARAMETER_CHECK("-wo", 3, parameterLength)) {
writeOverlap = true;
}
else if(PARAMETER_CHECK("-c", 2, parameterLength)) { else if(PARAMETER_CHECK("-c", 2, parameterLength)) {
writeCount = true; writeCount = true;
} }
...@@ -143,6 +147,21 @@ int main(int argc, char* argv[]) { ...@@ -143,6 +147,21 @@ int main(int argc, char* argv[]) {
cerr << endl << "*****" << endl << "*****ERROR: Request either -wb OR -c, not both." << endl << "*****" << endl; cerr << endl << "*****" << endl << "*****ERROR: Request either -wb OR -c, not both." << endl << "*****" << endl;
showHelp = true; showHelp = true;
} }
if (writeCount && writeOverlap) {
cerr << endl << "*****" << endl << "*****ERROR: Request either -wb OR -wo, not both." << endl << "*****" << endl;
showHelp = true;
}
if (writeA && writeOverlap) {
cerr << endl << "*****" << endl << "*****ERROR: Request either -wa OR -wo, not both." << endl << "*****" << endl;
showHelp = true;
}
if (writeB && writeOverlap) {
cerr << endl << "*****" << endl << "*****ERROR: Request either -wb OR -wo, not both." << endl << "*****" << endl;
showHelp = true;
}
if (reciprocalFraction && !haveFraction) { if (reciprocalFraction && !haveFraction) {
cerr << endl << "*****" << endl << "*****ERROR: If using -r, you need to define -f." << endl << "*****" << endl; cerr << endl << "*****" << endl << "*****ERROR: If using -r, you need to define -f." << endl << "*****" << endl;
...@@ -153,11 +172,21 @@ int main(int argc, char* argv[]) { ...@@ -153,11 +172,21 @@ int main(int argc, char* argv[]) {
cerr << endl << "*****" << endl << "*****ERROR: Request either -u OR -c, not both." << endl << "*****" << endl; cerr << endl << "*****" << endl << "*****ERROR: Request either -u OR -c, not both." << endl << "*****" << endl;
showHelp = true; showHelp = true;
} }
if (anyHit && writeB) {
cerr << endl << "*****" << endl << "*****ERROR: Request either -u OR -wb, not both." << endl << "*****" << endl;
showHelp = true;
}
if (anyHit && writeOverlap) {
cerr << endl << "*****" << endl << "*****ERROR: Request either -u OR -wo, not both." << endl << "*****" << endl;
showHelp = true;
}
if (!showHelp) { if (!showHelp) {
BedIntersect *bi = new BedIntersect(bedAFile, bedBFile, anyHit, writeA, writeB, BedIntersect *bi = new BedIntersect(bedAFile, bedBFile, anyHit, writeA, writeB, writeOverlap,
overlapFraction, noHit, writeCount, forceStrand, overlapFraction, noHit, writeCount, forceStrand,
reciprocalFraction, inputIsBam, outputIsBam); reciprocalFraction, inputIsBam, outputIsBam);
bi->DetermineBedInput(); bi->DetermineBedInput();
...@@ -186,18 +215,24 @@ void ShowHelp(void) { ...@@ -186,18 +215,24 @@ void ShowHelp(void) {
cerr << "\t\tis to write output in BAM when using -abam." << endl << endl; cerr << "\t\tis to write output in BAM when using -abam." << endl << endl;
cerr << "\t-wa\t" << "Write the original entry in A for each overlap." << endl << endl; cerr << "\t-wa\t" << "Write the original entry in A for each overlap." << endl << endl;
cerr << "\t-wb\t" << "Write the original entry in B for each overlap." << endl; cerr << "\t-wb\t" << "Write the original entry in B for each overlap." << endl;
cerr << "\t\t- Useful for knowing _what_ A overlaps. Restricted by -f." << endl << endl; cerr << "\t\t- Useful for knowing _what_ A overlaps. Restricted by -f and -r." << endl << endl;
cerr << "\t-wo\t" << "Write the original A and B entries plus the number of base" << endl;
cerr << "\t\tpairs of overlap between the two features." << endl;
cerr << "\t\t- Overlaps restricted by -f and -r." << endl << endl;
cerr << "\t-u\t" << "Write the original A entry _once_ if _any_ overlaps found in B." << endl; cerr << "\t-u\t" << "Write the original A entry _once_ if _any_ overlaps found in B." << endl;
cerr << "\t\t- In other words, just report the fact >=1 hit was found." << endl << endl; cerr << "\t\t- In other words, just report the fact >=1 hit was found." << endl;
cerr << "\t\t- Overlaps restricted by -f and -r." << endl << endl;
cerr << "\t-c\t" << "For each entry in A, report the number of overlaps with B." << endl; cerr << "\t-c\t" << "For each entry in A, report the number of overlaps with B." << endl;
cerr << "\t\t- Reports 0 for A entries that have no overlap with B." << endl; cerr << "\t\t- Reports 0 for A entries that have no overlap with B." << endl;
cerr << "\t\t- Overlaps restricted by -f." << endl << endl; cerr << "\t\t- Overlaps restricted by -f and -r." << endl << endl;
cerr << "\t-v\t" << "Only report those entries in A that have _no overlaps_ with B." << endl; cerr << "\t-v\t" << "Only report those entries in A that have _no overlaps_ with B." << endl;
cerr << "\t\t- Similar to \"grep -v.\"" << endl << endl; cerr << "\t\t- Similar to \"grep -v\" (an homage)." << endl << endl;
cerr << "\t-f\t" << "Minimum overlap required as a fraction of A." << endl; cerr << "\t-f\t" << "Minimum overlap required as a fraction of A." << endl;
cerr << "\t\t- Default is 1E-9 (i.e., 1bp)." << endl; cerr << "\t\t- Default is 1E-9 (i.e., 1bp)." << endl;
...@@ -206,7 +241,6 @@ void ShowHelp(void) { ...@@ -206,7 +241,6 @@ void ShowHelp(void) {
cerr << "\t-r\t" << "Require that the fraction overlap be reciprocal for A and B." << endl; cerr << "\t-r\t" << "Require that the fraction overlap be reciprocal for A and B." << endl;
cerr << "\t\t- In other words, if -f is 0.90 and -r is used, this requires" << endl; cerr << "\t\t- In other words, if -f is 0.90 and -r is used, this requires" << endl;
cerr << "\t\t that B overlap 90% of A and A _also_ overlaps 90% of B." << endl << endl; cerr << "\t\t that B overlap 90% of A and A _also_ overlaps 90% of B." << endl << endl;
cerr << "\t-s\t" << "Force strandedness. That is, only report hits in B that" << endl; cerr << "\t-s\t" << "Force strandedness. That is, only report hits in B that" << endl;
cerr << "\t\toverlap A on the same strand." << endl; cerr << "\t\toverlap A on the same strand." << endl;
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment