Skip to content
Snippets Groups Projects
Commit f82d2d22 authored by Aaron's avatar Aaron
Browse files

Added intervals option to tagBam.

parent a43e8f68
No related branches found
No related tags found
No related merge requests found
...@@ -15,7 +15,8 @@ ...@@ -15,7 +15,8 @@
// build // build
TagBam::TagBam(const string &bamFile, const vector<string> &annoFileNames, TagBam::TagBam(const string &bamFile, const vector<string> &annoFileNames,
const vector<string> &annoLables, const string &tag, const vector<string> &annoLables, const string &tag,
bool useNames, bool useScores, bool sameStrand, bool diffStrand, float overlapFraction): bool useNames, bool useScores, bool useIntervals,
bool sameStrand, bool diffStrand, float overlapFraction):
_bamFile(bamFile), _bamFile(bamFile),
_annoFileNames(annoFileNames), _annoFileNames(annoFileNames),
...@@ -23,6 +24,7 @@ TagBam::TagBam(const string &bamFile, const vector<string> &annoFileNames, ...@@ -23,6 +24,7 @@ TagBam::TagBam(const string &bamFile, const vector<string> &annoFileNames,
_tag(tag), _tag(tag),
_useNames(useNames), _useNames(useNames),
_useScores(useScores), _useScores(useScores),
_useIntervals(useIntervals),
_sameStrand(sameStrand), _sameStrand(sameStrand),
_diffStrand(diffStrand), _diffStrand(diffStrand),
_overlapFraction(overlapFraction) _overlapFraction(overlapFraction)
...@@ -94,7 +96,7 @@ void TagBam::Tag() { ...@@ -94,7 +96,7 @@ void TagBam::Tag() {
// grab the current annotation file. // grab the current annotation file.
BedFile *anno = _annoFiles[i]; BedFile *anno = _annoFiles[i];
if (!_useNames && !_useScores) { if (!_useNames && !_useScores && !_useIntervals) {
// add the label for this annotation file to tag if there is overlap // add the label for this annotation file to tag if there is overlap
if (anno->FindOneOrMoreOverlapsPerBin(a.chrom, a.start, a.end, a.strand, _sameStrand, _diffStrand, _overlapFraction)) if (anno->FindOneOrMoreOverlapsPerBin(a.chrom, a.start, a.end, a.strand, _sameStrand, _diffStrand, _overlapFraction))
{ {
...@@ -102,7 +104,7 @@ void TagBam::Tag() { ...@@ -102,7 +104,7 @@ void TagBam::Tag() {
} }
} }
// use the score field // use the score field
else if (!_useNames && _useScores) { else if (!_useNames && _useScores && !_useIntervals) {
anno->FindOverlapsPerBin(a.chrom, a.start, a.end, a.strand, hits, _sameStrand, _diffStrand); anno->FindOverlapsPerBin(a.chrom, a.start, a.end, a.strand, hits, _sameStrand, _diffStrand);
for (size_t i = 0; i < hits.size(); ++i) { for (size_t i = 0; i < hits.size(); ++i) {
annotations << hits[i].score; annotations << hits[i].score;
...@@ -112,11 +114,27 @@ void TagBam::Tag() { ...@@ -112,11 +114,27 @@ void TagBam::Tag() {
hits.clear(); hits.clear();
} }
// use the name field from the annotation files to populate tag // use the name field from the annotation files to populate tag
else if (_useNames && !_useScores) { else if (_useNames && !_useScores && !_useIntervals) {
anno->FindOverlapsPerBin(a.chrom, a.start, a.end, a.strand, hits, _sameStrand, _diffStrand); anno->FindOverlapsPerBin(a.chrom, a.start, a.end, a.strand, hits, _sameStrand, _diffStrand);
for (size_t i = 0; i < hits.size(); ++i) { for (size_t j = 0; j < hits.size(); ++i) {
annotations << hits[i].name; annotations << hits[j].name;
if (i < hits.size() - 1) annotations << ","; if (j < hits.size() - 1) annotations << ",";
}
if (hits.size() > 0) annotations << ";";
hits.clear();
}
// use the full interval (label, chrom, start-end, name, score, strand) from the annotation files to populate tag
else if (!_useNames && !_useScores && _useIntervals) {
anno->FindOverlapsPerBin(a.chrom, a.start, a.end, a.strand, hits, _sameStrand, _diffStrand);
for (size_t j = 0; j < hits.size(); ++j) {
annotations << _annoLabels[i] << ":" <<
hits[j].chrom << ":" <<
hits[j].start << "-" <<
hits[j].end << "," <<
hits[j].name << "," <<
hits[j].score << "," <<
hits[j].strand;
if (j < hits.size() - 1) annotations << ",";
} }
if (hits.size() > 0) annotations << ";"; if (hits.size() > 0) annotations << ";";
hits.clear(); hits.clear();
...@@ -126,11 +144,11 @@ void TagBam::Tag() { ...@@ -126,11 +144,11 @@ void TagBam::Tag() {
if (annotations.str().size() > 0) { if (annotations.str().size() > 0) {
al.AddTag(_tag, "Z", annotations.str().substr(0, annotations.str().size() - 1)); // get rid of the last ";" al.AddTag(_tag, "Z", annotations.str().substr(0, annotations.str().size() - 1)); // get rid of the last ";"
} }
writer.SaveAlignment(al);
} }
writer.SaveAlignment(al);
} }
reader.Close(); reader.Close();
writer.Close();
// close the annotations files; // close the annotations files;
CloseAnnoFiles(); CloseAnnoFiles();
} }
...@@ -41,7 +41,7 @@ public: ...@@ -41,7 +41,7 @@ public:
// constructor // constructor
TagBam(const string &bamFile, const vector<string> &annoFileNames, TagBam(const string &bamFile, const vector<string> &annoFileNames,
const vector<string> &annoLabels, const string &tag, const vector<string> &annoLabels, const string &tag,
bool useNames, bool useScores, bool sameStrand, bool useNames, bool useScores, bool useIntervals, bool sameStrand,
bool diffStrand, float overlapFraction); bool diffStrand, float overlapFraction);
// destructor // destructor
...@@ -66,6 +66,7 @@ private: ...@@ -66,6 +66,7 @@ private:
// should we use the name field from the annotation files? // should we use the name field from the annotation files?
bool _useNames; bool _useNames;
bool _useScores; bool _useScores;
bool _useIntervals;
// do we care about strandedness when tagging? // do we care about strandedness when tagging?
bool _sameStrand; bool _sameStrand;
......
...@@ -38,6 +38,7 @@ int main(int argc, char* argv[]) { ...@@ -38,6 +38,7 @@ int main(int argc, char* argv[]) {
bool haveFraction = false; bool haveFraction = false;
bool useNames = false; bool useNames = false;
bool useScores = false; bool useScores = false;
bool useIntervals = false;
bool sameStrand = false; bool sameStrand = false;
bool diffStrand = false; bool diffStrand = false;
bool haveBam = false; bool haveBam = false;
...@@ -109,6 +110,9 @@ int main(int argc, char* argv[]) { ...@@ -109,6 +110,9 @@ int main(int argc, char* argv[]) {
else if (PARAMETER_CHECK("-scores", 7, parameterLength)) { else if (PARAMETER_CHECK("-scores", 7, parameterLength)) {
useScores = true; useScores = true;
} }
else if (PARAMETER_CHECK("-intervals", 10, parameterLength)) {
useIntervals = true;
}
else if (PARAMETER_CHECK("-s", 2, parameterLength)) { else if (PARAMETER_CHECK("-s", 2, parameterLength)) {
sameStrand = true; sameStrand = true;
} }
...@@ -156,13 +160,29 @@ int main(int argc, char* argv[]) { ...@@ -156,13 +160,29 @@ int main(int argc, char* argv[]) {
cerr << endl << "*****" << endl << "*****ERROR: Use -scores or -names, not both. " << endl << "*****" << endl; cerr << endl << "*****" << endl << "*****ERROR: Use -scores or -names, not both. " << endl << "*****" << endl;
showHelp = true; showHelp = true;
} }
if (useScores && useIntervals) {
cerr << endl << "*****" << endl << "*****ERROR: Use -scores or -intervals, not both. " << endl << "*****" << endl;
showHelp = true;
}
if (useNames && useIntervals) {
cerr << endl << "*****" << endl << "*****ERROR: Use -names or -intervals, not both. " << endl << "*****" << endl;
showHelp = true;
}
if (!haveLabels && useIntervals) {
cerr << endl << "*****" << endl << "*****ERROR: Supply -labels when using -intervals. " << endl << "*****" << endl;
showHelp = true;
}
if (haveTag && tag.size() > 2) { if (haveTag && tag.size() > 2) {
cerr << endl << "*****" << endl << "*****ERROR: Custom tags should be at most two characters per the SAM specification. " << endl << "*****" << endl; cerr << endl << "*****" << endl << "*****ERROR: Custom tags should be at most two characters per the SAM specification. " << endl << "*****" << endl;
showHelp = true; showHelp = true;
} }
if (!showHelp) { if (!showHelp) {
TagBam *ba = new TagBam(bamFile, inputFiles, inputLabels, tag, useNames, useScores, sameStrand, diffStrand, overlapFraction); TagBam *ba = new TagBam(bamFile, inputFiles, inputLabels,
tag, useNames, useScores,
useIntervals, sameStrand, diffStrand,
overlapFraction);
ba->Tag(); ba->Tag();
delete ba; delete ba;
return 0; return 0;
...@@ -201,9 +221,11 @@ void ShowHelp(void) { ...@@ -201,9 +221,11 @@ void ShowHelp(void) {
cerr << "\t-names\t" << "Use the name field from the annotation files to populate tags." << endl; cerr << "\t-names\t" << "Use the name field from the annotation files to populate tags." << endl;
cerr << "\t\tBy default, the -labels values are used." << endl << endl; cerr << "\t\tBy default, the -labels values are used." << endl << endl;
cerr << "\t-scores\t" << "A list of 1-based columns for each annotation file" << endl; cerr << "\t-scores\t" << "Use the score field from the annotation files to populate tags." << endl;
cerr << "\t\tin which a color can be found." << endl << endl; cerr << "\t\tBy default, the -labels values are used." << endl << endl;
cerr << "\t-intervals\t" << "Use the full interval (including name, score, and strand) to populate tags." << endl;
cerr << "\t\t\tRequires the -labels option to identify from which file the interval came." << endl << endl;
exit(1); exit(1);
} }
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment