Skip to content
Snippets Groups Projects
Commit f2433c5c authored by Aaron's avatar Aaron
Browse files

Added -s, -S, and -names to tagBam.

parent 9b61515e
No related branches found
No related tags found
No related merge requests found
......@@ -15,13 +15,15 @@
// Constructor: records the run configuration in the matching member
// variables. The body is empty, so nothing happens here beyond the
// member initialization list.
//   bamFile         - stored in _bamFile
//   annoFileNames   - stored in _annoFileNames
//   annoLables      - (sic) stored in _annoLabels
//   tag             - stored in _tag
//   useNames        - stored in _useNames
//   sameStrand      - stored in _sameStrand
//   diffStrand      - stored in _diffStrand
//   overlapFraction - stored in _overlapFraction
// NOTE(review): two parameter-list tails appear below (the forceStrand form
// and the useNames/sameStrand/diffStrand form), together with both the
// _forceStrand and the _sameStrand/_diffStrand initializers. This looks like
// a rendered diff with removed and added lines interleaved - confirm which
// lines are current before compiling.
TagBam::TagBam(const string &bamFile, const vector<string> &annoFileNames,
const vector<string> &annoLables, const string &tag,
bool forceStrand, float overlapFraction) :
bool useNames, bool sameStrand, bool diffStrand, float overlapFraction):
_bamFile(bamFile),
_annoFileNames(annoFileNames),
_annoLabels(annoLables),
_tag(tag),
_forceStrand(forceStrand),
_useNames(useNames),
_sameStrand(sameStrand),
_diffStrand(diffStrand),
_overlapFraction(overlapFraction)
{}
......@@ -50,10 +52,6 @@ void TagBam::CloseAnnoFiles() {
}
}
// Thin wrapper around BedFile::FindOneOrMoreOverlapsPerBin.
// Forwards the chrom/start/end/strand of `a`, plus this object's
// _forceStrand and _overlapFraction settings, to `bedFile` and returns
// whatever that call reports.
bool TagBam::FindOneOrMoreOverlap(const BED &a, BedFile *bedFile) {
    const bool overlapFound = bedFile->FindOneOrMoreOverlapsPerBin(
        a.chrom, a.start, a.end, a.strand, _forceStrand, _overlapFraction);
    return overlapFound;
}
void TagBam::Tag() {
......@@ -77,14 +75,15 @@ void TagBam::Tag() {
// rip through the BAM file and test for overlaps with each annotation file.
BamAlignment al;
vector<BED> hits;
while (reader.GetNextAlignment(al)) {
if (al.IsMapped() == true) {
BED a;
a.chrom = refs.at(al.RefID).RefName;
a.start = al.Position;
a.end = al.GetEndPosition(false, false);
if (al.IsReverseStrand()) a.strand = "-";
a.strand = "+";
if (al.IsReverseStrand()) a.strand = "-";
ostringstream annotations;
// annotate the BAM file based on overlaps with the annotation files.
......@@ -92,9 +91,23 @@ void TagBam::Tag() {
{
// grab the current annotation file.
BedFile *anno = _annoFiles[i];
// add the label for this annotation file to tag if there is overlap
if (FindOneOrMoreOverlap(a, anno)) {
annotations << _annoLabels[i] << ";";
if (!_useNames) {
// add the label for this annotation file to tag if there is overlap
if (anno->FindOneOrMoreOverlapsPerBin(a.chrom, a.start, a.end, a.strand, _sameStrand, _diffStrand, _overlapFraction))
{
annotations << _annoLabels[i] << ";";
}
}
// use the name field from the annotation files to populate tag
else {
anno->FindOverlapsPerBin(a.chrom, a.start, a.end, a.strand, hits, _sameStrand, _diffStrand);
for (size_t i = 0; i < hits.size(); ++i) {
annotations << hits[i].name;
if (i < hits.size() - 1) annotations << ",";
}
if (hits.size() > 0) annotations << ";";
hits.clear();
}
}
// were there any overlaps with which to make a tag?
......
......@@ -41,7 +41,7 @@ public:
// constructor
TagBam(const string &bamFile, const vector<string> &annoFileNames,
const vector<string> &annoLabels, const string &tag,
bool forceStrand, float overlapFraction);
bool useNames, bool sameStrand, bool diffStrand, float overlapFraction);
// destructor
~TagBam(void);
......@@ -61,8 +61,12 @@ private:
BedFile *_bed;
vector<BedFile*> _annoFiles;
// should we use the name field from the annotation files?
bool _useNames;
// do we care about strandedness when tagging?
bool _forceStrand;
bool _sameStrand;
bool _diffStrand;
float _overlapFraction;
// private function for reporting coverage information
......@@ -71,7 +75,6 @@ private:
void OpenAnnoFiles();
void CloseAnnoFiles();
bool FindOneOrMoreOverlap(const BED &a, BedFile *bedFile);
};
#endif /* TAGBAM_H */
......@@ -36,7 +36,9 @@ int main(int argc, char* argv[]) {
// parm flags
bool haveTag = false;
bool haveFraction = false;
bool forceStrand = false;
bool useNames = false;
bool sameStrand = false;
bool diffStrand = false;
bool haveBam = false;
bool haveFiles = false;
bool haveLabels = false;
......@@ -99,8 +101,14 @@ int main(int argc, char* argv[]) {
i--;
}
}
else if (PARAMETER_CHECK("-names", 6, parameterLength)) {
useNames = true;
}
else if (PARAMETER_CHECK("-s", 2, parameterLength)) {
forceStrand = true;
sameStrand = true;
}
else if (PARAMETER_CHECK("-S", 2, parameterLength)) {
diffStrand = true;
}
else if(PARAMETER_CHECK("-f", 2, parameterLength)) {
if ((i+1) < argc) {
......@@ -123,8 +131,20 @@ int main(int argc, char* argv[]) {
}
// make sure we have both input files
if (!haveBam || !haveFiles || !haveLabels) {
cerr << endl << "*****" << endl << "*****ERROR: Need -i, -files, and -labels. " << endl << "*****" << endl;
if (!haveBam || !haveFiles) {
cerr << endl << "*****" << endl << "*****ERROR: Need -i, -files" << endl << "*****" << endl;
showHelp = true;
}
if (!useNames && !haveLabels) {
cerr << endl << "*****" << endl << "*****ERROR: Need -labels or -names" << endl << "*****" << endl;
showHelp = true;
}
if (sameStrand && diffStrand) {
cerr << endl << "*****" << endl << "*****ERROR: Use -s or -S, not both. " << endl << "*****" << endl;
showHelp = true;
}
if (haveLabels && useNames) {
cerr << endl << "*****" << endl << "*****ERROR: Use -labels or -names, not both. " << endl << "*****" << endl;
showHelp = true;
}
if (haveTag && tag.size() > 2) {
......@@ -133,7 +153,7 @@ int main(int argc, char* argv[]) {
}
if (!showHelp) {
TagBam *ba = new TagBam(bamFile, inputFiles, inputLabels, tag, forceStrand, overlapFraction);
TagBam *ba = new TagBam(bamFile, inputFiles, inputLabels, tag, useNames, sameStrand, diffStrand, overlapFraction);
ba->Tag();
delete ba;
return 0;
......@@ -156,7 +176,10 @@ void ShowHelp(void) {
cerr << "Options: " << endl;
cerr << "\t-s\t" << "Force strandedness. That is, only tag alignments that have the same" << endl;
cerr << "\t-s\t" << "Require overlaps on the same strand. That is, only tag alignments that have the same" << endl;
cerr << "\t\tstrand as a feature in the annotation file(s)." << endl << endl;
cerr << "\t-S\t" << "Require overlaps on the opposite strand. That is, only tag alignments that have the opposite" << endl;
cerr << "\t\tstrand as a feature in the annotation file(s)." << endl << endl;
cerr << "\t-f\t" << "Minimum overlap required as a fraction of the alignment." << endl;
......@@ -166,5 +189,9 @@ void ShowHelp(void) {
cerr << "\t-tag\t" << "Dictate what the tag should be. Default is YB." << endl;
cerr << "\t\t- STRING (two characters, e.g., YK)" << endl << endl;
cerr << "\t-names\t" << "Use the name field from the annotation files to populate tags." << endl;
cerr << "\t\tBy default, the -labels values are used." << endl << endl;
exit(1);
}
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment