Commit 29fe57fc by Neil Kindlon

### Added closest -fu and -fd, updated help

parent f9fbaac7
 ... ... @@ -74,10 +74,19 @@ void closest_help(void) { cerr << "\t-iu\t" << "Ignore features in B that are upstream of features in A." << endl; cerr << "\t\tThis option requires -D and follows its orientation" << endl; cerr << "\t\trules for determining what is \"upstream\"." << endl; cerr << "\t\trules for determining what is \"upstream\"." << endl << endl; cerr << "\t-id\t" << "Ignore features in B that are downstream of features in A." << endl; cerr << "\t\tThis option requires -D and follows its orientation" << endl; cerr << "\t\trules for determining what is \"downstream\"." << endl; cerr << "\t\trules for determining what is \"downstream\"." << endl << endl; cerr << "\t-fu\t" << "Choose first from features in B that are upstream of features in A." << endl; cerr << "\t\tThis option requires -D and follows its orientation" << endl; cerr << "\t\trules for determining what is \"upstream\"." << endl << endl; cerr << "\t-fd\t" << "Choose first from features in B that are downstream of features in A." << endl; cerr << "\t\tThis option requires -D and follows its orientation" << endl; cerr << "\t\trules for determining what is \"downstream\"." << endl << endl; cerr << "\t-t\t" << "How ties for closest feature are handled. This occurs when two" << endl; cerr << "\t\tfeatures in B have exactly the same \"closeness\" with A." << endl; ... ... @@ -92,6 +101,9 @@ void closest_help(void) { cerr << "\t\t- \"all\" Report closest records among all databases." << endl << endl; cerr << "\t-k\t" << "Report the k closest hits. Default is 1. If tieMode = \"all\", " << endl; cerr << "\t\t- all ties will still be reported." << endl << endl; cerr << "\t-N\t" << "Require that the query and the closest hit have different names." << endl; cerr << "\t\tFor BED, the 4th column is compared." << endl << endl; ... ...
 ... ... @@ -82,6 +82,9 @@ void map_help(void) { cerr << "\t-header\t" << "Print the header from the A file prior to results." << endl << endl; cerr << "\t-prec\t" << "Sets the decimal precision for output (Default: 5)" << endl << endl; cerr << "Notes: " << endl; cerr << "\t(1) Both input files must be sorted by chrom, then start." << endl << endl; ... ...
 ... ... @@ -69,6 +69,8 @@ void merge_help(void) { cerr << "\t-c\t" << "Specify columns from the input file to operate upon (see -o option, below)." << endl; cerr << "\t\tMultiple columns can be specified in a comma-delimited list." << endl << endl; cerr << "\t-prec\t" << "Sets the decimal precision for output (Default: 5)" << endl << endl; KeyListOpsHelp(); cerr << "Notes: " << endl; ... ...
 ... ... @@ -9,6 +9,7 @@ #include #include //#include "FormatGuess.h" #include #include "PushBackStreamBuf.h" #include "InflateStreamBuf.h" ... ... @@ -29,14 +30,29 @@ int nek_sandbox1_main2(int argc,char** argv); int nek_sandbox1_main(int argc,char** argv) { multimap myMap; myMap.insert(pair(1, 'a')); myMap.insert(pair(2, 'b')); myMap.insert(pair(3, 'c')); myMap.insert(pair(3, 'x')); myMap.insert(pair(3, 'y')); myMap.insert(pair(3, 'z')); myMap.insert(pair(4, 'd')); cout << "Multimap contains:" << endl; for (multimap::iterator iter = myMap.begin(); iter != myMap.end(); iter++) { cout << iter-> first << ":" << iter->second << endl; } cout << "Size is " << myMap.size() << "." << endl; // for (int i=0; i < 4000; i++) { // cout << "# This is line " << i << " of a file with a large header." << endl; // } // return 0; if (argc < 2) { cerr << "Error: Need one input file. Use \"-\" for stdin." << endl; } // if (argc < 2) { // cerr << "Error: Need one input file. Use \"-\" for stdin." << endl; // } // ifstream inFileStream(argv[1]); // static const int BUF_SIZE = 8192; // BamTools::Internal::BgzfStream bgStream; ... ...
 ... ... @@ -52,6 +52,7 @@ ContextBase::ContextBase() _seed(0), _forwardOnly(false), _reverseOnly(false), _nameCheckDisabled(false), _hasColumnOpsMethods(false), _keyListOps(NULL), _desiredStrand(FileRecordMergeMgr::ANY_STRAND), ... ... @@ -197,6 +198,9 @@ bool ContextBase::parseCmdArgs(int argc, char **argv, int skipFirstArgs) { else if (strcmp(_argv[_i], "-sortout") == 0) { if (!handle_sortout()) return false; } else if (strcmp(_argv[_i], "-nonamecheck") == 0) { if (!handle_sortout()) return false; } } return true; ... ... @@ -534,6 +538,13 @@ bool ContextBase::handle_sortout() return true; } bool ContextBase::handle_nonamecheck() { setNameCheckDisabled(true); markUsed(_i - _skipFirstArgs); return true; } void ContextBase::setColumnOpsMethods(bool val) { if (val && !_hasColumnOpsMethods) { ... ... @@ -603,6 +614,8 @@ bool ContextBase::parseIoBufSize(QuickString bufStr) } void ContextBase::testNameConventions(const Record *record) { if (getNameCheckDisabled()) return; int fileIdx = record->getFileIdx(); // ... ... @@ -635,7 +648,7 @@ void ContextBase::testNameConventions(const Record *record) { // bool zeroVal = record->hasLeadingZeroInChromName(); bool zeroVal = record->hasLeadingZeroInChromName(hasChr); testChrVal = fileHasLeadingZeroInChromNames(fileIdx); if (testChrVal == UNTESTED) { _fileHasLeadingZeroInChromNames[fileIdx] = zeroVal ? YES : NO; ... ...
 ... ... @@ -116,6 +116,8 @@ public: virtual bool getUseFullBamTags() const { return _useFullBamTags; } virtual void setUseFullBamTags(bool val) { _useFullBamTags = val; } virtual bool getNameCheckDisabled() const { return _nameCheckDisabled; } virtual void setNameCheckDisabled(bool val) { _nameCheckDisabled = val; } // METHODS FOR PROGRAMS WITH USER_SPECIFIED NUMBER // OF OUTPUT RECORDS. ... ... @@ -207,6 +209,7 @@ protected: int _seed; bool _forwardOnly; bool _reverseOnly; bool _nameCheckDisabled; //Members for column operations bool _hasColumnOpsMethods; ... ... @@ -265,6 +268,7 @@ protected: virtual bool handle_null(); virtual bool handle_delim(); virtual bool handle_sortout(); virtual bool handle_nonamecheck(); bool handle_prec(); bool parseIoBufSize(QuickString bufStr); ... ...
 ... ... @@ -18,7 +18,8 @@ ContextClosest::ContextClosest() _diffNames(false), _tieMode(ALL_TIES), _strandedDistMode(REF_DIST), _multiDbMode(EACH_DB) _multiDbMode(EACH_DB), _numClosestHitsWanted(1) { // closest requires sorted input setSortedInput(true); ... ... @@ -66,7 +67,13 @@ bool ContextClosest::parseCmdArgs(int argc, char **argv, int skipFirstArgs){ else if (strcmp(_argv[_i], "-id") == 0) { if (!handle_id()) return false; } else if (strcmp(_argv[_i], "-N") == 0) { else if (strcmp(_argv[_i], "-fu") == 0) { if (!handle_fu()) return false; } else if (strcmp(_argv[_i], "-fd") == 0) { if (!handle_fd()) return false; } else if (strcmp(_argv[_i], "-N") == 0) { if (!handle_N()) return false; } else if (strcmp(_argv[_i], "-t") == 0) { ... ... @@ -75,6 +82,9 @@ bool ContextClosest::parseCmdArgs(int argc, char **argv, int skipFirstArgs){ else if (strcmp(_argv[_i], "-mdb") == 0) { if (!handle_mdb()) return false; } else if (strcmp(_argv[_i], "-k") == 0) { if (!handle_k()) return false; } } return ContextIntersect::parseCmdArgs(argc, argv, _skipFirstArgs); ... ... @@ -106,6 +116,25 @@ bool ContextClosest::isValidState(){ return false; } if ((_forceUpstream || _forceDownstream) && ! _haveStrandedDistMode) { _errorMsg = "\n*****\n*****ERROR: When requesting -fu or -fd, you also need to specify -D.\n*****\n"; return false; } if (_ignoreUpstream && _forceUpstream) { _errorMsg = "\n*****\n*****ERROR: Can't both ignore upstream and force upstream.\n*****\n"; return false; } if (_ignoreDownstream && _forceDownstream) { _errorMsg = "\n*****\n*****ERROR: Can't both ignore downstream and force downstream.\n*****\n"; return false; } if (_sortOutput && _reportDistance) { _errorMsg = "\n*****\n*****ERROR: -sortout (sorted output) is not valid with distance reporting.\n*****\n"; return false; } return true; } ... ... @@ -162,6 +191,18 @@ bool ContextClosest::handle_id() { return true; } bool ContextClosest::handle_fu() { _forceUpstream = true; markUsed(_i - _skipFirstArgs); return true; } bool ContextClosest::handle_fd() { _forceDownstream = true; markUsed(_i - _skipFirstArgs); return true; } bool ContextClosest::handle_N() { _diffNames = true; markUsed(_i - _skipFirstArgs); ... ... @@ -220,3 +261,21 @@ bool ContextClosest::handle_mdb() _errorMsg = "*****ERROR: Request \"each\" or \"last\" for Multiple Database Mode (-mdb)"; return false; } bool ContextClosest::handle_k() { if ((_i+1) < _argc) { if (isNumeric(_argv[_i+1])) { _numClosestHitsWanted = atoi(_argv[_i+1]); if (_numClosestHitsWanted > 0) { markUsed(_i - _skipFirstArgs); _i++; markUsed(_i - _skipFirstArgs); return true; } } } _errorMsg = "\n***** ERROR: -k option must be followed by a postive integer value *****"; return false; }
 ... ... @@ -23,10 +23,13 @@ public: bool ignoreOverlaps() const { return _ignoreOverlaps; } bool ignoreUpstream() const { return _ignoreUpstream; } bool ignoreDownstream() const { return _ignoreDownstream; } bool forceUpstream() const { return _forceUpstream; } bool forceDownstream() const { return _forceDownstream; } bool reportDistance() const { return _reportDistance; } bool signDistance() const { return _signDistance; } bool hasStrandedDistMode() const { return _haveStrandedDistMode; } bool diffNames() const { return _diffNames; } int getNumClosestHitsWanted() const { return _numClosestHitsWanted; } typedef enum { FIRST_TIE, LAST_TIE, ALL_TIES} tieModeType; tieModeType getTieMode() const { return _tieMode; } ... ... @@ -42,6 +45,8 @@ private: bool _ignoreOverlaps; bool _ignoreUpstream; bool _ignoreDownstream; bool _forceUpstream; bool _forceDownstream; bool _reportDistance; bool _signDistance; bool _haveStrandedDistMode; ... ... @@ -49,16 +54,19 @@ private: tieModeType _tieMode; strandedDistanceModeType _strandedDistMode; multiDbModeType _multiDbMode; int _numClosestHitsWanted; bool handle_d(); bool handle_D(); bool handle_io(); bool handle_iu(); bool handle_id(); bool handle_fu(); bool handle_fd(); bool handle_N(); bool handle_t(); bool handle_mdb(); bool handle_k(); }; ... ...
 ... ... @@ -258,17 +258,11 @@ bool Record::hasChrInChromName() const { (str[2] == 'r' || str[2] == 'R')); } bool Record::hasLeadingZeroInChromName() const { bool Record::hasLeadingZeroInChromName(bool chrKnown) const { const char *str = _chrName.c_str(); size_t i=0; while (i < _chrName.size() && !isdigit(str[i])) { if (str[i] == '_') return false; //ignore //leading zero after underscore, as some of the //random and hap chroms are names that way. i++; } return (i < _chrName.size() && str[i] == '0'); // only check to see if the digit zero follows the occurance of // "chr", case insensitive. return (_chrName.size() >= 4 && str[3] == '0' && (chrKnown || hasChrInChromName())); } void Record::print(FILE *fp, bool newline) const { ... ...
 ... ... @@ -137,7 +137,7 @@ public: // virtual static bool isNumericField(int fieldNum) const = 0; bool hasChrInChromName() const; bool hasLeadingZeroInChromName() const; bool hasLeadingZeroInChromName(bool chrKnown = false) const; protected: ... ...
This diff is collapsed.
 ... ... @@ -9,9 +9,66 @@ #define CLOSESWEEP_H_ #include "NewChromsweep.h" #include #include class ContextClosest; class distanceTuple { public: distanceTuple() : _dist(0), _rec(NULL), _isNeg(false) {} distanceTuple(int dist, const Record *rec, bool isNeg = false) : _dist(dist), _rec(rec), _isNeg(isNeg) {} // bool operator < (const distanceTuple & other) const { return (_dist < other._dist); } int _dist; const Record *_rec; bool _isNeg; }; class DistanceTupleSortAscFunctor { public: bool operator()(const distanceTuple & d1, const distanceTuple & d2) const { return ((d1._dist < d2._dist) ? true : (d1._dist == d2._dist ? (d1._isNeg && !d2._isNeg) : false)) ; } }; class RecDistList { public: typedef enum { LEFT, OVERLAP, RIGHT } chromDirType; RecDistList(int maxSize) : _maxUniqueAllowed(maxSize) {} ~RecDistList() { clear(); } bool empty() const { return _recs.empty(); } void clear(); int uniqueSize() const { return _recs.size(); } size_t totalSize() const { return _totalRecs; } bool addRec(int dist, const Record *, chromDirType chromDir); bool exists(int dist) const { return (_recs.find(dist) != _recs.end()); } int furtherestDistance() const { return _recs.rbegin()->first; } typedef vector >elemsType; typedef map distRecsType; typedef distRecsType::iterator iterType; typedef distRecsType::reverse_iterator revIterType; typedef distRecsType::const_iterator constIterType; typedef distRecsType::const_reverse_iterator constRevIterType; int getMaxDist() const { return _recs.empty() ? 0 : _recs.rbegin()->first; } constIterType begin() const { return _recs.begin(); } constIterType end() const { return _recs.end(); } int currDist(constIterType iter) const { return iter->first; } size_t currNumElems(constIterType iter) const { return iter->second->size(); } const Record *firstElem(constIterType iter) const { return (iter->second->at(0)).second; } const Record *lastElem(constIterType iter) const { return (iter->second->at(iter->second->size()-1)).second; } const elemsType *allElems(constIterType iter) const { return iter->second; } int getMaxLeftEndPos() const; private: void insert(int dist, const Record *, chromDirType chromDir); distRecsType _recs; int _maxUniqueAllowed; int _totalRecs; }; class CloseSweep : public NewChromSweep { public: CloseSweep(ContextClosest *context); ... ... @@ -21,13 +78,10 @@ public: private: ContextClosest *_context; typedef vector distRecVecType; vector _minUpstreamRecs; vector _minUpstreamDist; vector _minDownstreamRecs; vector _minDownstreamDist; vector _overlapRecs; int _kClosest; // how many closest hits we want to each query. vector _minUpstreamRecs; vector _minDownstreamRecs; vector _overlapRecs; vector _maxPrevLeftClosestEndPos; vector _maxPrevLeftClosestEndPosReverse; ... ... @@ -48,9 +102,18 @@ private: void finalizeSelections(int dbIdx, RecordKeyVector &retList); void checkMultiDbs(RecordKeyVector &retList); void setLeftClosestEndPos(int dbIdx, const Record *rec); typedef enum { LEFT, OVERLAP, RIGHT } chromDirType; typedef enum { UPSTREAM, INTERSECT, DOWNSTREAM } streamDirType; void setLeftClosestEndPos(int dbIdx); bool beforeLeftClosestEndPos(int dbIdx, const Record *rec); void clearClosestEndPos(int dbIdx); bool canStopScan(const Record *cacheRec, bool ignored, streamDirType streamDir); int addRecsToRetList(const RecDistList::elemsType *recs, int currDist, RecordKeyVector &retList); void addSingleRec(const Record *rec, int currDist, int &hitsUsed, RecordKeyVector &retList); rateOvlpType tryToAddRecord(const Record *cacheRec, int dist, int dbIdx, bool &stopScanning, chromDirType chromDir, streamDirType streamDir); bool purgePointException(int dbIdx); }; ... ...
 ... ... @@ -129,7 +129,7 @@ void NewChromSweep::scanCache(int dbIdx, RecordKeyVector &retList) { if (_currQueryRec->sameChrom(cacheRec) && !_currQueryRec->after(cacheRec)) { if (intersects(_currQueryRec, cacheRec)) { retList.push_back(cacheRec); } else break; // cacheRec is after the query rec, stop scanning. } else if (cacheRec->after(_currQueryRec)) break; // cacheRec is after the query rec, stop scanning. cacheIter = _caches[dbIdx].next(); } else { ... ... @@ -150,6 +150,7 @@ void NewChromSweep::clearCache(int dbIdx) } void NewChromSweep::masterScan(RecordKeyVector &retList) { for (int i=0; i < _numDBs; i++) { if (dbFinished(i) || chromChange(i, retList, true)) { continue; ... ...
 ... ... @@ -666,3 +666,8 @@ rm exp obs cd sortAndNaming bash test-sort-and-naming.sh cd .. cd kclosest bash test-kclosest.sh cd ..
 ... ... @@ -502,7 +502,25 @@ \$BT intersect -a a.bed -b b.bed -f 1.00001 2>&1 > /dev/null | cat - | head -2 | check exp obs rm exp obs ##################################################################bug167_strandSweep.bed # Test that -s works with chromsweep ################################################################## echo " intersect.t41...\c" echo \ "22" > exp \$BT intersect -a bug167_strandSweep.bed -b bug167_strandSweep.bed -sorted -s -wa -wb | wc -l > obs check exp obs rm exp obs ##################################################################bug167_strandSweep.bed # Test that -S works with chromsweep ################################################################## echo " intersect.t42...\c" echo \ "20" > exp \$BT intersect -a bug167_strandSweep.bed -b bug167_strandSweep.bed -sorted -S -wa -wb | wc -l > obs check exp obs rm exp obs rm one_block.bam two_blocks.bam three_blocks.bam ... ...
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!