Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Sign in
Toggle navigation
Menu
Open sidebar
R3
legacy
bedtools2
Commits
2268ee4f
Commit
2268ee4f
authored
Feb 27, 2014
by
nkindlon
Browse files
Refactored KeyListOps, Context, mapFile for KeyListOps re-usability by other tools.
parent
85df2b4a
Changes
47
Hide whitespace changes
Inline
Side-by-side
src/intersectFile/Makefile
View file @
2268ee4f
...
...
@@ -17,6 +17,7 @@ INCLUDES = -I$(UTILITIES_DIR)/Contexts/ \
-I
$(UTILITIES_DIR)
/FileRecordTools/
\
-I
$(UTILITIES_DIR)
/FileRecordTools/FileReaders/
\
-I
$(UTILITIES_DIR)
/FileRecordTools/Records/
\
-I
$(UTILITIES_DIR)
/KeyListOps/
\
-I
$(UTILITIES_DIR)
/RecordOutputMgr/
\
-I
$(UTILITIES_DIR)
/NewChromsweep
\
-I
$(UTILITIES_DIR)
/BinTree
\
...
...
src/mapFile/Makefile
View file @
2268ee4f
...
...
@@ -29,6 +29,7 @@ INCLUDES = -I$(UTILITIES_DIR)/Contexts/ \
-I
$(UTILITIES_DIR)
/FileRecordTools/FileReaders/
\
-I
$(UTILITIES_DIR)
/FileRecordTools/Records/
\
-I
$(UTILITIES_DIR)
/RecordOutputMgr/
\
-I
$(UTILITIES_DIR)
/KeyListOps/
\
-I
$(UTILITIES_DIR)
/NewChromsweep
\
-I
$(UTILITIES_DIR)
/VectorOps
\
-I
$(UTILITIES_DIR)
/BinTree
\
...
...
src/mapFile/mapFile.cpp
View file @
2268ee4f
...
...
@@ -47,74 +47,11 @@ bool FileMap::mapFiles()
RecordKeyList
keySet
(
hitSet
.
getKey
());
RecordKeyList
resultSet
(
hitSet
.
getKey
());
_blockMgr
->
findBlockedOverlaps
(
keySet
,
hitSet
,
resultSet
);
SummarizeHits
(
resultSet
);
_recordOutputMgr
->
printRecord
(
resultSet
.
getKey
(),
_output
);
_recordOutputMgr
->
printRecord
(
resultSet
.
getKey
(),
_context
->
getColumnOpsVal
(
resultSet
));
}
else
{
SummarizeHits
(
hitSet
);
_recordOutputMgr
->
printRecord
(
hitSet
.
getKey
(),
_output
);
_recordOutputMgr
->
printRecord
(
hitSet
.
getKey
(),
_context
->
getColumnOpsVal
(
hitSet
));
}
}
return
true
;
}
// Rebuilds _column_vec from the given hits: for every entry in the list, the
// field selected by _context->getColumn() is read from the entry's record and
// appended as a string. Whatever _column_vec held before the call is dropped.
void FileMap::ExtractColumnFromHits(RecordKeyList &hits)
{
    _column_vec.clear();

    // RecordKeyList is traversed through its own begin()/next()/end() calls.
    RecordKeyList::const_iterator_type hitIter = hits.begin();
    while (hitIter != hits.end()) {
        _column_vec.push_back(hitIter->value()->getField(_context->getColumn()).str());
        hitIter = hits.next();
    }
}
// Applies the operation named by _context->getColumnOperation() to the
// selected column of the hits and leaves the textual result in _output.
//
// With no hits, "count" and "count_distinct" yield "0" and every other
// operation yields the context's null value. An operation name that matches
// none of the known ones prints an error to cerr and terminates with exit(1).
void FileMap::SummarizeHits(RecordKeyList &hits)
{
    const QuickString &op = _context->getColumnOperation();
    _output.clear();

    // Nothing overlapped: report the "empty" answer and stop early.
    if (hits.size() == 0) {
        if (op == "count" || op == "count_distinct") {
            _output.append("0");
        } else {
            _output.append(_context->getNullValue().str());
        }
        return;
    }

    // Gather the column values first, then reset the scratch stream
    // (both its contents and its state flags) before formatting into it.
    ExtractColumnFromHits(hits);
    VectorOps columnOps(_column_vec);
    _tmp_output.str("");
    _tmp_output.clear();

    if (op == "sum") {
        _tmp_output << setprecision(PRECISION) << columnOps.GetSum();
    } else if (op == "mean") {
        _tmp_output << setprecision(PRECISION) << columnOps.GetMean();
    } else if (op == "median") {
        _tmp_output << setprecision(PRECISION) << columnOps.GetMedian();
    } else if (op == "min") {
        _tmp_output << setprecision(PRECISION) << columnOps.GetMin();
    } else if (op == "max") {
        _tmp_output << setprecision(PRECISION) << columnOps.GetMax();
    } else if (op == "absmin") {
        _tmp_output << setprecision(PRECISION) << columnOps.GetAbsMin();
    } else if (op == "absmax") {
        _tmp_output << setprecision(PRECISION) << columnOps.GetAbsMax();
    } else if (op == "mode") {
        _tmp_output << columnOps.GetMode();
    } else if (op == "antimode") {
        _tmp_output << columnOps.GetAntiMode();
    } else if (op == "count") {
        _tmp_output << setprecision(PRECISION) << columnOps.GetCount();
    } else if (op == "count_distinct") {
        _tmp_output << setprecision(PRECISION) << columnOps.GetCountDistinct();
    } else if (op == "collapse") {
        _tmp_output << columnOps.GetCollapse();
    } else if (op == "distinct") {
        _tmp_output << columnOps.GetDistinct();
    } else {
        cerr << "ERROR: " << op << " is an unrecognized operation\n";
        exit(1);
    }

    _output.append(_tmp_output.str());
}
src/mapFile/mapFile.h
View file @
2268ee4f
...
...
@@ -18,10 +18,11 @@ using namespace std;
#include <iomanip>
#include "VectorOps.h"
#include "RecordKeyList.h"
#include "KeyListOps.h"
#include "ContextMap.h"
using
namespace
std
;
class
ContextMap
;
class
BlockMgr
;
class
RecordOutputMgr
;
...
...
@@ -35,90 +36,8 @@ public:
private:
ContextMap
*
_context
;
Record
*
_queryRec
;
Record
*
_databaseRec
;
BlockMgr
*
_blockMgr
;
RecordOutputMgr
*
_recordOutputMgr
;
vector
<
string
>
_column_vec
;
// vector to hold current column's worth of data
ostringstream
_tmp_output
;
QuickString
_output
;
// placeholder for the results of mapping B to each a in A.
//------------------------------------------------
// private methods
//------------------------------------------------
void
Map
();
void
SummarizeHits
(
RecordKeyList
&
hits
);
void
ExtractColumnFromHits
(
RecordKeyList
&
hits
);
};
#endif
/* MAPFILE_H */
/*
#include "bedFile.h"
#include "chromsweep.h"
#include "VectorOps.h"
#include "api/BamReader.h"
#include "api/BamWriter.h"
#include "api/BamAux.h"
#include "BamAncillary.h"
using namespace BamTools;
#include <vector>
#include <iostream>
#include <algorithm>
#include <numeric>
#include <fstream>
#include <iomanip>
#include <stdlib.h>
using namespace std;
class BedMap {
public:
// constructor
BedMap(string bedAFile, string bedBFile, int column, string operation,
float overlapFraction, bool sameStrand,
bool diffStrand, bool reciprocal,
bool choseNullValue, string nullValue,
bool printHeader);
// destructor
~BedMap(void);
private:
//------------------------------------------------
// private attributes
//------------------------------------------------
string _bedAFile;
string _bedBFile;
int _column;
string _operation;
bool _sameStrand;
bool _diffStrand;
bool _reciprocal;
float _overlapFraction;
string _nullValue;
bool _printHeader;
// instance of a bed file class.
BedFile *_bedA, *_bedB;
vector<string> _column_vec; // vector to hold current column's worth of data
//------------------------------------------------
// private methods
//------------------------------------------------
void Map();
string MapHits(const BED &a, const vector<BED> &hits);
void ExtractColumnFromHits(const vector<BED> &hits);
};
*/
//#endif /* MAPFILE_H */
src/mapFile/mapMain.cpp
View file @
2268ee4f
...
...
@@ -38,144 +38,6 @@ int map_main(int argc, char* argv[]) {
return
retVal
?
0
:
1
;
}
/*
int map_main(int argc, char* argv[]) {
// our configuration variables
bool showHelp = false;
// input files
string bedAFile;
string bedBFile;
int column = 5;
string operation = "sum";
string nullValue = ".";
// input arguments
float overlapFraction = 1E-9;
bool haveBedA = false;
bool haveBedB = false;
bool haveColumn = false;
bool haveOperation = false;
bool haveFraction = false;
bool reciprocalFraction = false;
bool sameStrand = false;
bool diffStrand = false;
bool printHeader = false;
bool choseNullValue = false;
// check to see if we should print out some help
if(argc <= 1) showHelp = true;
for(int i = 1; i < argc; i++) {
int parameterLength = (int)strlen(argv[i]);
if((PARAMETER_CHECK("-h", 2, parameterLength)) ||
(PARAMETER_CHECK("--help", 5, parameterLength))) {
showHelp = true;
}
}
if(showHelp) map_help();
// do some parsing (all of these parameters require 2 strings)
for(int i = 1; i < argc; i++) {
int parameterLength = (int)strlen(argv[i]);
if(PARAMETER_CHECK("-a", 2, parameterLength)) {
if ((i+1) < argc) {
haveBedA = true;
bedAFile = argv[i + 1];
i++;
}
}
else if(PARAMETER_CHECK("-b", 2, parameterLength)) {
if ((i+1) < argc) {
haveBedB = true;
bedBFile = argv[i + 1];
i++;
}
}
else if(PARAMETER_CHECK("-c", 2, parameterLength)) {
if ((i+1) < argc) {
haveColumn = true;
column = atoi(argv[i + 1]);
i++;
}
}
else if(PARAMETER_CHECK("-o", 2, parameterLength)) {
if ((i+1) < argc) {
haveOperation = true;
operation = argv[i + 1];
i++;
}
}
else if(PARAMETER_CHECK("-f", 2, parameterLength)) {
if ((i+1) < argc) {
haveFraction = true;
overlapFraction = atof(argv[i + 1]);
i++;
}
}
else if(PARAMETER_CHECK("-r", 2, parameterLength)) {
reciprocalFraction = true;
}
else if (PARAMETER_CHECK("-s", 2, parameterLength)) {
sameStrand = true;
}
else if (PARAMETER_CHECK("-S", 2, parameterLength)) {
diffStrand = true;
}
else if (PARAMETER_CHECK("-null", 5, parameterLength)) {
nullValue = argv[i + 1];
choseNullValue = true;
i++;
}
else if(PARAMETER_CHECK("-header", 7, parameterLength)) {
printHeader = true;
}
else {
cerr << endl << "*****ERROR: Unrecognized parameter: " << argv[i] << " *****" << endl << endl;
showHelp = true;
}
}
// make sure we have both input files
if (!haveBedA || !haveBedB) {
cerr << endl << "*****" << endl << "*****ERROR: Need -a and -b files. " << endl << "*****" << endl;
showHelp = true;
}
if (reciprocalFraction && !haveFraction) {
cerr << endl << "*****" << endl << "*****ERROR: If using -r, you need to define -f." << endl << "*****" << endl;
showHelp = true;
}
if (sameStrand && diffStrand) {
cerr << endl << "*****" << endl << "*****ERROR: Request either -s OR -S, not both." << endl << "*****" << endl;
showHelp = true;
}
if (!showHelp) {
BedMap *bm = new BedMap(bedAFile, bedBFile, column, operation,
overlapFraction, sameStrand,
diffStrand, reciprocalFraction,
choseNullValue, nullValue,
printHeader);
delete bm;
return 0;
}
else {
map_help();
return 0;
}
}
*/
void
map_help
(
void
)
{
cerr
<<
"
\n
Tool: bedtools map (aka mapBed)"
<<
endl
;
...
...
src/nekSandbox1/Makefile
View file @
2268ee4f
...
...
@@ -10,6 +10,7 @@ INCLUDES = -I$(UTILITIES_DIR)/Contexts/ \
-I
$(UTILITIES_DIR)
/FileRecordTools/
\
-I
$(UTILITIES_DIR)
/FileRecordTools/FileReaders
\
-I
$(UTILITIES_DIR)
/FileRecordTools/Records
\
-I
$(UTILITIES_DIR)
/KeyListOps/
\
-I
$(UTILITIES_DIR)
/general
\
-I
$(UTILITIES_DIR)
/NewChromsweep
\
-I
$(UTILITIES_DIR)
/GenomeFile/
\
...
...
src/regressTest/Makefile
View file @
2268ee4f
...
...
@@ -18,6 +18,7 @@ INCLUDES = -I$(UTILITIES_DIR)/bedFile/ \
-I
$(UTILITIES_DIR)
/FileRecordTools/
\
-I
$(UTILITIES_DIR)
/FileRecordTools/FileReaders
\
-I
$(UTILITIES_DIR)
/FileRecordTools/Records
\
-I
$(UTILITIES_DIR)
/KeyListOps/
\
-I
$(UTILITIES_DIR)
/general
# ----------------------------------
...
...
src/sampleFile/Makefile
View file @
2268ee4f
...
...
@@ -17,6 +17,7 @@ INCLUDES = -I$(UTILITIES_DIR)/Contexts/ \
-I
$(UTILITIES_DIR)
/FileRecordTools/
\
-I
$(UTILITIES_DIR)
/FileRecordTools/FileReaders/
\
-I
$(UTILITIES_DIR)
/FileRecordTools/Records/
\
-I
$(UTILITIES_DIR)
/KeyListOps/
\
-I
$(UTILITIES_DIR)
/RecordOutputMgr/
\
-I
$(UTILITIES_DIR)
/version/
...
...
src/utils/BinTree/Makefile
View file @
2268ee4f
...
...
@@ -11,6 +11,7 @@ INCLUDES = -I$(UTILITIES_DIR)/general/ \
-I
$(UTILITIES_DIR)
/FileRecordTools/
\
-I
$(UTILITIES_DIR)
/FileRecordTools/FileReaders/
\
-I
$(UTILITIES_DIR)
/FileRecordTools/Records/
\
-I
$(UTILITIES_DIR)
/KeyListOps/
\
-I
$(UTILITIES_DIR)
/BamTools/include
\
-I
$(UTILITIES_DIR)
/BamTools/src/
\
-I
$(UTILITIES_DIR)
/version/
...
...
src/utils/Contexts/ContextBase.cpp
View file @
2268ee4f
...
...
@@ -52,20 +52,16 @@ ContextBase::ContextBase()
_hasConstantSeed
(
false
),
_seed
(
0
),
_forwardOnly
(
false
),
_reverseOnly
(
false
)
_reverseOnly
(
false
),
_hasColumnOpsMethods
(
false
)
{
_programNames
[
"intersect"
]
=
INTERSECT
;
_programNames
[
"sample"
]
=
SAMPLE
;
_programNames
[
"map"
]
=
MAP
;
_validScoreOps
.
insert
(
"sum"
);
_validScoreOps
.
insert
(
"max"
);
_validScoreOps
.
insert
(
"min"
);
_validScoreOps
.
insert
(
"mean"
);
_validScoreOps
.
insert
(
"mode"
);
_validScoreOps
.
insert
(
"median"
);
_validScoreOps
.
insert
(
"antimode"
);
_validScoreOps
.
insert
(
"collapse"
);
if
(
hasColumnOpsMethods
())
{
_keyListOps
=
new
KeyListOps
();
}
}
ContextBase
::~
ContextBase
()
...
...
@@ -79,6 +75,11 @@ ContextBase::~ContextBase()
delete
_files
[
i
];
_files
[
i
]
=
NULL
;
}
if
(
hasColumnOpsMethods
())
{
delete
_keyListOps
;
_keyListOps
=
NULL
;
}
}
bool
ContextBase
::
determineOutputType
()
{
...
...
@@ -176,6 +177,19 @@ bool ContextBase::parseCmdArgs(int argc, char **argv, int skipFirstArgs) {
else
if
(
strcmp
(
_argv
[
_i
],
"-seed"
)
==
0
)
{
if
(
!
handle_seed
())
return
false
;
}
else
if
(
strcmp
(
_argv
[
_i
],
"-o"
)
==
0
)
{
if
(
!
handle_o
())
return
false
;
}
else
if
(
strcmp
(
_argv
[
_i
],
"-c"
)
==
0
)
{
if
(
!
handle_c
())
return
false
;
}
else
if
(
strcmp
(
_argv
[
_i
],
"-null"
)
==
0
)
{
if
(
!
handle_null
())
return
false
;
}
else
if
(
strcmp
(
_argv
[
_i
],
"-delim"
)
==
0
)
{
if
(
!
handle_delim
())
return
false
;
}
}
return
true
;
}
...
...
@@ -191,6 +205,12 @@ bool ContextBase::isValidState()
if
(
!
determineOutputType
())
{
return
false
;
}
if
(
hasColumnOpsMethods
())
{
FileRecordMgr
*
dbFile
=
getFile
(
hasIntersectMethods
()
?
_databaseFileIdx
:
0
);
if
(
!
_keyListOps
->
isValidColumnOps
(
dbFile
))
{
return
false
;
}
}
return
true
;
}
...
...
@@ -363,3 +383,85 @@ bool ContextBase::handle_ubam()
markUsed
(
_i
-
_skipFirstArgs
);
return
true
;
}
// Methods specific to column operations.
// for col ops, -c is the string of columns upon which to operate
// Returns false when this context has no column-operation support.
// Otherwise, if a value follows the option, it is handed to
// _keyListOps->setColumns() and both the option and its value are marked
// as used; when no value follows, nothing is consumed. Returns true either way.
bool ContextBase::handle_c()
{
    if (!hasColumnOpsMethods()) {
        return false;
    }
    if ((_i + 1) >= _argc) {
        // Option is the last argument: leave it unmarked.
        return true;
    }

    _keyListOps->setColumns(_argv[_i + 1]);
    markUsed(_i - _skipFirstArgs);   // the option itself
    _i++;
    markUsed(_i - _skipFirstArgs);   // its value
    return true;
}
// for col ops, -o is the string of operations to apply to the columns (-c)
// Returns false when this context has no column-operation support.
// Otherwise, if a value follows the option, it is handed to
// _keyListOps->setOperations() and both the option and its value are marked
// as used; when no value follows, nothing is consumed. Returns true either way.
bool ContextBase::handle_o()
{
    if (!hasColumnOpsMethods()) {
        return false;
    }
    if ((_i + 1) >= _argc) {
        // Option is the last argument: leave it unmarked.
        return true;
    }

    _keyListOps->setOperations(_argv[_i + 1]);
    markUsed(_i - _skipFirstArgs);   // the option itself
    _i++;
    markUsed(_i - _skipFirstArgs);   // its value
    return true;
}
// for col ops, -null is a NULL value assigned
// when no overlaps are detected.
// Returns false when this context has no column-operation support.
// Otherwise, if a value follows the option, it is handed to
// _keyListOps->setNullValue() and both the option and its value are marked
// as used; when no value follows, nothing is consumed. Returns true either way.
bool ContextBase::handle_null()
{
    if (!hasColumnOpsMethods()) {
        return false;
    }
    if ((_i + 1) >= _argc) {
        // Option is the last argument: leave it unmarked.
        return true;
    }

    _keyListOps->setNullValue(_argv[_i + 1]);
    markUsed(_i - _skipFirstArgs);   // the option itself
    _i++;
    markUsed(_i - _skipFirstArgs);   // its value
    return true;
}
//for col ops, delimStr will appear between each item in
//a collapsed but delimited list.
// Returns false when this context has no column-operation support.
// Otherwise, if a value follows the option, it is handed to
// _keyListOps->setDelimStr() and both the option and its value are marked
// as used; when no value follows, nothing is consumed. Returns true either way.
bool ContextBase::handle_delim()
{
    if (!hasColumnOpsMethods()) {
        return false;
    }
    if ((_i + 1) >= _argc) {
        // Option is the last argument: leave it unmarked.
        return true;
    }

    _keyListOps->setDelimStr(_argv[_i + 1]);
    markUsed(_i - _skipFirstArgs);   // the option itself
    _i++;
    markUsed(_i - _skipFirstArgs);   // its value
    return true;
}
// Enables or disables column-operation support for this context.
//
// _keyListOps is owned by this object and exists exactly while
// _hasColumnOpsMethods is true. The helper is therefore created only on a
// false -> true transition and released on a true -> false transition:
//   - enabling twice no longer allocates a second KeyListOps and leaks the
//     first one (the already-configured instance is kept);
//   - disabling after enabling no longer strands the allocation, which the
//     destructor would skip because it only frees when the flag is set.
void ContextBase::setColumnOpsMethods(bool val)
{
    if (val && !_hasColumnOpsMethods) {
        _keyListOps = new KeyListOps();
    } else if (!val && _hasColumnOpsMethods) {
        delete _keyListOps;
        _keyListOps = NULL;
    }
    _hasColumnOpsMethods = val;
}
// Returns the value _keyListOps->getOpVals() produces for keyList when column
// operations are available; otherwise returns the placeholder member _nullStr.
const QuickString &ContextBase::getColumnOpsVal(RecordKeyList &keyList) const
{
    if (hasColumnOpsMethods()) {
        return _keyListOps->getOpVals(keyList);
    }
    return _nullStr;
}
src/utils/Contexts/ContextBase.h
View file @
2268ee4f
...
...
@@ -24,6 +24,7 @@
#include "NewGenomeFile.h"
#include "api/BamReader.h"
#include "api/BamAux.h"
#include "KeyListOps.h"
class
ContextBase
{
...
...
@@ -144,6 +145,13 @@ public:
//methods.
// Base-class answer is always false; declared virtual so a derived context can override it.
virtual
bool
hasIntersectMethods
()
const
{
return
false
;
}
// determine whether column operations like those used in map
// are available.
void
setColumnOpsMethods
(
bool
val
);
// Reports the current value of the _hasColumnOpsMethods flag.
virtual
bool
hasColumnOpsMethods
()
const
{
return
_hasColumnOpsMethods
;
}
const
QuickString
&
getColumnOpsVal
(
RecordKeyList
&
keyList
)
const
;
//methods applicable only to column operations.
protected:
PROGRAM_TYPE
_program
;
...
...
@@ -191,15 +199,11 @@ protected:
int
_bamHeaderAndRefIdx
;
int
_maxNumDatabaseFields
;
bool
_useFullBamTags
;
QuickString
_columnOperation
;
int
_column
;
QuickString
_nullValue
;
bool
_reportCount
;
int
_maxDistance
;
bool
_reportNames
;
bool
_reportScores
;
QuickString
_scoreOp
;
set
<
QuickString
>
_validScoreOps
;
int
_numOutputRecords
;
...
...
@@ -208,6 +212,10 @@ protected:
bool
_forwardOnly
;
bool
_reverseOnly
;
bool
_hasColumnOpsMethods
;
KeyListOps
*
_keyListOps
;
QuickString
_nullStr
;
//placeholder return value when col ops aren't valid.
// Flags entry i of _argsProcessed as true. No bounds check is performed here.
void
markUsed
(
int
i
)
{
_argsProcessed
[
i
]
=
true
;
}
// Returns entry i of _argsProcessed (set by markUsed). No bounds check is performed here.
bool
isUsed
(
int
i
)
const
{
return
_argsProcessed
[
i
];
}
bool
cmdArgsValid
();
...
...
@@ -231,6 +239,11 @@ protected:
virtual
bool
handle_split
();
virtual
bool
handle_sorted
();
virtual
bool
handle_ubam
();
virtual
bool
handle_c
();
virtual
bool
handle_o
();
virtual
bool
handle_null
();
virtual
bool
handle_delim
();
};
#endif
/* CONTEXTBASE_H_ */
src/utils/Contexts/ContextIntersect.h
View file @
2268ee4f
...
...
@@ -21,6 +21,8 @@ public:
//NOTE: Query and database files will only be marked as such by either the
//parseCmdArgs method, or by explicitly setting them.
// Returns the file that getFile() yields for the stored query index (_queryFileIdx).
FileRecordMgr
*
getQueryFile
()
{
return
getFile
(
_queryFileIdx
);
}
// Returns the file that getFile() yields for the stored database index (_databaseFileIdx).
FileRecordMgr
*
getDatabaseFile
()
{
return
getFile
(
_databaseFileIdx
);
}
// Returns the stored query-file index (_queryFileIdx).
int
getQueryFileIdx
()
const
{
return
_queryFileIdx
;
}
// Stores idx as the query-file index (_queryFileIdx); the value is not validated here.
void
setQueryFileIdx
(
int
idx
)
{
_queryFileIdx
=
idx
;
}
// Returns the stored database-file index (_databaseFileIdx).
int
getDatabaseFileIdx
()
const
{
return
_databaseFileIdx
;
}
...
...
src/utils/Contexts/ContextMap.cpp
View file @
2268ee4f
...
...
@@ -12,13 +12,7 @@ ContextMap::ContextMap()
// map requires sorted input
setSortedInput
(
true
);
setLeftJoin
(
true
);
// default to BED score column
setColumn
(
5
);
// default to "sum"
setColumnOperation
(
"sum"
);
// default to "." as a NULL value
setNullValue
(
'.'
);
setColumnOpsMethods
(
true
);
}
ContextMap
::~
ContextMap
()
...
...
@@ -44,75 +38,22 @@ bool ContextMap::parseCmdArgs(int argc, char **argv, int skipFirstArgs) {
if
(
isUsed
(
_i
-
_skipFirstArgs
))
{
continue
;
}
else
if
(
strcmp
(
_argv
[
_i
],
"-o"
)
==
0
)
{
if
(
!
handle_o
())
return
false
;