Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Sign in
Toggle navigation
Menu
Open sidebar
R3
legacy
bedtools2
Commits
77c1bdad
Commit
77c1bdad
authored
Feb 19, 2014
by
nkindlon
Browse files
Created new KeyListOps class to replace vectorOps. Converted map tool to this.
parent
85df2b4a
Changes
18
Hide whitespace changes
Inline
Side-by-side
Makefile
View file @
77c1bdad
...
...
@@ -78,6 +78,7 @@ UTIL_SUBDIRS = $(SRC_DIR)/utils/bedFile \
$(SRC_DIR)
/utils/gzstream
\
$(SRC_DIR)
/utils/fileType
\
$(SRC_DIR)
/utils/bedFilePE
\
$(SRC_DIR)
/utils/KeyListOps
\
$(SRC_DIR)
/utils/NewChromsweep
\
$(SRC_DIR)
/utils/sequenceUtilities
\
$(SRC_DIR)
/utils/tabFile
\
...
...
src/mapFile/Makefile
View file @
77c1bdad
...
...
@@ -29,6 +29,7 @@ INCLUDES = -I$(UTILITIES_DIR)/Contexts/ \
-I
$(UTILITIES_DIR)
/FileRecordTools/FileReaders/
\
-I
$(UTILITIES_DIR)
/FileRecordTools/Records/
\
-I
$(UTILITIES_DIR)
/RecordOutputMgr/
\
-I
$(UTILITIES_DIR)
/KeyListOps/
\
-I
$(UTILITIES_DIR)
/NewChromsweep
\
-I
$(UTILITIES_DIR)
/VectorOps
\
-I
$(UTILITIES_DIR)
/BinTree
\
...
...
src/mapFile/mapFile.cpp
View file @
77c1bdad
...
...
@@ -21,11 +21,14 @@ const int PRECISION = 21;
FileMap
::
FileMap
(
ContextMap
*
context
)
:
_context
(
context
),
_blockMgr
(
NULL
),
_recordOutputMgr
(
NULL
)
_recordOutputMgr
(
NULL
),
_colOps
(
_context
->
getColOps
())
{
_blockMgr
=
new
BlockMgr
(
_context
->
getOverlapFraction
(),
_context
->
getReciprocal
());
_recordOutputMgr
=
new
RecordOutputMgr
();
_recordOutputMgr
->
init
(
_context
);
_keyListOps
.
setNullValue
(
_context
->
getNullValue
());
_keyListOps
.
setDelimStr
(
_context
->
getDelim
());
}
FileMap
::~
FileMap
(
void
)
{
...
...
@@ -43,78 +46,174 @@ bool FileMap::mapFiles()
}
RecordKeyList
hitSet
;
while
(
sweep
.
next
(
hitSet
))
{
_outputValues
.
clear
();
if
(
_context
->
getObeySplits
())
{
RecordKeyList
keySet
(
hitSet
.
getKey
());
RecordKeyList
resultSet
(
hitSet
.
getKey
());
_blockMgr
->
findBlockedOverlaps
(
keySet
,
hitSet
,
resultSet
);
SummarizeHits
(
resultSet
);
_recordOutputMgr
->
printRecord
(
resultSet
.
getKey
(),
_output
);
calculateOutput
(
resultSet
);
_recordOutputMgr
->
printRecord
(
resultSet
.
getKey
(),
_output
Values
);
}
else
{
SummarizeHits
(
hitSet
);
_recordOutputMgr
->
printRecord
(
hitSet
.
getKey
(),
_output
);
calculateOutput
(
hitSet
);
_recordOutputMgr
->
printRecord
(
hitSet
.
getKey
(),
_output
Values
);
}
}
return
true
;
}
void
FileMap
::
ExtractColumnFromHits
(
RecordKeyList
&
hits
)
{
_column_vec
.
clear
();
RecordKeyList
::
const_iterator_type
iter
=
hits
.
begin
();
for
(;
iter
!=
hits
.
end
();
iter
=
hits
.
next
())
{
_column_vec
.
push_back
(
iter
->
value
()
->
getField
(
_context
->
getColumn
()).
str
());
}
}
void
FileMap
::
SummarizeHits
(
RecordKeyList
&
hits
)
{
const
QuickString
&
operation
=
_context
->
getColumnOperation
();
_output
.
clear
();
if
(
hits
.
size
()
==
0
)
{
if
(
operation
==
"count"
||
operation
==
"count_distinct"
)
_output
.
append
(
"0"
);
else
_output
.
append
(
_context
->
getNullValue
().
str
());
return
;
}
_tmp_output
.
str
(
""
);
_tmp_output
.
clear
();
ExtractColumnFromHits
(
hits
);
VectorOps
vo
(
_column_vec
);
if
(
operation
==
"sum"
)
_tmp_output
<<
setprecision
(
PRECISION
)
<<
vo
.
GetSum
();
else
if
(
operation
==
"mean"
)
_tmp_output
<<
setprecision
(
PRECISION
)
<<
vo
.
GetMean
();
else
if
(
operation
==
"median"
)
_tmp_output
<<
setprecision
(
PRECISION
)
<<
vo
.
GetMedian
();
else
if
(
operation
==
"min"
)
_tmp_output
<<
setprecision
(
PRECISION
)
<<
vo
.
GetMin
();
else
if
(
operation
==
"max"
)
_tmp_output
<<
setprecision
(
PRECISION
)
<<
vo
.
GetMax
();
else
if
(
operation
==
"absmin"
)
_tmp_output
<<
setprecision
(
PRECISION
)
<<
vo
.
GetAbsMin
();
else
if
(
operation
==
"absmax"
)
_tmp_output
<<
setprecision
(
PRECISION
)
<<
vo
.
GetAbsMax
();
else
if
(
operation
==
"mode"
)
_tmp_output
<<
vo
.
GetMode
();
else
if
(
operation
==
"antimode"
)
_tmp_output
<<
vo
.
GetAntiMode
();
else
if
(
operation
==
"count"
)
_tmp_output
<<
setprecision
(
PRECISION
)
<<
vo
.
GetCount
();
else
if
(
operation
==
"count_distinct"
)
_tmp_output
<<
setprecision
(
PRECISION
)
<<
vo
.
GetCountDistinct
();
else
if
(
operation
==
"collapse"
)
_tmp_output
<<
vo
.
GetCollapse
();
else
if
(
operation
==
"distinct"
)
_tmp_output
<<
vo
.
GetDistinct
();
else
{
cerr
<<
"ERROR: "
<<
operation
<<
" is an unrecognized operation
\n
"
;
exit
(
1
);
}
_output
.
append
(
_tmp_output
.
str
());
void
FileMap
::
calculateOutput
(
RecordKeyList
&
hits
)
{
//loop through all requested columns, and for each one, call the method needed
//for the operation specified.
_keyListOps
.
setKeyList
(
&
hits
);
double
val
=
0.0
;
for
(
int
i
=
0
;
i
<
(
int
)
_colOps
.
size
();
i
++
)
{
int
col
=
_colOps
[
i
].
first
;
KeyListOps
::
OP_TYPES
opCode
=
_colOps
[
i
].
second
;
_keyListOps
.
setColumn
(
col
);
switch
(
opCode
)
{
case
KeyListOps
::
SUM
:
val
=
_keyListOps
.
getSum
();
if
(
isnan
(
val
))
{
_outputValues
.
append
(
_context
->
getNullValue
());
}
else
{
_outputValues
.
append
(
val
);
}
break
;
case
KeyListOps
::
MEAN
:
val
=
_keyListOps
.
getMean
();
if
(
isnan
(
val
))
{
_outputValues
.
append
(
_context
->
getNullValue
());
}
else
{
_outputValues
.
append
(
val
);
}
break
;
case
KeyListOps
::
STDDEV
:
val
=
_keyListOps
.
getStddev
();
if
(
isnan
(
val
))
{
_outputValues
.
append
(
_context
->
getNullValue
());
}
else
{
_outputValues
.
append
(
val
);
}
break
;
case
KeyListOps
::
SAMPLE_STDDEV
:
val
=
_keyListOps
.
getSampleStddev
();
if
(
isnan
(
val
))
{
_outputValues
.
append
(
_context
->
getNullValue
());
}
else
{
_outputValues
.
append
(
val
);
}
break
;
case
KeyListOps
::
MEDIAN
:
val
=
_keyListOps
.
getMedian
();
if
(
isnan
(
val
))
{
_outputValues
.
append
(
_context
->
getNullValue
());
}
else
{
_outputValues
.
append
(
val
);
}
break
;
case
KeyListOps
::
MODE
:
_outputValues
.
append
(
_keyListOps
.
getMode
());
break
;
case
KeyListOps
::
ANTIMODE
:
_outputValues
.
append
(
_keyListOps
.
getAntiMode
());
break
;
case
KeyListOps
::
MIN
:
val
=
_keyListOps
.
getMin
();
if
(
isnan
(
val
))
{
_outputValues
.
append
(
_context
->
getNullValue
());
}
else
{
_outputValues
.
append
(
val
);
}
break
;
case
KeyListOps
::
MAX
:
val
=
_keyListOps
.
getMax
();
if
(
isnan
(
val
))
{
_outputValues
.
append
(
_context
->
getNullValue
());
}
else
{
_outputValues
.
append
(
val
);
}
break
;
case
KeyListOps
::
ABSMIN
:
val
=
_keyListOps
.
getAbsMin
();
if
(
isnan
(
val
))
{
_outputValues
.
append
(
_context
->
getNullValue
());
}
else
{
_outputValues
.
append
(
val
);
}
break
;
case
KeyListOps
::
ABSMAX
:
val
=
_keyListOps
.
getAbsMax
();
if
(
isnan
(
val
))
{
_outputValues
.
append
(
_context
->
getNullValue
());
}
else
{
_outputValues
.
append
(
val
);
}
break
;
case
KeyListOps
::
COUNT
:
_outputValues
.
append
(
_keyListOps
.
getCount
());
break
;
case
KeyListOps
::
DISTINCT
:
_outputValues
.
append
(
_keyListOps
.
getDistinct
());
break
;
case
KeyListOps
::
COUNT_DISTINCT
:
_outputValues
.
append
(
_keyListOps
.
getCountDistinct
());
break
;
case
KeyListOps
::
DISTINCT_ONLY
:
_outputValues
.
append
(
_keyListOps
.
getDistinctOnly
());
break
;
case
KeyListOps
::
COLLAPSE
:
_outputValues
.
append
(
_keyListOps
.
getCollapse
());
break
;
case
KeyListOps
::
CONCAT
:
_outputValues
.
append
(
_keyListOps
.
getConcat
());
break
;
case
KeyListOps
::
FREQ_ASC
:
_outputValues
.
append
(
_keyListOps
.
getFreqAsc
());
break
;
case
KeyListOps
::
FREQ_DESC
:
_outputValues
.
append
(
_keyListOps
.
getFreqDesc
());
break
;
case
KeyListOps
::
FIRST
:
_outputValues
.
append
(
_keyListOps
.
getFirst
());
break
;
case
KeyListOps
::
LAST
:
_outputValues
.
append
(
_keyListOps
.
getLast
());
break
;
case
KeyListOps
::
INVALID
:
default:
// Any unrecognized operation should have been handled already in the context validation.
// It's thus unnecessary to handle it here, but throw an error to help us know if future
// refactoring or code changes accidentally bypass the validation phase.
cerr
<<
"ERROR: Invalid operation given for column "
<<
col
<<
". Exiting..."
<<
endl
;
break
;
}
//if this isn't the last column, add a tab.
if
(
i
<
(
int
)
_colOps
.
size
()
-
1
)
{
_outputValues
.
append
(
'\t'
);
}
}
}
src/mapFile/mapFile.h
View file @
77c1bdad
...
...
@@ -18,10 +18,11 @@ using namespace std;
#include <iomanip>
#include "VectorOps.h"
#include "RecordKeyList.h"
#include "KeyListOps.h"
#include "ContextMap.h"
using
namespace
std
;
class
ContextMap
;
class
BlockMgr
;
class
RecordOutputMgr
;
...
...
@@ -35,90 +36,13 @@ public:
private:
ContextMap
*
_context
;
Record
*
_queryRec
;
Record
*
_databaseRec
;
BlockMgr
*
_blockMgr
;
RecordOutputMgr
*
_recordOutputMgr
;
KeyListOps
_keyListOps
;
const
ContextMap
::
colOpsType
&
_colOps
;
QuickString
_outputValues
;
// placeholder for the results of mapping B to each a in A.
vector
<
string
>
_column_vec
;
// vector to hold current column's worth of data
ostringstream
_tmp_output
;
QuickString
_output
;
// placeholder for the results of mapping B to each a in A.
//------------------------------------------------
// private methods
//------------------------------------------------
void
Map
();
void
SummarizeHits
(
RecordKeyList
&
hits
);
void
ExtractColumnFromHits
(
RecordKeyList
&
hits
);
void
calculateOutput
(
RecordKeyList
&
hits
);
};
#endif
/* MAPFILE_H */
/*
#include "bedFile.h"
#include "chromsweep.h"
#include "VectorOps.h"
#include "api/BamReader.h"
#include "api/BamWriter.h"
#include "api/BamAux.h"
#include "BamAncillary.h"
using namespace BamTools;
#include <vector>
#include <iostream>
#include <algorithm>
#include <numeric>
#include <fstream>
#include <iomanip>
#include <stdlib.h>
using namespace std;
class BedMap {
public:
// constructor
BedMap(string bedAFile, string bedBFile, int column, string operation,
float overlapFraction, bool sameStrand,
bool diffStrand, bool reciprocal,
bool choseNullValue, string nullValue,
bool printHeader);
// destructor
~BedMap(void);
private:
//------------------------------------------------
// private attributes
//------------------------------------------------
string _bedAFile;
string _bedBFile;
int _column;
string _operation;
bool _sameStrand;
bool _diffStrand;
bool _reciprocal;
float _overlapFraction;
string _nullValue;
bool _printHeader;
// instance of a bed file class.
BedFile *_bedA, *_bedB;
vector<string> _column_vec; // vector to hold current column's worth of data
//------------------------------------------------
// private methods
//------------------------------------------------
void Map();
string MapHits(const BED &a, const vector<BED> &hits);
void ExtractColumnFromHits(const vector<BED> &hits);
};
*/
//#endif /* MAPFILE_H */
src/mapFile/mapMain.cpp
View file @
77c1bdad
...
...
@@ -38,144 +38,6 @@ int map_main(int argc, char* argv[]) {
return
retVal
?
0
:
1
;
}
/*
int map_main(int argc, char* argv[]) {
// our configuration variables
bool showHelp = false;
// input files
string bedAFile;
string bedBFile;
int column = 5;
string operation = "sum";
string nullValue = ".";
// input arguments
float overlapFraction = 1E-9;
bool haveBedA = false;
bool haveBedB = false;
bool haveColumn = false;
bool haveOperation = false;
bool haveFraction = false;
bool reciprocalFraction = false;
bool sameStrand = false;
bool diffStrand = false;
bool printHeader = false;
bool choseNullValue = false;
// check to see if we should print out some help
if(argc <= 1) showHelp = true;
for(int i = 1; i < argc; i++) {
int parameterLength = (int)strlen(argv[i]);
if((PARAMETER_CHECK("-h", 2, parameterLength)) ||
(PARAMETER_CHECK("--help", 5, parameterLength))) {
showHelp = true;
}
}
if(showHelp) map_help();
// do some parsing (all of these parameters require 2 strings)
for(int i = 1; i < argc; i++) {
int parameterLength = (int)strlen(argv[i]);
if(PARAMETER_CHECK("-a", 2, parameterLength)) {
if ((i+1) < argc) {
haveBedA = true;
bedAFile = argv[i + 1];
i++;
}
}
else if(PARAMETER_CHECK("-b", 2, parameterLength)) {
if ((i+1) < argc) {
haveBedB = true;
bedBFile = argv[i + 1];
i++;
}
}
else if(PARAMETER_CHECK("-c", 2, parameterLength)) {
if ((i+1) < argc) {
haveColumn = true;
column = atoi(argv[i + 1]);
i++;
}
}
else if(PARAMETER_CHECK("-o", 2, parameterLength)) {
if ((i+1) < argc) {
haveOperation = true;
operation = argv[i + 1];
i++;
}
}
else if(PARAMETER_CHECK("-f", 2, parameterLength)) {
if ((i+1) < argc) {
haveFraction = true;
overlapFraction = atof(argv[i + 1]);
i++;
}
}
else if(PARAMETER_CHECK("-r", 2, parameterLength)) {
reciprocalFraction = true;
}
else if (PARAMETER_CHECK("-s", 2, parameterLength)) {
sameStrand = true;
}
else if (PARAMETER_CHECK("-S", 2, parameterLength)) {
diffStrand = true;
}
else if (PARAMETER_CHECK("-null", 5, parameterLength)) {
nullValue = argv[i + 1];
choseNullValue = true;
i++;
}
else if(PARAMETER_CHECK("-header", 7, parameterLength)) {
printHeader = true;
}
else {
cerr << endl << "*****ERROR: Unrecognized parameter: " << argv[i] << " *****" << endl << endl;
showHelp = true;
}
}
// make sure we have both input files
if (!haveBedA || !haveBedB) {
cerr << endl << "*****" << endl << "*****ERROR: Need -a and -b files. " << endl << "*****" << endl;
showHelp = true;
}
if (reciprocalFraction && !haveFraction) {
cerr << endl << "*****" << endl << "*****ERROR: If using -r, you need to define -f." << endl << "*****" << endl;
showHelp = true;
}
if (sameStrand && diffStrand) {
cerr << endl << "*****" << endl << "*****ERROR: Request either -s OR -S, not both." << endl << "*****" << endl;
showHelp = true;
}
if (!showHelp) {
BedMap *bm = new BedMap(bedAFile, bedBFile, column, operation,
overlapFraction, sameStrand,
diffStrand, reciprocalFraction,
choseNullValue, nullValue,
printHeader);
delete bm;
return 0;
}
else {
map_help();
return 0;
}
}
*/
void
map_help
(
void
)
{
cerr
<<
"
\n
Tool: bedtools map (aka mapBed)"
<<
endl
;
...
...
src/utils/Contexts/ContextBase.cpp
View file @
77c1bdad
...
...
@@ -57,15 +57,6 @@ ContextBase::ContextBase()
_programNames
[
"intersect"
]
=
INTERSECT
;
_programNames
[
"sample"
]
=
SAMPLE
;
_programNames
[
"map"
]
=
MAP
;
_validScoreOps
.
insert
(
"sum"
);
_validScoreOps
.
insert
(
"max"
);
_validScoreOps
.
insert
(
"min"
);
_validScoreOps
.
insert
(
"mean"
);
_validScoreOps
.
insert
(
"mode"
);
_validScoreOps
.
insert
(
"median"
);
_validScoreOps
.
insert
(
"antimode"
);
_validScoreOps
.
insert
(
"collapse"
);
}
ContextBase
::~
ContextBase
()
...
...
src/utils/Contexts/ContextBase.h
View file @
77c1bdad
...
...
@@ -191,15 +191,11 @@ protected:
int
_bamHeaderAndRefIdx
;
int
_maxNumDatabaseFields
;
bool
_useFullBamTags
;
QuickString
_columnOperation
;
int
_column
;
QuickString
_nullValue
;
bool
_reportCount
;
int
_maxDistance
;
bool
_reportNames
;
bool
_reportScores
;
QuickString
_scoreOp
;
set
<
QuickString
>
_validScoreOps
;
int
_numOutputRecords
;
...
...
src/utils/Contexts/ContextIntersect.h
View file @
77c1bdad
...
...
@@ -21,6 +21,8 @@ public:
//NOTE: Query and database files will only be marked as such by either the
//parseCmdArgs method, or by explicitly setting them.
FileRecordMgr
*
getQueryFile
()
{
return
getFile
(
_queryFileIdx
);
}
FileRecordMgr
*
getDatabaseFile
()
{
return
getFile
(
_databaseFileIdx
);
}
int
getQueryFileIdx
()
const
{
return
_queryFileIdx
;
}
void
setQueryFileIdx
(
int
idx
)
{
_queryFileIdx
=
idx
;
}
int
getDatabaseFileIdx
()
const
{
return
_databaseFileIdx
;
}
...
...
src/utils/Contexts/ContextMap.cpp
View file @
77c1bdad
...
...
@@ -8,15 +8,16 @@
#include "ContextMap.h"
ContextMap
::
ContextMap
()
:
_delimStr
(
","
)
{
// map requires sorted input
setSortedInput
(
true
);
setLeftJoin
(
true
);
// default to BED score column
setColumn
(
5
);
setColumn
s
(
"5"
);
// default to "sum"
set
Column
Operation
(
"sum"
);
setOperation
s
(
"sum"
);
// default to "." as a NULL value
setNullValue
(
'.'
);
}
...
...
@@ -53,6 +54,10 @@ bool ContextMap::parseCmdArgs(int argc, char **argv, int skipFirstArgs) {
else
if
(
strcmp
(
_argv
[
_i
],
"-null"
)
==
0
)
{
if
(
!
handle_null
())
return
false
;
}
else
if
(
strcmp
(
_argv
[
_i
],
"-delim"
)
==
0
)
{
if
(
!
handle_delim
())
return
false
;
}
}
return
ContextIntersect
::
parseCmdArgs
(
argc
,
argv
,
_skipFirstArgs
);
}
...
...
@@ -66,23 +71,64 @@ bool ContextMap::isValidState()
if
(
getDatabaseFileType
()
==
FileRecordTypeChecker
::
BAM_FILE_TYPE
)
{
//throw Error
cerr
<<
endl
<<
"*****"
<<
endl
cerr
<<
endl
<<
"*****"
<<
endl
<<
"***** ERROR: BAM database file not currently supported for the map tool."
<<
endl
;
exit
(
1
);
}
// TODO
// enforce any specific checks for Map.
//get the strings from context containing the comma-delimited lists of columns
//and operations. Split both of these into vectors. Get the operation code
//for each operation string. Finally, make a vector of pairs, where the first
//member of each pair is a column number, and the second member is the code for the
//operation to perform on that column.
vector
<
QuickString
>
columnsVec
;
vector
<
QuickString
>
opsVec
;
int
numCols
=
Tokenize
(
_columns
,
columnsVec
,
','
);
int
numOps
=
Tokenize
(
_operations
,
opsVec
,
','
);
if
(
numOps
<
1
||
numCols
<
1
)
{
cerr
<<
endl
<<
"*****"
<&