Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
B
bedtools2
Manage
Activity
Members
Labels
Plan
Issues
Issue boards
Milestones
Iterations
Wiki
Requirements
External wiki
Code
Merge requests
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Locked files
Build
Pipelines
Jobs
Pipeline schedules
Test cases
Artifacts
Deploy
Releases
Model registry
Operate
Environments
Monitor
Incidents
Service Desk
Analyze
Value stream analytics
Contributor analytics
CI/CD analytics
Repository analytics
Code review analytics
Issue analytics
Insights
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
R3
legacy
bedtools2
Commits
823bfa7f
Commit
823bfa7f
authored
11 years ago
by
arq5x
Browse files
Options
Downloads
Patches
Plain Diff
map uses PFM.
parent
ca1d3bdd
No related branches found
No related tags found
No related merge requests found
Changes
3
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
src/mapFile/mapFile.cpp
+39
-156
39 additions, 156 deletions
src/mapFile/mapFile.cpp
src/mapFile/mapFile.h
+3
-1
3 additions, 1 deletion
src/mapFile/mapFile.h
src/mapFile/mapMain.cpp
+4
-1
4 additions, 1 deletion
src/mapFile/mapMain.cpp
with
46 additions
and
158 deletions
src/mapFile/mapFile.cpp
+
39
−
156
View file @
823bfa7f
...
...
@@ -55,187 +55,70 @@ bool FileMap::mapFiles()
//} else {
//}
//_recordOutputMgr->printKeyAndTerminate(hitSet);
_recordOutputMgr
->
printRecord
(
hitSet
.
getKey
(),
MapHits
(
hitSet
));
SummarizeHits
(
hitSet
);
_recordOutputMgr
->
printRecord
(
hitSet
.
getKey
(),
_output
);
}
return
true
;
}
void
FileMap
::
ExtractColumnFromHits
(
RecordKeyList
&
hits
)
{
for
(
RecordKeyList
::
const_iterator_type
iter
=
hits
.
begin
();
iter
!=
hits
.
end
();
iter
=
hits
.
next
())
{
try
{
ostringstream
startString
;
startString
<<
iter
->
value
()
->
getStartPos
();
//_column_vec.push_back(iter->value().fields.at(_column));
_column_vec
.
push_back
(
startString
.
str
());
}
catch
(
std
::
out_of_range
&
e
)
{
exit
(
1
);
}
/*
catch(std::out_of_range& e) {
cerr << endl << "*****" << endl
<< "*****ERROR: requested column ("
<< _column + 1
<< ") exceeds the number of columns in file at line "
<< _bedA->_lineNum << ". Exiting."
<< endl << "*****" << endl;
exit(1);
}
*/
_column_vec
.
clear
();
RecordKeyList
::
const_iterator_type
iter
=
hits
.
begin
();
for
(;
iter
!=
hits
.
end
();
iter
=
hits
.
next
())
{
_column_vec
.
push_back
(
iter
->
value
()
->
getField
(
_context
->
getColumn
()).
str
());
}
}
}
string
FileMap
::
MapHits
(
RecordKeyList
&
hits
)
{
ostringstream
output
;
if
(
hits
.
size
()
==
0
)
// FIX ME
return
"-1"
;
//return _nullValue;
void
FileMap
::
SummarizeHits
(
RecordKeyList
&
hits
)
{
ExtractColumnFromHits
(
hits
);
const
QuickString
&
operation
=
_context
->
getColumnOperation
();
_output
.
clear
();
if
(
hits
.
size
()
==
0
)
{
if
(
operation
==
"count"
||
operation
==
"count_distinct"
)
_output
.
append
(
"0"
);
else
_output
.
append
(
_context
->
getNullValue
().
str
());
return
;
}
string
operation
=
_context
->
getColumnOperation
();
_tmp_output
.
str
(
""
);
_tmp_output
.
clear
();
ExtractColumnFromHits
(
hits
);
VectorOps
vo
(
_column_vec
);
if
(
operation
==
"sum"
)
output
<<
setprecision
(
PRECISION
)
<<
vo
.
GetSum
();
_tmp_
output
<<
setprecision
(
PRECISION
)
<<
vo
.
GetSum
();
else
if
(
operation
==
"mean"
)
output
<<
setprecision
(
PRECISION
)
<<
vo
.
GetMean
();
_tmp_
output
<<
setprecision
(
PRECISION
)
<<
vo
.
GetMean
();
else
if
(
operation
==
"median"
)
output
<<
setprecision
(
PRECISION
)
<<
vo
.
GetMedian
();
_tmp_
output
<<
setprecision
(
PRECISION
)
<<
vo
.
GetMedian
();
else
if
(
operation
==
"min"
)
output
<<
setprecision
(
PRECISION
)
<<
vo
.
GetMin
();
_tmp_
output
<<
setprecision
(
PRECISION
)
<<
vo
.
GetMin
();
else
if
(
operation
==
"max"
)
output
<<
setprecision
(
PRECISION
)
<<
vo
.
GetMax
();
_tmp_output
<<
setprecision
(
PRECISION
)
<<
vo
.
GetMax
();
else
if
(
operation
==
"absmin"
)
_tmp_output
<<
setprecision
(
PRECISION
)
<<
vo
.
GetAbsMin
();
else
if
(
operation
==
"absmax"
)
_tmp_output
<<
setprecision
(
PRECISION
)
<<
vo
.
GetAbsMax
();
else
if
(
operation
==
"mode"
)
output
<<
vo
.
GetMode
();
_tmp_
output
<<
vo
.
GetMode
();
else
if
(
operation
==
"antimode"
)
output
<<
vo
.
GetAntiMode
();
_tmp_
output
<<
vo
.
GetAntiMode
();
else
if
(
operation
==
"count"
)
output
<<
setprecision
(
PRECISION
)
<<
vo
.
GetCount
();
_tmp_
output
<<
setprecision
(
PRECISION
)
<<
vo
.
GetCount
();
else
if
(
operation
==
"count_distinct"
)
output
<<
setprecision
(
PRECISION
)
<<
vo
.
GetCountDistinct
();
_tmp_
output
<<
setprecision
(
PRECISION
)
<<
vo
.
GetCountDistinct
();
else
if
(
operation
==
"collapse"
)
output
<<
vo
.
GetCollapse
();
_tmp_
output
<<
vo
.
GetCollapse
();
else
if
(
operation
==
"distinct"
)
output
<<
vo
.
GetDistinct
();
_tmp_
output
<<
vo
.
GetDistinct
();
else
{
cerr
<<
"ERROR: "
<<
operation
<<
" is an unrecoginzed operation
\n
"
;
exit
(
1
);
}
_column_vec
.
clear
();
return
output
.
str
();
}
/*
const int PRECISION = 21;
double GetUserColumn(const string s);
// Constructor
BedMap::BedMap(string bedAFile, string bedBFile, int column, string operation,
float overlapFraction, bool sameStrand,
bool diffStrand, bool reciprocal,
bool choseNullValue, string nullValue,
bool printHeader)
{
_bedAFile = bedAFile;
_bedBFile = bedBFile;
_column = column - 1; // user's request is 1-based
_operation = operation;
_overlapFraction = overlapFraction;
_sameStrand = sameStrand;
_diffStrand = diffStrand;
_reciprocal = reciprocal;
_nullValue = nullValue;
_printHeader = printHeader;
if (!choseNullValue && operation == "count")
_nullValue = "0";
Map();
}
// Destructor
BedMap::~BedMap(void)
{}
void BedMap::Map() {
_output
.
append
(
_tmp_output
.
str
());
// create new BED file objects for A and B
_bedA = new BedFile(_bedAFile);
_bedB = new BedFile(_bedBFile);
// use the chromsweep algorithm to detect overlaps on the fly.
ChromSweep sweep = ChromSweep(_bedA, _bedB,
_sameStrand, _diffStrand,
_overlapFraction, _reciprocal,
false, _printHeader);
pair<BED, vector<BED> > hit_set;
hit_set.second.reserve(10000);
while (sweep.Next(hit_set)) {
string result = MapHits(hit_set.first, hit_set.second);
_bedA->reportBedTab(hit_set.first);
printf("%s\n", result.c_str());
}
}
string BedMap::MapHits(const BED &a, const vector<BED> &hits) {
ostringstream output;
if (hits.size() == 0)
return _nullValue;
ExtractColumnFromHits(hits);
VectorOps vo(_column_vec);
if (_operation == "sum")
output << setprecision (PRECISION) << vo.GetSum();
else if (_operation == "mean")
output << setprecision (PRECISION) << vo.GetMean();
else if (_operation == "median")
output << setprecision (PRECISION) << vo.GetMedian();
else if (_operation == "min")
output << setprecision (PRECISION) << vo.GetMin();
else if (_operation == "max")
output << setprecision (PRECISION) << vo.GetMax();
else if (_operation == "mode")
output << vo.GetMode();
else if (_operation == "antimode")
output << vo.GetAntiMode();
else if (_operation == "count")
output << setprecision (PRECISION) << vo.GetCount();
else if (_operation == "count_distinct")
output << setprecision (PRECISION) << vo.GetCountDistinct();
else if (_operation == "collapse")
output << vo.GetCollapse();
else if (_operation == "distinct")
output << vo.GetDistinct();
else {
cerr << "ERROR: " << _operation << " is an unrecoginzed operation\n";
exit(1);
}
_column_vec.clear();
return output.str();
}
void BedMap::ExtractColumnFromHits(const vector<BED> &hits) {
for (size_t i = 0; i < hits.size(); ++i) {
try {
_column_vec.push_back(hits[i].fields.at(_column));
}
catch(std::out_of_range& e) {
cerr << endl << "*****" << endl
<< "*****ERROR: requested column ("
<< _column + 1
<< ") exceeds the number of columns in file at line "
<< _bedA->_lineNum << ". Exiting."
<< endl << "*****" << endl;
exit(1);
}
}
}
*/
This diff is collapsed.
Click to expand it.
src/mapFile/mapFile.h
+
3
−
1
View file @
823bfa7f
...
...
@@ -42,11 +42,13 @@ private:
vector
<
string
>
_column_vec
;
// vector to hold current column's worth of data
ostringstream
_tmp_output
;
QuickString
_output
;
// placeholder for the results of mapping B to each a in A.
//------------------------------------------------
// private methods
//------------------------------------------------
void
Map
();
string
Map
Hits
(
RecordKeyList
&
hits
);
void
Summarize
Hits
(
RecordKeyList
&
hits
);
void
ExtractColumnFromHits
(
RecordKeyList
&
hits
);
};
...
...
This diff is collapsed.
Click to expand it.
src/mapFile/mapMain.cpp
+
4
−
1
View file @
823bfa7f
...
...
@@ -191,7 +191,7 @@ void map_help(void) {
cerr
<<
"
\t
-o
\t
"
<<
"Specify the operation that should be applied to -c."
<<
endl
;
cerr
<<
"
\t\t
Valid operations:"
<<
endl
;
cerr
<<
"
\t\t
sum, min, max,"
<<
endl
;
cerr
<<
"
\t\t
sum, min, max,
absmin, absmax,
"
<<
endl
;
cerr
<<
"
\t\t
mean, median,"
<<
endl
;
cerr
<<
"
\t\t
collapse (i.e., print a comma separated list (duplicates allowed)), "
<<
endl
;
cerr
<<
"
\t\t
distinct (i.e., print a comma separated list (NO duplicates allowed)), "
<<
endl
;
...
...
@@ -215,6 +215,9 @@ void map_help(void) {
cerr
<<
"
\t\t
that overlap A on the _opposite_ strand."
<<
endl
;
cerr
<<
"
\t\t
- By default, overlaps are reported without respect to strand."
<<
endl
<<
endl
;
cerr
<<
"
\t
-g
\t
"
<<
"Provide a genome file to enforce consistent chromosome sort order"
<<
endl
;
cerr
<<
"
\t\t
across input files."
<<
endl
<<
endl
;
cerr
<<
"
\t
-null
\t
"
<<
"The value to print if no overlaps are found for an A interval."
<<
endl
;
cerr
<<
"
\t\t
- Default -
\"
.
\"
"
<<
endl
<<
endl
;
...
...
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment