Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Sign in
Toggle navigation
Menu
Open sidebar
R3
legacy
bedtools2
Commits
f36d6db4
Commit
f36d6db4
authored
Apr 07, 2014
by
nkindlon
Browse files
Merge converted to PFM, first check-in
parent
ed71c8e0
Changes
36
Hide whitespace changes
Inline
Side-by-side
Makefile
View file @
f36d6db4
...
...
@@ -47,7 +47,7 @@ SUBDIRS = $(SRC_DIR)/annotateBed \
$(SRC_DIR)
/linksBed
\
$(SRC_DIR)
/maskFastaFromBed
\
$(SRC_DIR)
/mapFile
\
$(SRC_DIR)
/merge
Bed
\
$(SRC_DIR)
/merge
File
\
$(SRC_DIR)
/multiBamCov
\
$(SRC_DIR)
/multiIntersectBed
\
$(SRC_DIR)
/nekSandbox1
\
...
...
src/intersectFile/intersectFile.cpp
View file @
f36d6db4
...
...
@@ -81,7 +81,7 @@ bool FileIntersect::processUnsortedFiles()
while
(
!
queryFRM
->
eof
())
{
Record
*
queryRecord
=
queryFRM
->
allocateAndG
etNextRecord
();
Record
*
queryRecord
=
queryFRM
->
g
etNextRecord
();
if
(
queryRecord
==
NULL
)
{
continue
;
}
...
...
src/mergeBed/mergeBed.cpp
deleted
100644 → 0
View file @
ed71c8e0
/*****************************************************************************
mergeBed.cpp
(c) 2009 - Aaron Quinlan
Hall Laboratory
Department of Biochemistry and Molecular Genetics
University of Virginia
aaronquinlan@gmail.com
Licenced under the GNU General Public License 2.0 license.
******************************************************************************/
#include "lineFileUtilities.h"
#include "mergeBed.h"
void
BedMerge
::
ReportMergedNames
(
const
vector
<
string
>
&
names
)
{
if
(
names
.
size
()
>
0
)
{
printf
(
"
\t
"
);
vector
<
string
>::
const_iterator
nameItr
=
names
.
begin
();
vector
<
string
>::
const_iterator
nameEnd
=
names
.
end
();
for
(;
nameItr
!=
nameEnd
;
++
nameItr
)
{
if
(
nameItr
<
(
nameEnd
-
1
))
cout
<<
*
nameItr
<<
_delimiter
;
else
cout
<<
*
nameItr
;
}
}
else
{
cerr
<<
endl
<<
"*****"
<<
endl
<<
"*****ERROR: "
<<
"No names found to report for the -names option. Exiting."
<<
endl
<<
"*****"
<<
endl
;
exit
(
1
);
}
}
void
BedMerge
::
ReportMergedScores
(
const
vector
<
string
>
&
scores
)
{
// setup a VectorOps instances for the list of scores.
// VectorOps methods used for each possible operation.
VectorOps
vo
(
scores
);
std
::
stringstream
buffer
;
if
(
scores
.
size
()
>
0
)
{
if
(
_scoreOp
==
"sum"
)
buffer
<<
setprecision
(
PRECISION
)
<<
vo
.
GetSum
();
else
if
(
_scoreOp
==
"min"
)
buffer
<<
setprecision
(
PRECISION
)
<<
vo
.
GetMin
();
else
if
(
_scoreOp
==
"max"
)
buffer
<<
setprecision
(
PRECISION
)
<<
vo
.
GetMax
();
else
if
(
_scoreOp
==
"mean"
)
buffer
<<
setprecision
(
PRECISION
)
<<
vo
.
GetMean
();
else
if
(
_scoreOp
==
"median"
)
buffer
<<
setprecision
(
PRECISION
)
<<
vo
.
GetMedian
();
else
if
(
_scoreOp
==
"mode"
)
buffer
<<
setprecision
(
PRECISION
)
<<
vo
.
GetMode
();
else
if
(
_scoreOp
==
"antimode"
)
buffer
<<
setprecision
(
PRECISION
)
<<
vo
.
GetAntiMode
();
else
if
(
_scoreOp
==
"collapse"
)
buffer
<<
setprecision
(
PRECISION
)
<<
vo
.
GetCollapse
(
_delimiter
);
cout
<<
"
\t
"
<<
buffer
.
str
();
}
else
{
cerr
<<
endl
<<
"*****"
<<
endl
<<
"*****ERROR: No scores found to report for the -scores option. Exiting."
<<
endl
<<
"*****"
<<
endl
;
exit
(
1
);
}
}
// ===============
// = Constructor =
// ===============
BedMerge
::
BedMerge
(
string
&
bedFile
,
bool
numEntries
,
int
maxDistance
,
bool
forceStrand
,
bool
reportNames
,
bool
reportScores
,
const
string
&
scoreOp
,
const
string
&
delimiter
)
:
_bedFile
(
bedFile
),
_numEntries
(
numEntries
),
_forceStrand
(
forceStrand
),
_reportNames
(
reportNames
),
_reportScores
(
reportScores
),
_scoreOp
(
scoreOp
),
_maxDistance
(
maxDistance
),
_delimiter
(
delimiter
)
{
_bed
=
new
BedFile
(
bedFile
);
if
(
_forceStrand
==
false
)
MergeBed
();
else
MergeBedStranded
();
}
// =================
// = Destructor =
// =================
BedMerge
::~
BedMerge
(
void
)
{
}
// ===============================================
// Convenience method for reporting merged blocks
// ================================================
void
BedMerge
::
Report
(
string
chrom
,
int
start
,
int
end
,
const
vector
<
string
>
&
names
,
const
vector
<
string
>
&
scores
,
int
mergeCount
)
{
// ARQ: removed to force all output to be zero-based, BED format, reagrdless of input type
//if (_bed->isZeroBased == false) {start++;}
printf
(
"%s
\t
%d
\t
%d"
,
chrom
.
c_str
(),
start
,
end
);
// just the merged intervals
if
(
_numEntries
==
false
&&
_reportNames
==
false
&&
_reportScores
==
false
)
{
printf
(
"
\n
"
);
}
// merged intervals and counts
else
if
(
_numEntries
==
true
&&
_reportNames
==
false
&&
_reportScores
==
false
)
{
printf
(
"
\t
%d
\n
"
,
mergeCount
);
}
// merged intervals, counts, and scores
else
if
(
_numEntries
==
true
&&
_reportNames
==
false
&&
_reportScores
==
true
)
{
printf
(
"
\t
%d"
,
mergeCount
);
ReportMergedScores
(
scores
);
printf
(
"
\n
"
);
}
// merged intervals, counts, and names
else
if
(
_numEntries
==
true
&&
_reportNames
==
true
&&
_reportScores
==
false
)
{
ReportMergedNames
(
names
);
printf
(
"
\t
%d
\n
"
,
mergeCount
);
}
// merged intervals, counts, names, and scores
else
if
(
_numEntries
==
true
&&
_reportNames
==
true
&&
_reportScores
==
true
)
{
ReportMergedNames
(
names
);
ReportMergedScores
(
scores
);
printf
(
"
\t
%d
\n
"
,
mergeCount
);
}
// merged intervals and names
else
if
(
_numEntries
==
false
&&
_reportNames
==
true
&&
_reportScores
==
false
)
{
ReportMergedNames
(
names
);
printf
(
"
\n
"
);
}
// merged intervals and scores
else
if
(
_numEntries
==
false
&&
_reportNames
==
false
&&
_reportScores
==
true
)
{
ReportMergedScores
(
scores
);
printf
(
"
\n
"
);
}
// merged intervals, names, and scores
else
if
(
_numEntries
==
false
&&
_reportNames
==
true
&&
_reportScores
==
true
)
{
ReportMergedNames
(
names
);
ReportMergedScores
(
scores
);
printf
(
"
\n
"
);
}
}
// =========================================================
// Convenience method for reporting merged blocks by strand
// =========================================================
void
BedMerge
::
ReportStranded
(
string
chrom
,
int
start
,
int
end
,
const
vector
<
string
>
&
names
,
const
vector
<
string
>
&
scores
,
int
mergeCount
,
string
strand
)
{
// ARQ: removed to force all output to be zero-based, BED format, reagrdless of input type
//if (_bed->isZeroBased == false) {start++;}
printf
(
"%s
\t
%d
\t
%d"
,
chrom
.
c_str
(),
start
,
end
);
// just the merged intervals
if
(
_numEntries
==
false
&&
_reportNames
==
false
&&
_reportScores
==
false
)
{
printf
(
"
\t\t\t
%s
\n
"
,
strand
.
c_str
());
}
// merged intervals and counts
else
if
(
_numEntries
==
true
&&
_reportNames
==
false
&&
_reportScores
==
false
)
{
printf
(
"
\t\t
%d
\t
%s
\n
"
,
mergeCount
,
strand
.
c_str
());
}
// merged intervals, counts, and scores
else
if
(
_numEntries
==
true
&&
_reportNames
==
false
&&
_reportScores
==
true
)
{
printf
(
"
\t
%d"
,
mergeCount
);
ReportMergedScores
(
scores
);
printf
(
"
\t
%s
\n
"
,
strand
.
c_str
());
}
// merged intervals, counts, and names
else
if
(
_numEntries
==
true
&&
_reportNames
==
true
&&
_reportScores
==
false
)
{
ReportMergedNames
(
names
);
printf
(
"
\t
%d
\t
%s"
,
mergeCount
,
strand
.
c_str
());
printf
(
"
\n
"
);
}
// merged intervals, counts, names, and scores
else
if
(
_numEntries
==
true
&&
_reportNames
==
true
&&
_reportScores
==
true
)
{
ReportMergedNames
(
names
);
ReportMergedScores
(
scores
);
printf
(
"
\t
%s
\t
%d"
,
strand
.
c_str
(),
mergeCount
);
printf
(
"
\n
"
);
}
// merged intervals and names
else
if
(
_numEntries
==
false
&&
_reportNames
==
true
&&
_reportScores
==
false
)
{
ReportMergedNames
(
names
);
printf
(
"
\t
.
\t
%s
\n
"
,
strand
.
c_str
());
}
// merged intervals and scores
else
if
(
_numEntries
==
false
&&
_reportNames
==
false
&&
_reportScores
==
true
)
{
printf
(
"
\t
"
);
ReportMergedScores
(
scores
);
printf
(
"
\t
%s
\n
"
,
strand
.
c_str
());
}
// merged intervals, names, and scores
else
if
(
_numEntries
==
false
&&
_reportNames
==
true
&&
_reportScores
==
true
)
{
ReportMergedNames
(
names
);
ReportMergedScores
(
scores
);
printf
(
"
\t
%s
\n
"
,
strand
.
c_str
());
}
}
// =====================================================
// = Merge overlapping BED entries into a single entry =
// =====================================================
void
BedMerge
::
MergeBed
()
{
int
mergeCount
=
1
;
vector
<
string
>
names
;
vector
<
string
>
scores
;
int
start
=
-
1
;
int
end
=
-
1
;
BED
prev
,
curr
;
_bed
->
Open
();
while
(
_bed
->
GetNextBed
(
curr
,
true
))
{
// true = force sorted intervals
if
(
_bed
->
_status
!=
BED_VALID
)
continue
;
// new block, no overlap
if
(
(((
int
)
curr
.
start
-
end
)
>
_maxDistance
)
||
(
curr
.
chrom
!=
prev
.
chrom
))
{
if
(
start
>=
0
)
{
Report
(
prev
.
chrom
,
start
,
end
,
names
,
scores
,
mergeCount
);
// reset
mergeCount
=
1
;
names
.
clear
();
scores
.
clear
();
}
start
=
curr
.
start
;
end
=
curr
.
end
;
if
(
!
curr
.
name
.
empty
())
names
.
push_back
(
curr
.
name
);
if
(
!
curr
.
score
.
empty
())
scores
.
push_back
(
curr
.
score
);
}
// same block, overlaps
else
{
if
((
int
)
curr
.
end
>
end
)
end
=
curr
.
end
;
if
(
!
curr
.
name
.
empty
())
names
.
push_back
(
curr
.
name
);
if
(
!
curr
.
score
.
empty
())
scores
.
push_back
(
curr
.
score
);
mergeCount
++
;
}
prev
=
curr
;
}
if
(
start
>=
0
)
{
Report
(
prev
.
chrom
,
start
,
end
,
names
,
scores
,
mergeCount
);
}
}
// ===============================================================================
// = Merge overlapping BED entries into a single entry, accounting for strandedness =
// ================================================================================
void
BedMerge
::
MergeBedStranded
()
{
// load the "B" bed file into a map so
// that we can easily compare "A" to it for overlaps
_bed
->
loadBedFileIntoMapNoBin
();
// loop through each chromosome and merge their BED entries
masterBedMapNoBin
::
const_iterator
m
=
_bed
->
bedMapNoBin
.
begin
();
masterBedMapNoBin
::
const_iterator
mEnd
=
_bed
->
bedMapNoBin
.
end
();
for
(;
m
!=
mEnd
;
++
m
)
{
// bedList is already sorted by start position.
string
chrom
=
m
->
first
;
// make a list of the two strands to merge separately.
vector
<
string
>
strands
(
2
);
strands
[
0
]
=
"+"
;
strands
[
1
]
=
"-"
;
// do two passes, one for each strand.
for
(
unsigned
int
s
=
0
;
s
<
strands
.
size
();
s
++
)
{
int
mergeCount
=
1
;
int
numOnStrand
=
0
;
vector
<
string
>
names
;
vector
<
string
>
scores
;
// merge overlapping features for this chromosome.
int
start
=
-
1
;
int
end
=
-
1
;
vector
<
BED
>::
const_iterator
bedItr
=
m
->
second
.
begin
();
vector
<
BED
>::
const_iterator
bedEnd
=
m
->
second
.
end
();
for
(;
bedItr
!=
bedEnd
;
++
bedItr
)
{
// if forcing strandedness, move on if the hit
// is not on the current strand.
if
(
bedItr
->
strand
!=
strands
[
s
])
{
continue
;
}
else
{
numOnStrand
++
;
}
if
(
(((
int
)
bedItr
->
start
-
end
)
>
_maxDistance
)
||
(
end
<
0
))
{
if
(
start
>=
0
)
{
ReportStranded
(
chrom
,
start
,
end
,
names
,
scores
,
mergeCount
,
strands
[
s
]);
// reset
mergeCount
=
1
;
names
.
clear
();
scores
.
clear
();
}
start
=
bedItr
->
start
;
end
=
bedItr
->
end
;
if
(
!
bedItr
->
name
.
empty
())
names
.
push_back
(
bedItr
->
name
);
if
(
!
bedItr
->
score
.
empty
())
scores
.
push_back
(
bedItr
->
score
);
}
else
{
if
((
int
)
bedItr
->
end
>
end
)
end
=
bedItr
->
end
;
mergeCount
++
;
if
(
!
bedItr
->
name
.
empty
())
names
.
push_back
(
bedItr
->
name
);
if
(
!
bedItr
->
score
.
empty
())
scores
.
push_back
(
bedItr
->
score
);
}
}
if
(
start
>=
0
)
{
ReportStranded
(
chrom
,
start
,
end
,
names
,
scores
,
mergeCount
,
strands
[
s
]);
}
}
}
}
src/mergeBed/mergeBed.h
deleted
100644 → 0
View file @
ed71c8e0
/*****************************************************************************
mergeBed.h
(c) 2009 - Aaron Quinlan
Hall Laboratory
Department of Biochemistry and Molecular Genetics
University of Virginia
aaronquinlan@gmail.com
Licenced under the GNU General Public License 2.0 license.
******************************************************************************/
#include "bedFile.h"
#include <vector>
#include <algorithm>
#include <numeric>
#include <iostream>
#include <iomanip>
#include <fstream>
#include <limits.h>
#include <stdlib.h>
#include "VectorOps.h"
using
namespace
std
;
const
int
PRECISION
=
21
;
//************************************************
// Class methods and elements
//************************************************
class
BedMerge
{
public:
// constructor
BedMerge
(
string
&
bedFile
,
bool
numEntries
,
int
maxDistance
,
bool
forceStrand
,
bool
reportNames
,
bool
reportScores
,
const
string
&
scoreOp
,
const
string
&
delimiter
);
// destructor
~
BedMerge
(
void
);
void
MergeBed
();
void
MergeBedStranded
();
private:
string
_bedFile
;
bool
_numEntries
;
bool
_forceStrand
;
bool
_reportNames
;
bool
_reportScores
;
string
_scoreOp
;
int
_maxDistance
;
string
_delimiter
;
// instance of a bed file class.
BedFile
*
_bed
;
void
Report
(
string
chrom
,
int
start
,
int
end
,
const
vector
<
string
>
&
names
,
const
vector
<
string
>
&
scores
,
int
mergeCount
);
void
ReportStranded
(
string
chrom
,
int
start
,
int
end
,
const
vector
<
string
>
&
names
,
const
vector
<
string
>
&
scores
,
int
mergeCount
,
string
strand
);
void
ReportMergedNames
(
const
vector
<
string
>
&
names
);
void
ReportMergedScores
(
const
vector
<
string
>
&
scores
);
};
src/merge
Bed
/Makefile
→
src/merge
File
/Makefile
View file @
f36d6db4
...
...
@@ -5,18 +5,29 @@ BIN_DIR = ../../bin/
# -------------------
# define our includes
# -------------------
INCLUDES
=
-I
$(UTILITIES_DIR)
/bedFile/
\
-I
$(UTILITIES_DIR)
/lineFileUtilities/
\
-I
$(UTILITIES_DIR)
/gzstream/
\
-I
$(UTILITIES_DIR)
/fileType/
\
-I
$(UTILITIES_DIR)
/VectorOps/
\
INCLUDES
=
-I
$(UTILITIES_DIR)
/Contexts/
\
-I
$(UTILITIES_DIR)
/general/
\
-I
$(UTILITIES_DIR)
/fileType/
\
-I
$(UTILITIES_DIR)
/gzstream/
\
-I
$(UTILITIES_DIR)
/GenomeFile/
\
-I
$(UTILITIES_DIR)
/BamTools/include
\
-I
$(UTILITIES_DIR)
/BamTools/src
\
-I
$(UTILITIES_DIR)
/BlockedIntervals
\
-I
$(UTILITIES_DIR)
/BamTools-Ancillary
\
-I
$(UTILITIES_DIR)
/FileRecordTools/
\
-I
$(UTILITIES_DIR)
/FileRecordTools/FileReaders/
\
-I
$(UTILITIES_DIR)
/FileRecordTools/Records/
\
-I
$(UTILITIES_DIR)
/KeyListOps/
\
-I
$(UTILITIES_DIR)
/RecordOutputMgr/
\
-I
$(UTILITIES_DIR)
/NewChromsweep
\
-I
$(UTILITIES_DIR)
/BinTree
\
-I
$(UTILITIES_DIR)
/version/
# ----------------------------------
# define our source and object files
# ----------------------------------
SOURCES
=
mergeMain.cpp merge
Bed
.cpp merge
Bed
.h
OBJECTS
=
mergeMain.o merge
Bed
.o
SOURCES
=
mergeMain.cpp merge
File
.cpp merge
File
.h
OBJECTS
=
mergeMain.o merge
File
.o
BUILT_OBJECTS
=
$(
patsubst
%,
$(OBJ_DIR)
/%,
$(OBJECTS)
)
...
...
@@ -30,6 +41,6 @@ $(BUILT_OBJECTS): $(SOURCES)
clean
:
@
echo
"Cleaning up."
@
rm
-f
$(OBJ_DIR)
/mergeMain.o
$(OBJ_DIR)
/merge
Bed
.o
@
rm
-f
$(OBJ_DIR)
/mergeMain.o
$(OBJ_DIR)
/merge
File
.o
.PHONY
:
clean
\ No newline at end of file
src/mergeFile/mergeFile.cpp
0 → 100644
View file @
f36d6db4
/*****************************************************************************
mergeFile.cpp
(c) 2009 - Aaron Quinlan
Hall Laboratory
Department of Biochemistry and Molecular Genetics
University of Virginia
aaronquinlan@gmail.com
Licenced under the GNU General Public License 2.0 license.
******************************************************************************/
#include "mergeFile.h"
MergeFile
::
MergeFile
(
ContextMerge
*
context
)
:
_context
(
context
),
_recordOutputMgr
(
NULL
)
{
_recordOutputMgr
=
new
RecordOutputMgr
();
_recordOutputMgr
->
init
(
_context
);
}
MergeFile
::~
MergeFile
()
{
delete
_recordOutputMgr
;
_recordOutputMgr
=
NULL
;
}
bool
MergeFile
::
merge
()
{
RecordKeyList
hitSet
;
FileRecordMgr
*
frm
=
_context
->
getFile
(
0
);
while
(
!
frm
->
eof
())
{
Record
*
key
=
frm
->
getNextRecord
(
&
hitSet
);
if
(
key
==
NULL
)
continue
;
_recordOutputMgr
->
printRecord
(
hitSet
.
getKey
(),
_context
->
getColumnOpsVal
(
hitSet
));
}
return
true
;
}
src/mergeFile/mergeFile.h
0 → 100644
View file @
f36d6db4
/*****************************************************************************
mergeFile.h
(c) 2009 - Aaron Quinlan
Hall Laboratory
Department of Biochemistry and Molecular Genetics
University of Virginia
aaronquinlan@gmail.com
Licenced under the GNU General Public License 2.0 license.
******************************************************************************/
#ifndef MERGE_FILE_H_
#define MERGE_FILE_H_
//************************************************
// Class methods and elements
//************************************************
#include "ContextMerge.h"
#include "RecordOutputMgr.h"
class
MergeFile
{
public:
MergeFile
(
ContextMerge
*
context
);
~
MergeFile
();
bool
merge
();
private:
ContextMerge
*
_context
;
RecordOutputMgr
*
_recordOutputMgr
;
};
#endif
src/merge
Bed
/mergeMain.cpp
→
src/merge
File
/mergeMain.cpp
View file @
f36d6db4
...
...
@@ -9,7 +9,7 @@
Licenced under the GNU General Public License 2.0 license.
******************************************************************************/
#include "merge
Bed
.h"
#include "merge
File
.h"
#include "version.h"
using
namespace
std
;
...
...
@@ -26,113 +26,21 @@ void merge_help(void);
int
merge_main
(
int
argc
,
char
*
argv
[])
{
// our configuration variables
bool
showHelp
=
false
;
// input files
string
bedFile
=
"stdin"
;
int
maxDistance
=
0
;
string
scoreOp
=
""
;
// input arguments
bool
haveBed
=
true
;
bool
numEntries
=
false
;
bool
haveMaxDistance
=
false
;
bool
forceStrand
=
false
;
bool
reportNames
=
false
;
bool
reportScores
=
false
;
string
delimiter
=
","
;
for
(
int
i
=
1
;
i
<
argc
;
i
++
)
{
int
parameterLength
=
(
int
)
strlen
(
argv
[
i
]);
if
((
PARAMETER_CHECK
(
"-h"
,
2
,
parameterLength
))
||
(
PARAMETER_CHECK
(
"--help"
,
5
,
parameterLength
)))
{
showHelp
=
true
;
ContextMerge
*
context
=
new
ContextMerge
();
if
(
!
context
->
parseCmdArgs
(
argc
,
argv
,
1
)
||
context
->
getShowHelp
()
||
!
context
->
isValidState
())
{