Skip to content
GitLab
Projects
Groups
Snippets
/
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Sign in
Toggle navigation
Menu
Open sidebar
R3
legacy
bedtools2
Commits
e65c98bc
Commit
e65c98bc
authored
Apr 04, 2015
by
arq5x
Browse files
Merge branch 'master' of
https://github.com/arq5x/bedtools2
parents
237766a7
81f458fb
Changes
31
Hide whitespace changes
Inline
Side-by-side
Makefile
View file @
e65c98bc
...
...
@@ -66,7 +66,7 @@ SUBDIRS = $(SRC_DIR)/annotateBed \
$(SRC_DIR)
/sortBed
\
$(SRC_DIR)
/spacingFile
\
$(SRC_DIR)
/split
\
$(SRC_DIR)
/subtract
Bed
\
$(SRC_DIR)
/subtract
File
\
$(SRC_DIR)
/tagBam
\
$(SRC_DIR)
/unionBedGraphs
\
$(SRC_DIR)
/windowBed
\
...
...
README.md
View file @
e65c98bc
bedtools - a swiss army knife for genome arithmetic
===================================================
**Current version**
: 2.2
2.1
**Current version**
: 2.2
3.0
Note
-------
...
...
src/closestFile/closestMain.cpp
View file @
e65c98bc
...
...
@@ -114,6 +114,11 @@ void closest_help(void) {
cerr
<<
"
\t
-header
\t
"
<<
"Print the header from the A file prior to results."
<<
endl
<<
endl
;
cerr
<<
"
\t
-nonamecheck
\t
"
<<
"For sorted data, don't throw an error if the file has different naming conventions"
<<
endl
;
cerr
<<
"
\t\t\t
for the same chromosome. ex.
\"
chr1
\"
vs
\"
chr01
\"
."
<<
endl
<<
endl
;
cerr
<<
"Notes: "
<<
endl
;
cerr
<<
"
\t
Reports
\"
none
\"
for chrom and
\"
-1
\"
for all other fields when a feature"
<<
endl
;
cerr
<<
"
\t
is not found in B on the same chromosome as the feature in A."
<<
endl
;
...
...
src/intersectFile/intersectMain.cpp
View file @
e65c98bc
...
...
@@ -134,6 +134,9 @@ void intersect_help(void) {
cerr
<<
"
\t
-sortout
\t
"
<<
"When using multiple databases, sort the output DB hits"
<<
endl
;
cerr
<<
"
\t\t\t
for each record."
<<
endl
<<
endl
;
cerr
<<
"
\t
-nonamecheck
\t
"
<<
"For sorted data, don't throw an error if the file has different naming conventions"
<<
endl
;
cerr
<<
"
\t\t\t
for the same chromosome. ex.
\"
chr1
\"
vs
\"
chr01
\"
."
<<
endl
<<
endl
;
CommonHelp
();
cerr
<<
"Notes: "
<<
endl
;
...
...
src/jaccard/jaccardMain.cpp
View file @
e65c98bc
...
...
@@ -73,6 +73,11 @@ void jaccard_help(void) {
cerr
<<
"
\t\t
"
<<
"the forward or reverse strand, respectively."
<<
endl
;
cerr
<<
"
\t\t
"
<<
"- By default, merging is done without respect to strand."
<<
endl
<<
endl
;
cerr
<<
"
\t
-nonamecheck
\t
"
<<
"For sorted data, don't throw an error if the file has different naming conventions"
<<
endl
;
cerr
<<
"
\t\t\t
for the same chromosome. ex.
\"
chr1
\"
vs
\"
chr01
\"
."
<<
endl
<<
endl
;
cerr
<<
"Notes: "
<<
endl
;
cerr
<<
"
\t
(1) Input files must be sorted by chrom, then start position."
<<
endl
<<
endl
;
...
...
src/mapFile/mapMain.cpp
View file @
e65c98bc
...
...
@@ -84,6 +84,10 @@ void map_help(void) {
cerr
<<
"
\t
-prec
\t
"
<<
"Sets the decimal precision for output (Default: 5)"
<<
endl
<<
endl
;
cerr
<<
"
\t
-nonamecheck
\t
"
<<
"For sorted data, don't throw an error if the file has different naming conventions"
<<
endl
;
cerr
<<
"
\t\t\t
for the same chromosome. ex.
\"
chr1
\"
vs
\"
chr01
\"
."
<<
endl
<<
endl
;
cerr
<<
"Notes: "
<<
endl
;
cerr
<<
"
\t
(1) Both input files must be sorted by chrom, then start."
<<
endl
<<
endl
;
...
...
src/subtractBed/subtractBed.cpp
deleted
100644 → 0
View file @
237766a7
/*****************************************************************************
subtractBed.cpp
(c) 2009 - Aaron Quinlan
Hall Laboratory
Department of Biochemistry and Molecular Genetics
University of Virginia
aaronquinlan@gmail.com
Licenced under the GNU General Public License 2.0 license.
******************************************************************************/
#include
"lineFileUtilities.h"
#include
"subtractBed.h"
#include
<numeric>
/*
Constructor
*/
BedSubtract
::
BedSubtract
(
string
&
bedAFile
,
string
&
bedBFile
,
float
overlapFraction
,
bool
sameStrand
,
bool
diffStrand
,
bool
removeAll
,
bool
removeAny
)
:
_bedAFile
(
bedAFile
)
,
_bedBFile
(
bedBFile
)
,
_overlapFraction
(
overlapFraction
)
,
_sameStrand
(
sameStrand
)
,
_diffStrand
(
diffStrand
)
,
_removeAll
(
removeAll
)
,
_removeAny
(
removeAny
)
{
_bedA
=
new
BedFile
(
bedAFile
);
_bedB
=
new
BedFile
(
bedBFile
);
SubtractBed
();
}
/*
Destructor
*/
BedSubtract
::~
BedSubtract
(
void
)
{
}
void
BedSubtract
::
FindAndSubtractOverlaps
(
BED
&
a
,
vector
<
BED
>
&
hits
)
{
// find all of the overlaps between a and B.
_bedB
->
allHits
(
a
.
chrom
,
a
.
start
,
a
.
end
,
a
.
strand
,
hits
,
_sameStrand
,
_diffStrand
,
0.0
,
false
);
// is A completely spanned by an entry in B?
// if so, A should not be reported.
int
numConsumedByB
=
0
;
int
numOverlaps
=
0
;
vector
<
BED
>
bOverlaps
;
// list of hits in B. Special proc. if multiple.
vector
<
BED
>::
const_iterator
h
=
hits
.
begin
();
vector
<
BED
>::
const_iterator
hitsEnd
=
hits
.
end
();
for
(;
h
!=
hitsEnd
;
++
h
)
{
int
s
=
max
(
a
.
start
,
h
->
start
);
int
e
=
min
(
a
.
end
,
h
->
end
);
int
overlapBases
=
(
e
-
s
);
int
aLength
=
(
a
.
end
-
a
.
start
);
if
(
s
<
e
)
{
// is there enough overlap (default ~ 1bp)
float
overlap
=
((
float
)
overlapBases
/
(
float
)
aLength
);
if
(
overlap
>=
1.0
)
{
numOverlaps
++
;
numConsumedByB
++
;
}
else
if
(
overlap
>=
_overlapFraction
||
_removeAny
)
{
numOverlaps
++
;
bOverlaps
.
push_back
(
*
h
);
}
}
}
if
(
numOverlaps
==
0
)
{
// no overlap found, so just report A as-is.
_bedA
->
reportBedNewLine
(
a
);
}
else
if
((
numOverlaps
==
1
)
&&
(
!
_removeAny
))
{
// one overlap found. only need to look at the single
// entry in bOverlaps.
// if A was not "consumed" by any entry in B
if
((
numConsumedByB
==
0
)
&&
(
_removeAll
==
false
))
{
BED
theHit
=
bOverlaps
[
0
];
// A ++++++++++++
// B ----
// Res. ==== ====
if
(
(
theHit
.
start
>
a
.
start
)
&&
(
theHit
.
end
<
a
.
end
)
)
{
_bedA
->
reportBedRangeNewLine
(
a
,
a
.
start
,
theHit
.
start
);
_bedA
->
reportBedRangeNewLine
(
a
,
theHit
.
end
,
a
.
end
);
}
// A ++++++++++++
// B ----------
// Res. ==
else
if
(
theHit
.
start
==
a
.
start
)
{
_bedA
->
reportBedRangeNewLine
(
a
,
theHit
.
end
,
a
.
end
);
}
// A ++++++++++++
// B ----------
// Res. ====
else
if
(
theHit
.
start
<
a
.
start
)
{
_bedA
->
reportBedRangeNewLine
(
a
,
theHit
.
end
,
a
.
end
);
}
// A ++++++++++++
// B ----------
// Res. =======
else
if
(
theHit
.
start
>
a
.
start
)
{
_bedA
->
reportBedRangeNewLine
(
a
,
a
.
start
,
theHit
.
start
);
}
}
}
else
if
((
numOverlaps
>
1
)
||
_removeAny
)
{
// multiple overlapz found. look at all the hits
// and figure out which bases in A survived. then
// report the contigous intervals that survived.
vector
<
bool
>
aKeep
(
a
.
end
-
a
.
start
,
true
);
if
(
numConsumedByB
==
0
)
{
if
(
_removeAll
){
return
;
}
// if there's any overlap, then we don't report.
// track the number of hit starts and ends at each position in A
for
(
vector
<
BED
>::
iterator
h
=
bOverlaps
.
begin
();
h
!=
bOverlaps
.
end
();
++
h
)
{
int
s
=
max
(
a
.
start
,
h
->
start
);
int
e
=
min
(
a
.
end
,
h
->
end
);
for
(
int
i
=
s
+
1
;
i
<=
e
;
++
i
)
{
aKeep
[
i
-
a
.
start
-
1
]
=
false
;
}
}
if
(
_removeAny
){
int
asum
=
std
::
accumulate
(
aKeep
.
rbegin
(),
aKeep
.
rend
(),
0
);
float
afrac
=
1.0
-
(
float
)
asum
/
aKeep
.
size
();
if
(
afrac
>
_overlapFraction
){
return
;
}
for
(
unsigned
int
i
=
0
;
i
<
aKeep
.
size
();
++
i
)
{
aKeep
[
i
]
=
true
;
}
}
// report the remaining blocks.
for
(
unsigned
int
i
=
0
;
i
<
aKeep
.
size
();
++
i
)
{
if
(
aKeep
[
i
]
==
true
)
{
CHRPOS
blockStart
=
i
+
a
.
start
;
while
((
aKeep
[
i
]
==
true
)
&&
(
i
<
aKeep
.
size
()))
{
i
++
;
}
CHRPOS
blockEnd
=
i
+
a
.
start
;
blockEnd
=
min
(
a
.
end
,
blockEnd
);
_bedA
->
reportBedRangeNewLine
(
a
,
blockStart
,
blockEnd
);
}
}
}
}
}
void
BedSubtract
::
SubtractBed
()
{
// load the "B" bed file into a map so
// that we can easily compare "A" to it for overlaps
_bedB
->
loadBedFileIntoMap
();
BED
a
;
vector
<
BED
>
hits
;
// reserve some space
hits
.
reserve
(
100
);
_bedA
->
Open
();
while
(
_bedA
->
GetNextBed
(
a
))
{
if
(
_bedA
->
_status
==
BED_VALID
)
{
FindAndSubtractOverlaps
(
a
,
hits
);
hits
.
clear
();
}
}
_bedA
->
Close
();
}
// END Intersect
src/subtractBed/subtractBed.h
deleted
100644 → 0
View file @
237766a7
/*****************************************************************************
subtractBed.h
(c) 2009 - Aaron Quinlan
Hall Laboratory
Department of Biochemistry and Molecular Genetics
University of Virginia
aaronquinlan@gmail.com
Licenced under the GNU General Public License 2.0 license.
******************************************************************************/
#ifndef SUBTRACTBED_H
#define SUBTRACTBED_H
#include
"bedFile.h"
#include
<vector>
#include
<iostream>
#include
<fstream>
using
namespace
std
;
//************************************************
// Class methods and elements
//************************************************
class
BedSubtract
{
public:
// constructor
BedSubtract
(
string
&
bedAFile
,
string
&
bedBFile
,
float
overlapFraction
,
bool
sameStrand
,
bool
diffStrand
,
bool
removeAll
,
bool
removeAny
);
// destructor
~
BedSubtract
(
void
);
private:
// processing variables
string
_bedAFile
;
string
_bedBFile
;
float
_overlapFraction
;
bool
_sameStrand
;
bool
_diffStrand
;
bool
_removeAll
;
bool
_removeAny
;
// instances of bed file class.
BedFile
*
_bedA
,
*
_bedB
;
// methods
void
FindAndSubtractOverlaps
(
BED
&
a
,
vector
<
BED
>
&
hits
);
void
SubtractBed
();
};
#endif
/* SUBTRACTBED_H */
src/subtract
Bed
/Makefile
→
src/subtract
File
/Makefile
View file @
e65c98bc
...
...
@@ -5,18 +5,31 @@ BIN_DIR = ../../bin/
# -------------------
# define our includes
# -------------------
INCLUDES
=
-I
$(UTILITIES_DIR)
/bedFile/
\
-I
$(UTILITIES_DIR)
/lineFileUtilities/
\
-I
$(UTILITIES_DIR)
/gzstream/
\
-I
$(UTILITIES_DIR)
/fileType/
\
INCLUDES
=
-I
$(UTILITIES_DIR)
/Contexts/
\
-I
$(UTILITIES_DIR)
/general/
\
-I
$(UTILITIES_DIR)
/fileType/
\
-I
$(UTILITIES_DIR)
/gzstream/
\
-I
$(UTILITIES_DIR)
/GenomeFile/
\
-I
$(UTILITIES_DIR)
/BamTools/include
\
-I
$(UTILITIES_DIR)
/BamTools/src
\
-I
$(UTILITIES_DIR)
/BlockedIntervals
\
-I
$(UTILITIES_DIR)
/BamTools-Ancillary
\
-I
$(UTILITIES_DIR)
/FileRecordTools/
\
-I
$(UTILITIES_DIR)
/FileRecordTools/FileReaders/
\
-I
$(UTILITIES_DIR)
/FileRecordTools/Records/
\
-I
$(UTILITIES_DIR)
/KeyListOps/
\
-I
$(UTILITIES_DIR)
/RecordOutputMgr/
\
-I
$(UTILITIES_DIR)
/NewChromsweep
\
-I
$(UTILITIES_DIR)
/BinTree
\
-I
$(UTILITIES_DIR)
/version/
# ----------------------------------
# define our source and object files
# ----------------------------------
SOURCES
=
subtractMain.cpp subtract
Bed
.cpp subtract
Bed
.h
OBJECTS
=
subtractMain.o subtract
Bed
.o
SOURCES
=
subtractMain.cpp subtract
File
.cpp subtract
File
.h
OBJECTS
=
subtractMain.o subtract
File
.o
BUILT_OBJECTS
=
$(
patsubst
%,
$(OBJ_DIR)
/%,
$(OBJECTS)
)
PROGRAM
=
subtractFile
all
:
$(BUILT_OBJECTS)
...
...
@@ -24,10 +37,10 @@ all: $(BUILT_OBJECTS)
$(BUILT_OBJECTS)
:
$(SOURCES)
@
echo
" * compiling"
$
(
*
F
)
.cpp
@
$(CXX)
-c
-o
$@
$
(
*
F
)
.cpp
$(LDFLAGS)
$(CXXFLAGS)
$(INCLUDES)
@
$(CXX)
-c
-o
$@
$
(
*
F
)
.cpp
$(LDFLAGS)
$(CXXFLAGS)
$(DFLAGS)
$(INCLUDES)
clean
:
@
echo
"Cleaning up."
@
rm
-f
$(OBJ_DIR)
/subtractMain.o
$(OBJ_DIR)
/subtract
Bed
.o
@
rm
-f
$(OBJ_DIR)
/subtractMain.o
$(OBJ_DIR)
/subtract
File
.o
.PHONY
:
clean
src/subtractFile/subtractFile.cpp
0 → 100644
View file @
e65c98bc
/*
* subtractFile.cpp
*
* Created on: Feb 19, 2015
* Author: nek3d
*/
/*****************************************************************************
subtractFile.cpp
(c) 2009 - Aaron Quinlan
Hall Laboratory
Department of Biochemistry and Molecular Genetics
University of Virginia
aaronquinlan@gmail.com
Licenced under the GNU General Public License 2.0 license.
******************************************************************************/
#include
"subtractFile.h"
#include
"ContextSubtract.h"
#include
"FileRecordMgr.h"
#include
"NewChromsweep.h"
#include
"BinTree.h"
#include
"RecordOutputMgr.h"
#include
<numeric>
//for std::accumulate
SubtractFile
::
SubtractFile
(
ContextSubtract
*
context
)
:
_context
(
context
),
_blockMgr
(
NULL
),
_recordOutputMgr
(
NULL
),
_tmpBlocksMgr
(
NULL
)
{
_recordOutputMgr
=
new
RecordOutputMgr
();
_recordOutputMgr
->
init
(
_context
);
if
(
_context
->
getObeySplits
())
{
_blockMgr
=
new
BlockMgr
(
_context
->
getOverlapFraction
(),
_context
->
getReciprocal
());
_recordOutputMgr
->
setSplitInfo
(
_blockMgr
);
}
_tmpBlocksMgr
=
new
BlockMgr
(
_context
->
getOverlapFraction
(),
_context
->
getReciprocal
());
}
SubtractFile
::~
SubtractFile
(
void
)
{
delete
_blockMgr
;
_blockMgr
=
NULL
;
delete
_recordOutputMgr
;
delete
_tmpBlocksMgr
;
}
void
SubtractFile
::
processHits
(
RecordKeyVector
&
hits
)
{
_recordOutputMgr
->
printRecord
(
hits
);
}
bool
SubtractFile
::
subtractFiles
()
{
if
(
_context
->
getSortedInput
())
{
return
processSortedFiles
();
}
return
processUnsortedFiles
();
}
bool
SubtractFile
::
processSortedFiles
()
{
// use the chromsweep algorithm to detect overlaps on the fly.
NewChromSweep
sweep
(
_context
);
if
(
!
sweep
.
init
())
{
return
false
;
}
RecordKeyVector
hitSet
;
while
(
sweep
.
next
(
hitSet
))
{
if
(
_context
->
getObeySplits
())
{
RecordKeyVector
keySet
(
hitSet
.
getKey
());
RecordKeyVector
resultSet
(
hitSet
.
getKey
());
_blockMgr
->
findBlockedOverlaps
(
keySet
,
hitSet
,
resultSet
);
subtractHits
(
resultSet
);
}
else
{
subtractHits
(
hitSet
);
}
}
if
(
!
_context
->
hasGenomeFile
())
{
sweep
.
closeOut
(
true
);
}
return
true
;
}
bool
SubtractFile
::
processUnsortedFiles
()
{
BinTree
*
binTree
=
new
BinTree
(
_context
);
binTree
->
loadDB
();
FileRecordMgr
*
queryFRM
=
_context
->
getFile
(
_context
->
getQueryFileIdx
());
while
(
!
queryFRM
->
eof
())
{
Record
*
queryRecord
=
queryFRM
->
getNextRecord
();
if
(
queryRecord
==
NULL
)
{
continue
;
}
RecordKeyVector
hitSet
(
queryRecord
);
binTree
->
getHits
(
queryRecord
,
hitSet
);
if
(
_context
->
getObeySplits
())
{
RecordKeyVector
keySet
(
hitSet
.
getKey
());
RecordKeyVector
resultSet
;
_blockMgr
->
findBlockedOverlaps
(
keySet
,
hitSet
,
resultSet
);
subtractHits
(
resultSet
);
}
else
{
subtractHits
(
hitSet
);
}
queryFRM
->
deleteRecord
(
queryRecord
);
}
//clean up.
delete
binTree
;
return
true
;
}
void
SubtractFile
::
subtractHits
(
RecordKeyVector
&
hits
)
{
if
(
hits
.
empty
())
{
// no intersection, nothing to subtract.
// just copy key to hits as if it were a
// self-intersection. This is just for reporting
// purposes.
hits
.
push_back
(
hits
.
getKey
());
processHits
(
hits
);
return
;
}
if
(
_context
->
getRemoveAll
()
&&
_context
->
getSubtractFraction
()
==
0.0
)
{
// hits aren't empty, meaning there is intersection,
// so we want to not report the hit.
hits
.
clearAll
();
return
;
}
//loop through hits. Track which bases in query were covered
const
Record
*
keyRec
=
hits
.
getKey
();
int
keyStart
=
keyRec
->
getStartPos
();
int
keyEnd
=
keyRec
->
getEndPos
();
//this vector of bools will represent the bases of the query.
//for each base, true means uncovered, false means covered.
//they begin as all uncovered.
vector
<
bool
>
keyBases
(
keyEnd
-
keyStart
,
true
);
//now loop through the hits, and cover corresponding query bases
//by setting them to false.
bool
basesRemoved
=
false
;
for
(
RecordKeyVector
::
const_iterator_type
iter
=
hits
.
begin
();
iter
!=
hits
.
end
();
iter
=
hits
.
next
())
{
const
Record
*
hitRec
=
*
iter
;
int
hitStart
=
hitRec
->
getStartPos
();
int
hitEnd
=
hitRec
->
getEndPos
();
int
startIdx
=
max
(
keyStart
,
hitStart
)
-
keyStart
;
int
endIdx
=
min
(
keyEnd
,
hitEnd
)
-
keyStart
;
int
keyLen
=
keyEnd
-
keyStart
;
int
coveredLen
=
endIdx
-
startIdx
;
float
coveragePct
=
(
float
)
coveredLen
/
(
float
)
keyLen
;
//for each base in the hit, set the base in the query to false.
//this effectively "erases" the covered bits. Only do
if
(
_context
->
getRemoveSum
()
||
coveragePct
>=
_context
->
getSubtractFraction
())
{
std
::
fill
(
keyBases
.
begin
()
+
startIdx
,
keyBases
.
begin
()
+
endIdx
,
false
);
basesRemoved
=
true
;
}
}
if
(
!
basesRemoved
)
{
//treat as if there were no intersection
hits
.
clearVector
();
hits
.
push_back
(
hits
.
getKey
());
processHits
(
hits
);
return
;
}
else
if
(
_context
->
getRemoveAll
())
{
hits
.
clearAll
();
return
;
}
// if the -N option is used ( removeSum), do not report if the percentage of
// uniquely covered bases exceeds the overlap fraction.
if
(
_context
->
getRemoveSum
())
{
//determine how many bases are left uncovered.
int
numBasesUncovered
=
std
::
accumulate
(
keyBases
.
begin
(),
keyBases
.
end
(),
0
);
//determine percentage that are covered.
float
pctCovered
=
1.0
-
(
float
)
numBasesUncovered
/
(
float
)(
keyEnd
-
keyStart
);
if
(
pctCovered
>
_context
->
getSubtractFraction
())
{
hits
.
clearAll
();
return
;
}
else
{
hits
.
clearVector
();
hits
.
push_back
(
hits
.
getKey
());
}
processHits
(
hits
);
return
;
}
//now make "blocks" out of the query's remaining stretches of
//uncovered bases.
RecordKeyVector
tempHits
(
keyRec
);
for
(
int
i
=
0
;
i
<
(
int
)
keyBases
.
size
();
i
++
)
{
if
(
keyBases
[
i
]
==
true
)
{
int
blockStart
=
keyStart
+
i
;
while
(
keyBases
[
i
]
==
true
&&
i
<
(
int
)
keyBases
.
size
())
{
i
++
;
}
int
blockEnd
=
min
(
keyStart
+
i
,
keyEnd
);
tempHits
.
push_back
(
_tmpBlocksMgr
->
allocateAndAssignRecord
(
keyRec
,
blockStart
,
blockEnd
));
}
}
processHits
(
tempHits
);
_tmpBlocksMgr
->
deleteBlocks
(
tempHits
);
}
src/subtractFile/subtractFile.h
0 → 100644
View file @
e65c98bc
/*
* subtractFile.h
*
* Created on: Feb 19, 2015
* Author: nek3d
*/
#ifndef SUBTRACTFILE_H_
#define SUBTRACTFILE_H_
#include
"RecordKeyVector.h"
#include
"BlockMgr.h"
using
namespace
std
;
class
ContextSubtract
;
class
BlockMgr
;
class
RecordOutputMgr
;
class
BlockMgr
;
class
SubtractFile
{