Skip to content
GitLab
Projects
Groups
Snippets
/
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Sign in
Toggle navigation
Menu
Open sidebar
R3
legacy
bedtools2
Commits
69d82a78
Commit
69d82a78
authored
Sep 10, 2013
by
nkindlon
Browse files
First commit for new 2.18-alpha branch.
parent
ab7bbbe4
Changes
49
Hide whitespace changes
Inline
Side-by-side
Makefile
View file @
69d82a78
...
...
@@ -16,11 +16,11 @@ export BIN_DIR = bin
export
SRC_DIR
=
src
export
UTIL_DIR
=
src/utils
export
CXX
=
g++
ifeq
($(DEBUG),1)
export
CXXFLAGS
=
-Wall
-O0
-g
-fno-inline
-fkeep-inline-functions
-D_FILE_OFFSET_BITS
=
64
-fPIC
-DDEBUG
-D_DEBUG
else
#
ifeq ($(DEBUG),1)
#
export CXXFLAGS = -Wall -O0 -g -fno-inline -fkeep-inline-functions -D_FILE_OFFSET_BITS=64 -fPIC -DDEBUG -D_DEBUG
#
else
export
CXXFLAGS
=
-Wall
-O2
-D_FILE_OFFSET_BITS
=
64
-fPIC
endif
#
endif
export
LIBS
=
-lz
export
BT_ROOT
=
src/utils/BamTools/
...
...
@@ -42,7 +42,7 @@ SUBDIRS = $(SRC_DIR)/annotateBed \
$(SRC_DIR)
/genomeCoverageBed
\
$(SRC_DIR)
/getOverlap
\
$(SRC_DIR)
/groupBy
\
$(SRC_DIR)
/intersect
Bed
\
$(SRC_DIR)
/intersect
File
\
$(SRC_DIR)
/jaccard
\
$(SRC_DIR)
/linksBed
\
$(SRC_DIR)
/maskFastaFromBed
\
...
...
@@ -50,10 +50,12 @@ SUBDIRS = $(SRC_DIR)/annotateBed \
$(SRC_DIR)
/mergeBed
\
$(SRC_DIR)
/multiBamCov
\
$(SRC_DIR)
/multiIntersectBed
\
$(SRC_DIR)
/nekSandbox1
\
$(SRC_DIR)
/nucBed
\
$(SRC_DIR)
/pairToBed
\
$(SRC_DIR)
/pairToPair
\
$(SRC_DIR)
/randomBed
\
$(SRC_DIR)
/regressTest
\
$(SRC_DIR)
/reldist
\
$(SRC_DIR)
/shuffleBed
\
$(SRC_DIR)
/slopBed
\
...
...
@@ -65,12 +67,17 @@ SUBDIRS = $(SRC_DIR)/annotateBed \
$(SRC_DIR)
/windowMaker
UTIL_SUBDIRS
=
$(SRC_DIR)
/utils/bedFile
\
$(SRC_DIR)
/utils/BinTree
\
$(SRC_DIR)
/utils/version
\
$(SRC_DIR)
/utils/bedGraphFile
\
$(SRC_DIR)
/utils/chromsweep
\
$(SRC_DIR)
/utils/Contexts
\
$(SRC_DIR)
/utils/FileRecordTools
\
$(SRC_DIR)
/utils/general
\
$(SRC_DIR)
/utils/gzstream
\
$(SRC_DIR)
/utils/fileType
\
$(SRC_DIR)
/utils/bedFilePE
\
$(SRC_DIR)
/utils/NewChromsweep
\
$(SRC_DIR)
/utils/sequenceUtilities
\
$(SRC_DIR)
/utils/tabFile
\
$(SRC_DIR)
/utils/BamTools
\
...
...
@@ -78,7 +85,7 @@ UTIL_SUBDIRS = $(SRC_DIR)/utils/bedFile \
$(SRC_DIR)
/utils/BlockedIntervals
\
$(SRC_DIR)
/utils/Fasta
\
$(SRC_DIR)
/utils/VectorOps
\
$(SRC_DIR)
/utils/
g
enomeFile
$(SRC_DIR)
/utils/
G
enomeFile
BUILT_OBJECTS
=
$(OBJ_DIR)
/
*
.o
...
...
src/bedToBam/Makefile
View file @
69d82a78
...
...
@@ -8,7 +8,7 @@ BIN_DIR = ../../bin/
INCLUDES
=
-I
$(UTILITIES_DIR)
/bedFile/
\
-I
$(UTILITIES_DIR)
/version/
\
-I
$(UTILITIES_DIR)
/gzstream/
\
-I
$(UTILITIES_DIR)
/
g
enomeFile/
\
-I
$(UTILITIES_DIR)
/
G
enomeFile/
\
-I
$(UTILITIES_DIR)
/lineFileUtilities/
\
-I
$(UTILITIES_DIR)
/fileType/
\
-I
$(UTILITIES_DIR)
/BamTools/include
\
...
...
src/bedToBam/bedToBam.cpp
View file @
69d82a78
...
...
@@ -11,7 +11,7 @@
******************************************************************************/
#include
"lineFileUtilities.h"
#include
"bedFile.h"
#include
"
g
enomeFile.h"
#include
"
G
enomeFile.h"
#include
"version.h"
...
...
src/bedToIgv/Makefile
View file @
69d82a78
...
...
@@ -6,7 +6,7 @@ BIN_DIR = ../../bin/
# define our includes
# -------------------
INCLUDES
=
-I
$(UTILITIES_DIR)
/bedFile/
\
-I
$(UTILITIES_DIR)
/
g
enomeFile/
\
-I
$(UTILITIES_DIR)
/
G
enomeFile/
\
-I
$(UTILITIES_DIR)
/lineFileUtilities/
\
-I
$(UTILITIES_DIR)
/gzstream/
\
-I
$(UTILITIES_DIR)
/fileType/
\
...
...
src/bedToIgv/bedToIgv.cpp
View file @
69d82a78
...
...
@@ -11,7 +11,7 @@
******************************************************************************/
#include
"lineFileUtilities.h"
#include
"bedFile.h"
#include
"
g
enomeFile.h"
#include
"
G
enomeFile.h"
#include
"version.h"
#include
<vector>
...
...
src/bedpeToBam/Makefile
View file @
69d82a78
...
...
@@ -9,7 +9,7 @@ INCLUDES = -I$(UTILITIES_DIR)/bedFile/ \
-I
$(UTILITIES_DIR)
/bedFilePE/
\
-I
$(UTILITIES_DIR)
/version/
\
-I
$(UTILITIES_DIR)
/gzstream/
\
-I
$(UTILITIES_DIR)
/
g
enomeFile/
\
-I
$(UTILITIES_DIR)
/
G
enomeFile/
\
-I
$(UTILITIES_DIR)
/lineFileUtilities/
\
-I
$(UTILITIES_DIR)
/fileType/
\
-I
$(UTILITIES_DIR)
/BamTools/include
\
...
...
src/bedpeToBam/bedpeToBam.cpp
View file @
69d82a78
...
...
@@ -11,7 +11,7 @@
******************************************************************************/
#include
"lineFileUtilities.h"
#include
"bedFilePE.h"
#include
"
g
enomeFile.h"
#include
"
G
enomeFile.h"
#include
"version.h"
...
...
src/bedtools.cpp
View file @
69d82a78
...
...
@@ -45,6 +45,7 @@ int closest_main(int argc, char* argv[]); //
int
cluster_main
(
int
argc
,
char
*
argv
[]);
//
int
complement_main
(
int
argc
,
char
*
argv
[]);
//
int
coverage_main
(
int
argc
,
char
*
argv
[]);
//
int
regress_test_main
(
int
argc
,
char
**
argv
);
//
int
expand_main
(
int
argc
,
char
*
argv
[]);
//
int
fastafrombed_main
(
int
argc
,
char
*
argv
[]);
//
int
flank_main
(
int
argc
,
char
*
argv
[]);
//
...
...
@@ -59,6 +60,7 @@ int map_main(int argc, char* argv[]); //
int
merge_main
(
int
argc
,
char
*
argv
[]);
//
int
multibamcov_main
(
int
argc
,
char
*
argv
[]);
//
int
multiintersect_main
(
int
argc
,
char
*
argv
[]);
//
int
nek_sandbox1_main
(
int
argc
,
char
*
argv
[]);
//
int
nuc_main
(
int
argc
,
char
*
argv
[]);
//
int
pairtobed_main
(
int
argc
,
char
*
argv
[]);
//
int
pairtopair_main
(
int
argc
,
char
*
argv
[]);
//
...
...
@@ -136,9 +138,8 @@ int main(int argc, char *argv[])
else
if
(
sub_cmd
==
"makewindows"
)
return
windowmaker_main
(
argc
-
1
,
argv
+
1
);
else
if
(
sub_cmd
==
"groupby"
)
return
groupby_main
(
argc
-
1
,
argv
+
1
);
else
if
(
sub_cmd
==
"expand"
)
return
expand_main
(
argc
-
1
,
argv
+
1
);
else
if
(
sub_cmd
==
"neksb1"
)
return
nek_sandbox1_main
(
argc
-
1
,
argv
+
1
);
else
if
(
sub_cmd
==
"regresstest"
)
return
regress_test_main
(
argc
,
argv
);
//this command does need all the orig args.
// help
else
if
(
sub_cmd
==
"-h"
||
sub_cmd
==
"--help"
||
sub_cmd
==
"-help"
)
...
...
@@ -230,7 +231,7 @@ int bedtools_help(void)
cout
<<
"[ BAM focused tools ]"
<<
endl
;
cout
<<
" multicov "
<<
"Counts coverage from multiple BAMs at specific intervals.
\n
"
;
cout
<<
" tag "
<<
"Tag BAM alignments based on overlaps with interval files.
\n
"
;
cout
<<
endl
;
cout
<<
"[ Statistical relationships ]"
<<
endl
;
cout
<<
" jaccard "
<<
"Calculate the Jaccard statistic b/w two sets of intervals.
\n
"
;
...
...
src/complementBed/Makefile
View file @
69d82a78
...
...
@@ -6,7 +6,7 @@ BIN_DIR = ../../bin/
# define our includes
# -------------------
INCLUDES
=
-I
$(UTILITIES_DIR)
/bedFile/
\
-I
$(UTILITIES_DIR)
/
g
enomeFile/
\
-I
$(UTILITIES_DIR)
/
G
enomeFile/
\
-I
$(UTILITIES_DIR)
/lineFileUtilities/
\
-I
$(UTILITIES_DIR)
/gzstream/
\
-I
$(UTILITIES_DIR)
/fileType/
\
...
...
src/complementBed/complementBed.h
View file @
69d82a78
...
...
@@ -10,7 +10,7 @@
Licenced under the GNU General Public License 2.0 license.
******************************************************************************/
#include
"bedFile.h"
#include
"
g
enomeFile.h"
#include
"
G
enomeFile.h"
#include
<vector>
#include
<bitset>
...
...
src/flankBed/Makefile
View file @
69d82a78
...
...
@@ -6,7 +6,7 @@ BIN_DIR = ../../bin/
# define our includes
# -------------------
INCLUDES
=
-I
$(UTILITIES_DIR)
/bedFile/
\
-I
$(UTILITIES_DIR)
/
g
enomeFile/
\
-I
$(UTILITIES_DIR)
/
G
enomeFile/
\
-I
$(UTILITIES_DIR)
/lineFileUtilities/
\
-I
$(UTILITIES_DIR)
/gzstream/
\
-I
$(UTILITIES_DIR)
/fileType/
\
...
...
src/flankBed/flankBed.h
View file @
69d82a78
...
...
@@ -11,7 +11,7 @@
******************************************************************************/
#include
"bedFile.h"
#include
"
g
enomeFile.h"
#include
"
G
enomeFile.h"
#include
<vector>
#include
<iostream>
...
...
src/genomeCoverageBed/Makefile
View file @
69d82a78
...
...
@@ -7,7 +7,7 @@ BIN_DIR = ../../bin/
# -------------------
INCLUDES
=
-I
$(UTILITIES_DIR)
/bedFile/
\
-I
$(UTILITIES_DIR)
/gzstream/
\
-I
$(UTILITIES_DIR)
/
g
enomeFile/
\
-I
$(UTILITIES_DIR)
/
G
enomeFile/
\
-I
$(UTILITIES_DIR)
/lineFileUtilities/
\
-I
$(UTILITIES_DIR)
/fileType/
\
-I
$(UTILITIES_DIR)
/BamTools/include
\
...
...
src/genomeCoverageBed/genomeCoverageBed.h
View file @
69d82a78
...
...
@@ -10,7 +10,7 @@ aaronquinlan@gmail.com
Licenced under the GNU General Public License 2.0 license.
******************************************************************************/
#include
"bedFile.h"
#include
"
g
enomeFile.h"
#include
"
G
enomeFile.h"
#include
"BlockedIntervals.h"
#include
"api/BamReader.h"
...
...
src/intersectBed/Makefile
deleted
100644 → 0
View file @
ab7bbbe4
UTILITIES_DIR
=
../utils/
OBJ_DIR
=
../../obj/
BIN_DIR
=
../../bin/
# -------------------
# define our includes
# -------------------
INCLUDES
=
-I
$(UTILITIES_DIR)
/bedFile/
\
-I
$(UTILITIES_DIR)
/gzstream/
\
-I
$(UTILITIES_DIR)
/genomeFile/
\
-I
$(UTILITIES_DIR)
/lineFileUtilities/
\
-I
$(UTILITIES_DIR)
/fileType/
\
-I
$(UTILITIES_DIR)
/BamTools/include
\
-I
$(UTILITIES_DIR)
/BlockedIntervals
\
-I
$(UTILITIES_DIR)
/BamTools-Ancillary
\
-I
$(UTILITIES_DIR)
/chromsweep
\
-I
$(UTILITIES_DIR)
/version/
# ----------------------------------
# define our source and object files
# ----------------------------------
SOURCES
=
intersectMain.cpp intersectBed.cpp intersectBed.h
OBJECTS
=
intersectMain.o intersectBed.o
BUILT_OBJECTS
=
$(
patsubst
%,
$(OBJ_DIR)
/%,
$(OBJECTS)
)
PROGRAM
=
intersectBed
all
:
$(BUILT_OBJECTS)
.PHONY
:
all
$(BUILT_OBJECTS)
:
$(SOURCES)
@
echo
" * compiling"
$
(
*
F
)
.cpp
@
$(CXX)
-c
-o
$@
$
(
*
F
)
.cpp
$(LDFLAGS)
$(CXXFLAGS)
$(DFLAGS)
$(INCLUDES)
clean
:
@
echo
"Cleaning up."
@
rm
-f
$(OBJ_DIR)
/intersectMain.o
$(OBJ_DIR)
/intersectBed.o
.PHONY
:
clean
src/intersectBed/intersectBed.cpp
deleted
100644 → 0
View file @
ab7bbbe4
/*****************************************************************************
intersectBed.cpp
(c) 2009 - Aaron Quinlan
Hall Laboratory
Department of Biochemistry and Molecular Genetics
University of Virginia
aaronquinlan@gmail.com
Licenced under the GNU General Public License 2.0 license.
******************************************************************************/
#include
"lineFileUtilities.h"
#include
"intersectBed.h"
/************************************
Helper functions
************************************/
bool
BedIntersect
::
processHits
(
const
BED
&
a
,
const
vector
<
BED
>
&
hits
)
{
bool
hitsFound
=
false
;
if
(
_printable
==
true
)
{
// -wao the user wants to force the reporting of 0 overlap
if
(
hits
.
size
()
==
0
)
{
if
(
_writeAllOverlap
)
{
_bedA
->
reportBedTab
(
a
);
_bedB
->
reportNullBedTab
();
printf
(
"0
\n
"
);
}
else
if
(
_leftJoin
)
{
_bedA
->
reportBedTab
(
a
);
_bedB
->
reportNullBedNewLine
();
}
}
else
{
vector
<
BED
>::
const_iterator
h
=
hits
.
begin
();
vector
<
BED
>::
const_iterator
hitsEnd
=
hits
.
end
();
for
(;
h
!=
hitsEnd
;
++
h
)
{
CHRPOS
s
=
max
(
a
.
start
,
h
->
start
);
CHRPOS
e
=
min
(
a
.
end
,
h
->
end
);
int
overlapBases
=
(
e
-
s
);
ReportOverlapDetail
(
overlapBases
,
a
,
*
h
,
s
,
e
);
hitsFound
=
true
;
}
}
}
else
{
ReportOverlapSummary
(
a
,
hits
.
size
());
}
return
hitsFound
;
}
/*
Constructor
*/
BedIntersect
::
BedIntersect
(
string
bedAFile
,
string
bedBFile
,
bool
anyHit
,
bool
writeA
,
bool
writeB
,
bool
writeOverlap
,
bool
writeAllOverlap
,
float
overlapFraction
,
bool
noHit
,
bool
leftJoin
,
bool
writeCount
,
bool
sameStrand
,
bool
diffStrand
,
bool
reciprocal
,
bool
obeySplits
,
bool
bamInput
,
bool
bamOutput
,
bool
isUncompressedBam
,
bool
sortedInput
,
bool
printHeader
)
{
_bedAFile
=
bedAFile
;
_bedBFile
=
bedBFile
;
_anyHit
=
anyHit
;
_noHit
=
noHit
;
_leftJoin
=
leftJoin
;
_writeA
=
writeA
;
_writeB
=
writeB
;
_writeOverlap
=
writeOverlap
;
_writeAllOverlap
=
writeAllOverlap
;
_writeCount
=
writeCount
;
_overlapFraction
=
overlapFraction
;
_sameStrand
=
sameStrand
;
_diffStrand
=
diffStrand
;
_reciprocal
=
reciprocal
;
_obeySplits
=
obeySplits
;
_bamInput
=
bamInput
;
_bamOutput
=
bamOutput
;
_isUncompressedBam
=
isUncompressedBam
;
_sortedInput
=
sortedInput
;
_printHeader
=
printHeader
;
// should we print each overlap, or does the user want summary information?
_printable
=
true
;
if
(
_anyHit
||
_noHit
||
_writeCount
)
_printable
=
false
;
if
(
_bamInput
==
false
)
IntersectBed
();
else
IntersectBam
(
bedAFile
);
}
/*
Destructor
*/
BedIntersect
::~
BedIntersect
(
void
)
{
}
bool
BedIntersect
::
FindOverlaps
(
const
BED
&
a
,
vector
<
BED
>
&
hits
)
{
bool
hitsFound
=
false
;
// collect and report the sufficient hits
_bedB
->
allHits
(
a
.
chrom
,
a
.
start
,
a
.
end
,
a
.
strand
,
hits
,
_sameStrand
,
_diffStrand
,
_overlapFraction
,
_reciprocal
);
hitsFound
=
processHits
(
a
,
hits
);
return
hitsFound
;
}
bool
BedIntersect
::
FindBlockedOverlaps
(
const
BED
&
a
,
const
vector
<
BED
>
&
a_blocks
,
const
vector
<
BED
>
&
hits
,
bool
a_is_bam
)
{
int
a_footprint
=
GetTotalBlockLength
(
a_blocks
);
// container to store the set of raw hits
// that actually overlap the A blocks
bedVector
valid_hits
;
valid_hits
.
reserve
(
hits
.
size
());
// 1. Loop through each raw hit (outer loop)
// 2. Break the raw hit into it;s blocks
// and see of one of the hit blocks (inner loop)
// overlaps one of a's blocks (inner, inner loop)
// 3. If so, mark the hit as valid and add it to the valid_set.
// Otherwise, the hit only overlapped the span of a and not
// the individual blocks. Thus, it doesn't count.
bedVector
::
const_iterator
hItr
=
hits
.
begin
();
bedVector
::
const_iterator
hEnd
=
hits
.
end
();
for
(;
hItr
!=
hEnd
;
++
hItr
)
{
// break the hit into blocks
bedVector
hitBlocks
;
GetBedBlocks
(
*
hItr
,
hitBlocks
);
int
b_footprint
=
GetTotalBlockLength
(
hitBlocks
);
// test to see if there is a valid hit with one of the blocks
bool
valid_hit
=
false
;
int
total_overlap
=
0
;
bedVector
::
const_iterator
hbItr
=
hitBlocks
.
begin
();
bedVector
::
const_iterator
hbEnd
=
hitBlocks
.
end
();
for
(;
hbItr
!=
hbEnd
;
++
hbItr
)
{
// look for overlaps between this hit/block and each block in a
bedVector
::
const_iterator
a_blockItr
=
a_blocks
.
begin
();
bedVector
::
const_iterator
a_blockEnd
=
a_blocks
.
end
();
for
(;
a_blockItr
!=
a_blockEnd
;
++
a_blockItr
)
{
int
hs
=
max
(
a_blockItr
->
start
,
hbItr
->
start
);
int
he
=
min
(
a_blockItr
->
end
,
hbItr
->
end
);
int
overlap
=
he
-
hs
;
if
(
overlap
>
0
)
{
valid_hit
=
true
;
total_overlap
+=
overlap
;
}
}
}
if
(
valid_hit
)
{
// require sufficient overlap fraction (reciprocal or otherwise)
// w.r.t to the "footprint" (i.e., the total length of each block)
if
(
((
float
)
total_overlap
/
(
float
)
a_footprint
)
>
_overlapFraction
)
{
if
(
_reciprocal
&&
((
float
)
total_overlap
/
(
float
)
b_footprint
)
>
_overlapFraction
)
{
valid_hits
.
push_back
(
*
hItr
);
}
else
if
(
!
_reciprocal
)
{
valid_hits
.
push_back
(
*
hItr
);
}
}
}
}
if
(
!
a_is_bam
)
{
return
processHits
(
a
,
valid_hits
);
}
else
return
!
valid_hits
.
empty
();
}
void
BedIntersect
::
ReportOverlapDetail
(
int
overlapBases
,
const
BED
&
a
,
const
BED
&
b
,
CHRPOS
s
,
CHRPOS
e
)
{
// default. simple intersection only
if
(
_writeA
==
false
&&
_writeB
==
false
&&
_writeOverlap
==
false
&&
_leftJoin
==
false
)
{
_bedA
->
reportBedRangeNewLine
(
a
,
s
,
e
);
}
// -wa -wb write the original A and B
else
if
((
_writeA
==
true
&&
_writeB
==
true
)
||
_leftJoin
==
true
)
{
_bedA
->
reportBedTab
(
a
);
_bedB
->
reportBedNewLine
(
b
);
}
// -wa write just the original A
else
if
(
_writeA
==
true
)
{
_bedA
->
reportBedNewLine
(
a
);
}
// -wb write the intersected portion of A and the original B
else
if
(
_writeB
==
true
)
{
_bedA
->
reportBedRangeTab
(
a
,
s
,
e
);
_bedB
->
reportBedNewLine
(
b
);
}
// -wo write the original A and B plus the no. of overlapping bases.
else
if
(
_writeOverlap
==
true
)
{
_bedA
->
reportBedTab
(
a
);
_bedB
->
reportBedTab
(
b
);
if
(
b
.
zeroLength
==
false
)
printf
(
"%d
\n
"
,
overlapBases
);
else
printf
(
"0
\n
"
);
}
}
void
BedIntersect
::
ReportOverlapSummary
(
const
BED
&
a
,
const
int
&
numOverlapsFound
)
{
// -u just report the fact that there was >= 1 overlaps
if
(
_anyHit
&&
(
numOverlapsFound
>=
1
))
{
_bedA
->
reportBedNewLine
(
a
);
}
// -c report the total number of features overlapped in B
else
if
(
_writeCount
)
{
_bedA
->
reportBedTab
(
a
);
printf
(
"%d
\n
"
,
numOverlapsFound
);
}
// -v report iff there were no overlaps
else
if
(
_noHit
&&
(
numOverlapsFound
==
0
))
{
_bedA
->
reportBedNewLine
(
a
);
}
}
void
BedIntersect
::
IntersectBed
()
{
// create new BED file objects for A and B
_bedA
=
new
BedFile
(
_bedAFile
);
_bedB
=
new
BedFile
(
_bedBFile
);
if
(
_sortedInput
==
false
)
{
// load the "B" file into a map in order to
// compare each entry in A to it in search of overlaps.
_bedB
->
loadBedFileIntoMap
();
vector
<
BED
>
hits
;
hits
.
reserve
(
100
);
BED
a
;
// open the "A" file, process each BED entry and searh for overlaps.
_bedA
->
Open
();
// report A's header first if asked.
if
(
_printHeader
==
true
)
{
_bedA
->
PrintHeader
();
}
while
(
_bedA
->
GetNextBed
(
a
))
{
if
(
_bedA
->
_status
==
BED_VALID
)
{
// treat the BED as a single "block"
if
(
_obeySplits
==
false
)
FindOverlaps
(
a
,
hits
);
// split the BED12 into blocks and look for
// overlaps in each discrete block
else
{
// find the hits that overlap with the
// full span of the blocked BED
_bedB
->
allHits
(
a
.
chrom
,
a
.
start
,
a
.
end
,
a
.
strand
,
hits
,
_sameStrand
,
_diffStrand
,
0.0
,
false
);
// break a into discrete blocks, as we need to
// measure overlap with the individual blocks,
// not the full span.
bedVector
a_blocks
;
GetBedBlocks
(
a
,
a_blocks
);
// find the overlaps between the block in A and B
// last parm is false as a is not a BAM entry
FindBlockedOverlaps
(
a
,
a_blocks
,
hits
,
false
);
}
hits
.
clear
();
}
}
_bedA
->
Close
();
}
else
{
// use the chromsweep algorithm to detect overlaps on the fly.
ChromSweep
sweep
=
ChromSweep
(
_bedA
,
_bedB
,
_sameStrand
,
_diffStrand
,
_overlapFraction
,
_reciprocal
,
false
,
_printHeader
);
pair
<
BED
,
vector
<
BED
>
>
hit_set
;
hit_set
.
second
.
reserve
(
10000
);
while
(
sweep
.
Next
(
hit_set
))
{
if
(
_obeySplits
==
false
)
{
processHits
(
hit_set
.
first
,
hit_set
.
second
);
}
else
{
bedVector
a_blocks
;
GetBedBlocks
(
hit_set
.
first
,
a_blocks
);
FindBlockedOverlaps
(
hit_set
.
first
,
a_blocks
,
hit_set
.
second
,
false
);
}
}
}
}
void
BedIntersect
::
IntersectBam
(
string
bamFile
)
{
// load the "B" bed file into a map so
// that we can easily compare "A" to it for overlaps
_bedB
=
new
BedFile
(
_bedBFile
);
_bedB
->
loadBedFileIntoMap
();
// create a dummy BED A file for printing purposes if not
// using BAM output.
if
(
_bamOutput
==
false
)
{
_bedA
=
new
BedFile
(
_bedAFile
);
_bedA
->
bedType
=
12
;
}
// open the BAM file
BamReader
reader
;
BamWriter
writer
;
if
(
!
reader
.
Open
(
bamFile
))
{
cerr
<<
"Failed to open BAM file "
<<
bamFile
<<
endl
;
exit
(
1
);
}
// get header & reference information
string
bamHeader
=
reader
.
GetHeaderText
();
RefVector
refs
=
reader
.
GetReferenceData
();
// open a BAM output to stdout if we are writing BAM
if
(
_bamOutput
==
true
)
{
// set compression mode
BamWriter
::
CompressionMode
compressionMode
=
BamWriter
::
Compressed
;
if
(
_isUncompressedBam
)
compressionMode
=
BamWriter
::
Uncompressed
;
writer
.
SetCompressionMode
(
compressionMode
);
// open our BAM writer
writer
.
Open
(
"stdout"
,
bamHeader
,
refs
);
}
vector
<
BED
>
hits
;
// reserve some space
hits
.
reserve
(
100
);
BamAlignment
bam
;
// get each set of alignments for each pair.
while
(
reader
.
GetNextAlignment
(
bam
))
{
// save an unaligned read if -v
if
(
!
bam
.
IsMapped
())
{
if
((
_noHit
==
true
)
&&
(
_bamOutput
==
true
))
writer
.
SaveAlignment
(
bam
);
continue
;