Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Sign in
Toggle navigation
Menu
Open sidebar
R3
legacy
bedtools2
Commits
31878f3c
Commit
31878f3c
authored
Nov 06, 2014
by
arq5x
Browse files
merge conflicts
parents
5b47f91c
7b2a5605
Changes
31
Expand all
Hide whitespace changes
Inline
Side-by-side
Makefile
View file @
31878f3c
...
...
@@ -31,7 +31,7 @@ SUBDIRS = $(SRC_DIR)/annotateBed \
$(SRC_DIR)
/bedpeToBam
\
$(SRC_DIR)
/bedToIgv
\
$(SRC_DIR)
/bed12ToBed6
\
$(SRC_DIR)
/closest
Bed
\
$(SRC_DIR)
/closest
File
\
$(SRC_DIR)
/clusterBed
\
$(SRC_DIR)
/complementBed
\
$(SRC_DIR)
/coverageBed
\
...
...
src/bedtools.cpp
View file @
31878f3c
...
...
@@ -141,9 +141,9 @@ int main(int argc, char *argv[])
else
if
(
sub_cmd
==
"makewindows"
)
return
windowmaker_main
(
argc
-
1
,
argv
+
1
);
else
if
(
sub_cmd
==
"groupby"
)
return
groupby_main
(
argc
-
1
,
argv
+
1
);
else
if
(
sub_cmd
==
"expand"
)
return
expand_main
(
argc
-
1
,
argv
+
1
);
else
if
(
sub_cmd
==
"sample"
)
return
sample_main
(
argc
-
1
,
argv
+
1
);
else
if
(
sub_cmd
==
"neksb1"
)
return
nek_sandbox1_main
(
argc
-
1
,
argv
+
1
);
else
if
(
sub_cmd
==
"regresstest"
)
return
regress_test_main
(
argc
,
argv
);
//this command does need all the orig args.
else
if
(
sub_cmd
==
"sample"
)
return
sample_main
(
argc
-
1
,
argv
+
1
);
else
if
(
sub_cmd
==
"neksb1"
)
return
nek_sandbox1_main
(
argc
-
1
,
argv
+
1
);
else
if
(
sub_cmd
==
"regresstest"
)
return
regress_test_main
(
argc
,
argv
);
//this command does need all the orig args.
// help
else
if
(
sub_cmd
==
"-h"
||
sub_cmd
==
"--help"
||
sub_cmd
==
"-help"
)
...
...
src/closestBed/closestBed.cpp
deleted
100644 → 0
View file @
5b47f91c
/*****************************************************************************
closestBed.cpp
(c) 2009 - Aaron Quinlan
Hall Laboratory
Department of Biochemistry and Molecular Genetics
University of Virginia
aaronquinlan@gmail.com
Licenced under the GNU General Public License 2.0 license.
******************************************************************************/
#include "lineFileUtilities.h"
#include "closestBed.h"
const
int
MAXSLOP
=
256000000
;
// 2*MAXSLOP = 512 megabases.
// We don't want to keep looking if we
// can't find a nearby feature within 512 Mb.
const
int
SLOPGROWTH
=
2048000
;
/*
Constructor
*/
BedClosest
::
BedClosest
(
string
&
bedAFile
,
string
&
bedBFile
,
bool
sameStrand
,
bool
diffStrand
,
string
&
tieMode
,
bool
reportDistance
,
bool
signDistance
,
string
&
_strandedDistMode
,
bool
ignoreOverlaps
,
bool
ignoreUpstream
,
bool
ignoreDownstream
,
bool
printHeader
,
bool
diffNames
)
:
_bedAFile
(
bedAFile
)
,
_bedBFile
(
bedBFile
)
,
_tieMode
(
tieMode
)
,
_sameStrand
(
sameStrand
)
,
_diffStrand
(
diffStrand
)
,
_reportDistance
(
reportDistance
)
,
_signDistance
(
signDistance
)
,
_strandedDistMode
(
_strandedDistMode
)
,
_ignoreOverlaps
(
ignoreOverlaps
)
,
_ignoreUpstream
(
ignoreUpstream
)
,
_ignoreDownstream
(
ignoreDownstream
)
,
_printHeader
(
printHeader
)
,
_diffNames
(
diffNames
)
{
_bedA
=
new
BedFile
(
_bedAFile
);
_bedB
=
new
BedFile
(
_bedBFile
);
FindClosestBed
();
}
/*
Destructor
*/
BedClosest
::~
BedClosest
(
void
)
{
}
void
BedClosest
::
ReportClosestNotFound
(
const
BED
&
a
)
{
_bedA
->
reportBedTab
(
a
);
if
(
_reportDistance
==
true
)
{
_bedB
->
reportNullBedTab
();
cout
<<
-
1
<<
endl
;
}
else
_bedB
->
reportNullBedNewLine
();
}
void
BedClosest
::
FindWindowOverlaps
(
BED
&
a
,
vector
<
BED
>
&
hits
)
{
int
slop
=
0
;
// start out just looking for overlaps
// within the current bin (~128Kb)
// update the current feature's start and end
CHRPOS
aFudgeStart
=
0
;
CHRPOS
aFudgeEnd
;
int
numOverlaps
=
0
;
vector
<
BED
>
closestB
;
CHRPOS
minDistance
=
INT_MAX
;
int32_t
curDistance
=
INT_MAX
;
vector
<
int32_t
>
distances
;
// is there at least one feature in B on the same chrom
// as the current A feature?
if
(
_bedB
->
bedMap
.
find
(
a
.
chrom
)
!=
_bedB
->
bedMap
.
end
())
{
while
((
numOverlaps
==
0
)
&&
(
slop
<=
MAXSLOP
))
{
// add some slop (starting at 0 bases) to a in hopes
// of finding a hit in B
if
((
static_cast
<
int
>
(
a
.
start
)
-
slop
)
>
0
)
aFudgeStart
=
a
.
start
-
slop
;
else
aFudgeStart
=
0
;
if
((
static_cast
<
int
>
(
a
.
start
)
+
slop
)
<
(
2
*
MAXSLOP
))
aFudgeEnd
=
a
.
end
+
slop
;
else
aFudgeEnd
=
2
*
MAXSLOP
;
// THE HEAVY LIFTING
// search for hits with the current slop added
_bedB
->
allHits
(
a
.
chrom
,
aFudgeStart
,
aFudgeEnd
,
a
.
strand
,
hits
,
_sameStrand
,
_diffStrand
,
0.0
,
false
);
vector
<
BED
>::
const_iterator
h
=
hits
.
begin
();
vector
<
BED
>::
const_iterator
hitsEnd
=
hits
.
end
();
for
(;
h
!=
hitsEnd
;
++
h
)
{
// skip the hit if the user doesn't want to allow same names.
if
((
_diffNames
==
true
)
&&
(
a
.
name
==
h
->
name
))
continue
;
// do the actual features overlap?
int
s
=
max
(
a
.
start
,
h
->
start
);
int
e
=
min
(
a
.
end
,
h
->
end
);
int
overlapBases
=
(
e
-
s
);
// make sure we allow overlapping features.
if
((
overlapBases
>
0
)
&&
(
_ignoreOverlaps
==
true
))
continue
;
else
numOverlaps
++
;
// there is overlap. make sure we allow overlapping features ()
if
(
overlapBases
>
0
)
{
curDistance
=
0
;
if
(
curDistance
<
(
int32_t
)
minDistance
)
{
closestB
.
clear
();
distances
.
clear
();
}
minDistance
=
0
;
closestB
.
push_back
(
*
h
);
distances
.
push_back
(
0
);
}
// the hit is to the "left" of A
else
if
(
h
->
end
<=
a
.
start
)
{
curDistance
=
(
a
.
start
-
h
->
end
)
+
1
;
if
(
_signDistance
)
{
if
((
_strandedDistMode
==
"ref"
)
||
(
_strandedDistMode
==
"a"
&&
a
.
strand
!=
"-"
)
||
(
_strandedDistMode
==
"b"
&&
h
->
strand
==
"-"
))
{
// hit is "upstream" of A
if
(
_ignoreUpstream
)
{
numOverlaps
--
;
continue
;
}
else
{
curDistance
=
-
curDistance
;
}
}
else
if
(
_ignoreDownstream
)
{
numOverlaps
--
;
continue
;
}
}
if
(
abs
(
curDistance
)
<
(
int32_t
)
minDistance
)
{
minDistance
=
abs
(
curDistance
);
closestB
.
clear
();
closestB
.
push_back
(
*
h
);
distances
.
clear
();
distances
.
push_back
(
curDistance
);
}
else
if
(
abs
(
curDistance
)
==
(
int32_t
)
minDistance
)
{
minDistance
=
abs
(
curDistance
);
closestB
.
push_back
(
*
h
);
distances
.
push_back
(
curDistance
);
}
}
// the hit is to the "right" of A
else
if
(
h
->
start
>=
a
.
end
)
{
curDistance
=
(
h
->
start
-
a
.
end
)
+
1
;
if
(
_signDistance
)
{
if
((
_strandedDistMode
==
"a"
&&
a
.
strand
==
"-"
)
||
(
_strandedDistMode
==
"b"
&&
h
->
strand
!=
"-"
))
{
// hit is "upstream" of A
if
(
_ignoreUpstream
)
{
numOverlaps
--
;
continue
;
}
else
{
curDistance
=
-
curDistance
;
}
}
else
if
(
_ignoreDownstream
){
numOverlaps
--
;
continue
;
}
}
if
(
abs
(
curDistance
)
<
(
int32_t
)
minDistance
)
{
minDistance
=
abs
(
curDistance
);
closestB
.
clear
();
closestB
.
push_back
(
*
h
);
distances
.
clear
();
distances
.
push_back
(
curDistance
);
}
else
if
(
abs
(
curDistance
)
==
(
int32_t
)
minDistance
)
{
minDistance
=
abs
(
curDistance
);
closestB
.
push_back
(
*
h
);
distances
.
push_back
(
curDistance
);
}
}
}
// if no overlaps were found, we'll widen the range
// by SLOPGROWTH in each direction and search again.
slop
+=
SLOPGROWTH
;
}
// while ((numOverlaps == 0) && (slop <= MAXSLOP))
// report the closest feature(s) in B to the current A feature.
// obey the user's reporting request (_tieMode)
if
(
numOverlaps
>
0
)
{
if
(
closestB
.
size
()
==
1
||
(
_tieMode
==
"first"
&&
closestB
.
size
()
>
0
))
{
_bedA
->
reportBedTab
(
a
);
if
(
_reportDistance
==
true
)
{
_bedB
->
reportBedTab
(
closestB
[
0
]);
cout
<<
distances
[
0
]
<<
endl
;
}
else
_bedB
->
reportBedNewLine
(
closestB
[
0
]);
}
else
{
if
(
_tieMode
==
"all"
)
{
size_t
i
=
0
;
vector
<
BED
>::
iterator
b
=
closestB
.
begin
();
for
(;
b
!=
closestB
.
end
();
++
b
)
{
_bedA
->
reportBedTab
(
a
);
if
(
_reportDistance
==
true
)
{
_bedB
->
reportBedTab
(
*
b
);
cout
<<
distances
[
i
++
]
<<
endl
;
}
else
_bedB
->
reportBedNewLine
(
*
b
);
}
}
else
if
(
_tieMode
==
"last"
&&
closestB
.
size
()
>
0
)
{
_bedA
->
reportBedTab
(
a
);
if
(
_reportDistance
==
true
)
{
_bedB
->
reportBedTab
(
closestB
[
closestB
.
size
()
-
1
]);
cout
<<
distances
[
distances
.
size
()
-
1
]
<<
endl
;
}
else
_bedB
->
reportBedNewLine
(
closestB
[
closestB
.
size
()
-
1
]);
}
}
}
// there were B features on the same chrom as A, but there were
// none that met the user's criteria (e.g., that it be on the
// same chrom)
else
if
(
numOverlaps
==
0
)
{
ReportClosestNotFound
(
a
);
}
}
// there is no feature in B on the same chromosome as A
else
{
ReportClosestNotFound
(
a
);
}
}
void
BedClosest
::
FindClosestBed
()
{
// load the "B" bed file into a map so
// that we can easily compare "A" to it for overlaps
_bedB
->
loadBedFileIntoMap
();
BED
a
;
vector
<
BED
>
hits
;
// vector of potential hits
hits
.
reserve
(
100
);
_bedA
->
Open
();
// report A's header first if asked.
if
(
_printHeader
==
true
)
{
_bedA
->
PrintHeader
();
}
// process each entry in A in search of the closest feature in B
while
(
_bedA
->
GetNextBed
(
a
))
{
if
(
_bedA
->
_status
==
BED_VALID
)
{
FindWindowOverlaps
(
a
,
hits
);
hits
.
clear
();
}
}
_bedA
->
Close
();
}
// END ClosestBed
src/closestBed/closestBed.h
deleted
100644 → 0
View file @
5b47f91c
/*****************************************************************************
closestBed.h
(c) 2009 - Aaron Quinlan
Hall Laboratory
Department of Biochemistry and Molecular Genetics
University of Virginia
aaronquinlan@gmail.com
Licenced under the GNU General Public License 2.0 license.
******************************************************************************/
#ifndef CLOSESTBED_H
#define CLOSESTBED_H
#include "bedFile.h"
#include <vector>
#include <iostream>
#include <fstream>
using
namespace
std
;
//************************************************
// Class methods and elements
//************************************************
class
BedClosest
{
public:
// constructor
BedClosest
(
string
&
bedAFile
,
string
&
bedBFile
,
bool
sameStrand
,
bool
diffStrand
,
string
&
tieMode
,
bool
reportDistance
,
bool
signDistance
,
string
&
strandedDistMode
,
bool
ignoreOverlaps
,
bool
ignoreUpstream
,
bool
ignoreDownstream
,
bool
printHeader
,
bool
diffNames
);
// destructor
~
BedClosest
(
void
);
// find the closest feature in B to A
void
FindClosestBed
();
private:
// data
string
_bedAFile
;
string
_bedBFile
;
string
_tieMode
;
bool
_sameStrand
;
bool
_diffStrand
;
bool
_reportDistance
;
bool
_signDistance
;
string
_strandedDistMode
;
bool
_ignoreOverlaps
;
bool
_ignoreUpstream
;
bool
_ignoreDownstream
;
bool
_printHeader
;
bool
_diffNames
;
BedFile
*
_bedA
,
*
_bedB
;
// methods
void
FindWindowOverlaps
(
BED
&
,
vector
<
BED
>
&
);
void
ReportClosestNotFound
(
const
BED
&
a
);
};
#endif
/* CLOSEST_H */
src/closest
Bed
/Makefile
→
src/closest
File
/Makefile
View file @
31878f3c
UTILITIES_DIR
=
../utils/
OBJ_DIR
=
../../obj/
BIN_DIR
=
../../bin/
# -------------------
# define our includes
# -------------------
INCLUDES
=
-I
$(UTILITIES_DIR)
/bedFile/
\
-I
$(UTILITIES_DIR)
/lineFileUtilities/
\
-I
$(UTILITIES_DIR)
/gzstream/
\
-I
$(UTILITIES_DIR)
/fileType/
\
-I
$(UTILITIES_DIR)
/version/
# ----------------------------------
# define our source and object files
# ----------------------------------
SOURCES
=
closestMain.cpp closestBed.cpp closestBed.h
OBJECTS
=
closestMain.o closestBed.o
BUILT_OBJECTS
=
$(
patsubst
%,
$(OBJ_DIR)
/%,
$(OBJECTS)
)
all
:
$(BUILT_OBJECTS)
.PHONY
:
all
$(BUILT_OBJECTS)
:
$(SOURCES)
@
echo
" * compiling"
$
(
*
F
)
.cpp
@
$(CXX)
-c
-o
$@
$
(
*
F
)
.cpp
$(LDFLAGS)
$(CXXFLAGS)
$(INCLUDES)
clean
:
@
echo
"Cleaning up."
@
rm
-f
$(OBJ_DIR)
/closestMain.o
$(OBJ_DIR)
/closestBed.o
.PHONY
:
clean
UTILITIES_DIR
=
../utils/
OBJ_DIR
=
../../obj/
BIN_DIR
=
../../bin/
# -------------------
# define our includes
# -------------------
INCLUDES
=
-I
$(UTILITIES_DIR)
/Contexts/
\
-I$(UTILITIES_DIR)/general/
\
-I$(UTILITIES_DIR)/fileType/
\
-I$(UTILITIES_DIR)/gzstream/
\
-I$(UTILITIES_DIR)/GenomeFile/
\
-I$(UTILITIES_DIR)/BamTools/
include
\
-I$(UTILITIES_DIR)/BamTools/src
\
-I$(UTILITIES_DIR)/BlockedIntervals
\
-I$(UTILITIES_DIR)/BamTools-Ancillary
\
-I$(UTILITIES_DIR)/FileRecordTools/
\
-I$(UTILITIES_DIR)/FileRecordTools/FileReaders/
\
-I$(UTILITIES_DIR)/FileRecordTools/Records/
\
-I$(UTILITIES_DIR)/KeyListOps/
\
-I$(UTILITIES_DIR)/RecordOutputMgr/
\
-I$(UTILITIES_DIR)/NewChromsweep
\
-I$(UTILITIES_DIR)/BinTree
\
-I$(UTILITIES_DIR)/version/
# ----------------------------------
# define our source and object files
# ----------------------------------
SOURCES
=
closestMain.cpp closestFile.cpp closestFile.h
OBJECTS
=
closestMain.o closestFile.o
BUILT_OBJECTS
=
$(
patsubst
%,
$(OBJ_DIR)
/%,
$(OBJECTS)
)
all
:
$(BUILT_OBJECTS)
.PHONY
:
all
$(BUILT_OBJECTS)
:
$(SOURCES)
@
echo
" * compiling"
$
(
*
F
)
.cpp
@
$(CXX)
-c
-o
$@
$
(
*
F
)
.cpp
$(LDFLAGS)
$(CXXFLAGS)
$(INCLUDES)
clean
:
@
echo
"Cleaning up."
@
rm
-f
$(OBJ_DIR)
/closestMain.o
$(OBJ_DIR)
/closestFile.o
.PHONY
:
clean
src/closestFile/closestFile.cpp
0 → 100644
View file @
31878f3c
/*
* newClosestFile.cpp
*
* Created on: Sep 25, 2014
* Author: nek3d
*/
#include "FileRecordMgr.h"
#include "RecordOutputMgr.h"
#include "closestFile.h"
#include "CloseSweep.h"
ClosestFile
::
ClosestFile
(
ContextClosest
*
context
)
:
_context
(
context
),
_recordOutputMgr
(
NULL
)
{
_recordOutputMgr
=
new
RecordOutputMgr
();
_recordOutputMgr
->
init
(
_context
);
}
ClosestFile
::~
ClosestFile
()
{
delete
_recordOutputMgr
;
}
bool
ClosestFile
::
getClosest
()
{
CloseSweep
sweep
(
_context
);
if
(
!
sweep
.
init
())
{
return
false
;
}
RecordKeyVector
hitSet
;
while
(
sweep
.
next
(
hitSet
))
{
if
(
_context
->
reportDistance
())
{
_recordOutputMgr
->
printClosest
(
hitSet
,
&
(
sweep
.
getDistances
()));
}
else
{
_recordOutputMgr
->
printClosest
(
hitSet
,
NULL
);
}
}
return
true
;
}
src/closestFile/closestFile.h
0 → 100644
View file @
31878f3c
/*
* newClosestFile.h
*
* Created on: Sep 25, 2014
* Author: nek3d
*/
#ifndef NEWCLOSESTFILE_H_
#define NEWCLOSESTFILE_H_
#include "ContextClosest.h"
using
namespace
std
;
class
RecordOutputMgr
;
class
ClosestFile
{
public:
ClosestFile
(
ContextClosest
*
context
);
~
ClosestFile
(
void
);
bool
getClosest
();
private:
ContextClosest
*
_context
;
RecordOutputMgr
*
_recordOutputMgr
;
};
#endif
/* NEWCLOSESTFILE_H_ */
src/closest
Bed
/closestMain.cpp
→
src/closest
File
/closestMain.cpp
View file @
31878f3c
/*****************************************************************************
closestMain.cpp
(c) 2009 - Aaron Quinlan
Hall Laboratory
Department of Biochemistry and Molecular Genetics
University of Virginia
aaronquinlan@gmail.com
Licenced under the GNU General Public License 2.0 license.
******************************************************************************/
#include "closestBed.h"
#include "version.h"
using
namespace
std
;
// define our program name
#define PROGRAM_NAME "bedtools closest"
// define our parameter checking macro
#define PARAMETER_CHECK(param, paramLen, actualLen) (strncmp(argv[i], param, min(actualLen, paramLen))== 0) && (actualLen == paramLen)
// function declarations
void
closest_help
(
void
);
int
closest_main
(
int
argc
,
char
*
argv
[])
{
// our configuration variables
bool
showHelp
=
false
;
// input files
string
bedAFile
;
string
bedBFile
;
string
tieMode
=
"all"
;
string
strandedDistMode
=
""
;
bool
haveBedA
=
false
;
bool
haveBedB
=
false
;
bool
haveTieMode
=
false
;
bool
sameStrand
=
false
;
bool
diffStrand
=
false
;
bool
ignoreOverlaps
=
false
;
bool
ignoreUpstream
=
false
;
bool
ignoreDownstream
=
false
;
bool
reportDistance
=
false
;
bool
signDistance
=
false
;
bool
haveStrandedDistMode
=
false
;
bool
printHeader
=
false
;
bool
diffNames
=
false
;
// check to see if we should print out some help
if
(
argc
<=
1
)
showHelp
=
true
;
for
(
int
i
=
1
;
i
<
argc
;
i
++
)
{
int
parameterLength
=
(
int
)
strlen
(
argv
[
i
]);
if
(
(
PARAMETER_CHECK
(
"-h"
,
2
,
parameterLength
))
||
(
PARAMETER_CHECK
(
"--help"
,
5
,
parameterLength
)))
{
showHelp
=
true
;
}
}
if
(
showHelp
)
closest_help
();
// do some parsing (all of these parameters require 2 strings)
for
(
int
i
=
1
;
i
<
argc
;
i
++
)
{
int
parameterLength
=
(
int
)
strlen
(
argv
[
i
]);
if
(
PARAMETER_CHECK
(
"-a"
,
2
,
parameterLength
))
{
if
((
i
+
1
)
<
argc
)
{
haveBedA
=
true
;
bedAFile
=
argv
[
i
+
1
];
i
++
;
}
}
else
if
(
PARAMETER_CHECK
(
"-b"
,
2
,
parameterLength
))
{
if
((
i
+
1
)
<
argc
)
{
haveBedB
=
true
;