Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Sign in
Toggle navigation
Menu
Open sidebar
R3
legacy
bedtools2
Commits
b0bc5b7e
Commit
b0bc5b7e
authored
Jun 24, 2015
by
Aaron Quinlan
Browse files
Merge pull request #261 from nkindlon/master
Converted complement to PFM with merge-based approach, sorted input e…
parents
aa11ef9d
f5927cf6
Changes
33
Hide whitespace changes
Inline
Side-by-side
Makefile
View file @
b0bc5b7e
...
...
@@ -36,7 +36,7 @@ SUBDIRS = $(SRC_DIR)/annotateBed \
$(SRC_DIR)
/bed12ToBed6
\
$(SRC_DIR)
/closestFile
\
$(SRC_DIR)
/clusterBed
\
$(SRC_DIR)
/complement
Bed
\
$(SRC_DIR)
/complement
File
\
$(SRC_DIR)
/coverageFile
\
$(SRC_DIR)
/expand
\
$(SRC_DIR)
/fastaFromBed
\
...
...
src/bedtools.cpp
View file @
b0bc5b7e
...
...
@@ -46,7 +46,7 @@ int bedtoigv_main(int argc, char* argv[]);//
int
bedpetobam_main
(
int
argc
,
char
*
argv
[]);
//
void
closest_help
();
int
cluster_main
(
int
argc
,
char
*
argv
[]);
//
int
complement_
main
(
int
argc
,
char
*
argv
[]);
//
void
complement_
help
();
void
coverage_help
();
int
regress_test_main
(
int
argc
,
char
**
argv
);
//
int
expand_main
(
int
argc
,
char
*
argv
[]);
//
...
...
@@ -106,7 +106,6 @@ int main(int argc, char *argv[])
else
if
(
subCmd
==
"window"
)
return
window_main
(
argc
-
1
,
argv
+
1
);
else
if
(
subCmd
==
"genomecov"
)
return
genomecoverage_main
(
argc
-
1
,
argv
+
1
);
else
if
(
subCmd
==
"cluster"
)
return
cluster_main
(
argc
-
1
,
argv
+
1
);
else
if
(
subCmd
==
"complement"
)
return
complement_main
(
argc
-
1
,
argv
+
1
);
else
if
(
subCmd
==
"slop"
)
return
slop_main
(
argc
-
1
,
argv
+
1
);
else
if
(
subCmd
==
"split"
)
return
split_main
(
argc
-
1
,
argv
+
1
);
else
if
(
subCmd
==
"flank"
)
return
flank_main
(
argc
-
1
,
argv
+
1
);
...
...
@@ -307,6 +306,8 @@ void showHelp(const QuickString &subCmd) {
fisher_help
();
}
else
if
(
subCmd
==
"coverage"
)
{
coverage_help
();
}
else
if
(
subCmd
==
"complement"
)
{
complement_help
();
}
}
src/complementBed/complementBed.cpp
deleted
100644 → 0
View file @
aa11ef9d
/*****************************************************************************
complementBed.cpp
(c) 2009 - Aaron Quinlan
Hall Laboratory
Department of Biochemistry and Molecular Genetics
University of Virginia
aaronquinlan@gmail.com
Licenced under the GNU General Public License 2.0 license.
******************************************************************************/
#include "lineFileUtilities.h"
#include "complementBed.h"
BedComplement
::
BedComplement
(
string
&
bedFile
,
string
&
genomeFile
)
{
_bedFile
=
bedFile
;
_genomeFile
=
genomeFile
;
_bed
=
new
BedFile
(
bedFile
);
_genome
=
new
GenomeFile
(
genomeFile
);
}
BedComplement
::~
BedComplement
(
void
)
{
}
//
// Merge overlapping BED entries into a single entry
//
void
BedComplement
::
ComplementBed
()
{
// load the "B" bed file into a map so
// that we can easily compare "A" to it for overlaps
_bed
->
loadBedFileIntoMapNoBin
();
// get a list of the chroms in the user's genome
vector
<
string
>
chromList
=
_genome
->
getChromList
();
// process each chrom in the genome
for
(
size_t
c
=
0
;
c
<
chromList
.
size
();
++
c
)
{
string
currChrom
=
chromList
[
c
];
// create a "bit vector" for the chrom
CHRPOS
currChromSize
=
_genome
->
getChromSize
(
currChrom
);
vector
<
bool
>
chromMasks
(
currChromSize
,
0
);
// mask the chrom for every feature in the BED file
bedVector
::
const_iterator
bItr
=
_bed
->
bedMapNoBin
[
currChrom
].
begin
();
bedVector
::
const_iterator
bEnd
=
_bed
->
bedMapNoBin
[
currChrom
].
end
();
for
(;
bItr
!=
bEnd
;
++
bItr
)
{
if
(
bItr
->
end
>
currChromSize
)
{
cerr
<<
"Warning: end of BED entry exceeds chromosome length. "
<<
"Please correct."
<<
endl
;
_bed
->
reportBedNewLine
(
*
bItr
);
exit
(
1
);
}
// mask all of the positions spanned by this BED entry.
for
(
CHRPOS
b
=
bItr
->
start
;
b
<
bItr
->
end
;
b
++
)
chromMasks
[
b
]
=
1
;
}
// report the unmasked, that is, complemented parts of the chrom
CHRPOS
i
=
0
;
CHRPOS
start
;
while
(
i
<
chromMasks
.
size
())
{
if
(
chromMasks
[
i
]
==
0
)
{
start
=
i
;
while
((
chromMasks
[
i
]
==
0
)
&&
(
i
<
chromMasks
.
size
()))
i
++
;
if
(
start
>
0
)
cout
<<
currChrom
<<
"
\t
"
<<
start
<<
"
\t
"
<<
i
<<
endl
;
else
cout
<<
currChrom
<<
"
\t
"
<<
0
<<
"
\t
"
<<
i
<<
endl
;
}
i
++
;
}
}
}
src/complementBed/complementBed.h
deleted
100644 → 0
View file @
aa11ef9d
/*****************************************************************************
complementBed.h
(c) 2009 - Aaron Quinlan
Hall Laboratory
Department of Biochemistry and Molecular Genetics
University of Virginia
aaronquinlan@gmail.com
Licenced under the GNU General Public License 2.0 license.
******************************************************************************/
#include "bedFile.h"
#include "GenomeFile.h"
#include <vector>
#include <bitset>
#include <algorithm>
#include <iostream>
#include <fstream>
#include <limits.h>
#include <stdlib.h>
using
namespace
std
;
//************************************************
// Class methods and elements
//************************************************
class
BedComplement
{
public:
// constructor
BedComplement
(
string
&
bedFile
,
string
&
genomeFile
);
// destructor
~
BedComplement
(
void
);
void
ComplementBed
();
private:
string
_bedFile
;
string
_genomeFile
;
BedFile
*
_bed
;
GenomeFile
*
_genome
;
};
src/complementBed/complementMain.cpp
deleted
100644 → 0
View file @
aa11ef9d
/*****************************************************************************
complementBedMain.cpp
(c) 2009 - Aaron Quinlan
Hall Laboratory
Department of Biochemistry and Molecular Genetics
University of Virginia
aaronquinlan@gmail.com
Licenced under the GNU General Public License 2.0 license.
******************************************************************************/
#include "complementBed.h"
#include "version.h"
using
namespace
std
;
// define our program name
#define PROGRAM_NAME "bedtools complement"
// define our parameter checking macro
#define PARAMETER_CHECK(param, paramLen, actualLen) (strncmp(argv[i], param, min(actualLen, paramLen))== 0) && (actualLen == paramLen)
// function declarations
void
complement_help
(
void
);
int
complement_main
(
int
argc
,
char
*
argv
[])
{
// our configuration variables
bool
showHelp
=
false
;
// input files
string
bedFile
=
"stdin"
;
string
genomeFile
;
bool
haveBed
=
true
;
bool
haveGenome
=
false
;
for
(
int
i
=
1
;
i
<
argc
;
i
++
)
{
int
parameterLength
=
(
int
)
strlen
(
argv
[
i
]);
if
((
PARAMETER_CHECK
(
"-h"
,
2
,
parameterLength
))
||
(
PARAMETER_CHECK
(
"--help"
,
5
,
parameterLength
)))
{
showHelp
=
true
;
}
}
if
(
showHelp
)
complement_help
();
// do some parsing (all of these parameters require 2 strings)
for
(
int
i
=
1
;
i
<
argc
;
i
++
)
{
int
parameterLength
=
(
int
)
strlen
(
argv
[
i
]);
if
(
PARAMETER_CHECK
(
"-i"
,
2
,
parameterLength
))
{
if
((
i
+
1
)
<
argc
)
{
bedFile
=
argv
[
i
+
1
];
i
++
;
}
}
else
if
(
PARAMETER_CHECK
(
"-g"
,
2
,
parameterLength
))
{
if
((
i
+
1
)
<
argc
)
{
haveGenome
=
true
;
genomeFile
=
argv
[
i
+
1
];
i
++
;
}
}
else
{
cerr
<<
endl
<<
"*****ERROR: Unrecognized parameter: "
<<
argv
[
i
]
<<
" *****"
<<
endl
<<
endl
;
showHelp
=
true
;
}
}
// make sure we have both input files
if
(
!
haveBed
||
!
haveGenome
)
{
cerr
<<
endl
<<
"*****"
<<
endl
<<
"*****ERROR: Need -i BED file and -g genome file. "
<<
endl
<<
"*****"
<<
endl
;
showHelp
=
true
;
}
if
(
!
showHelp
)
{
BedComplement
*
bc
=
new
BedComplement
(
bedFile
,
genomeFile
);
bc
->
ComplementBed
();
}
else
{
complement_help
();
}
return
0
;
}
void
complement_help
(
void
)
{
cerr
<<
"
\n
Tool: bedtools complement (aka complementBed)"
<<
endl
;
cerr
<<
"Version: "
<<
VERSION
<<
"
\n
"
;
cerr
<<
"Summary: Returns the base pair complement of a feature file."
<<
endl
<<
endl
;
cerr
<<
"Usage: "
<<
PROGRAM_NAME
<<
" [OPTIONS] -i <bed/gff/vcf> -g <genome>"
<<
endl
<<
endl
;
cerr
<<
"Notes: "
<<
endl
;
cerr
<<
"
\t
(1) The genome file should tab delimited and structured as follows:"
<<
endl
;
cerr
<<
"
\t
<chromName><TAB><chromSize>"
<<
endl
<<
endl
;
cerr
<<
"
\t
For example, Human (hg19):"
<<
endl
;
cerr
<<
"
\t
chr1
\t
249250621"
<<
endl
;
cerr
<<
"
\t
chr2
\t
243199373"
<<
endl
;
cerr
<<
"
\t
..."
<<
endl
;
cerr
<<
"
\t
chr18_gl000207_random
\t
4262"
<<
endl
<<
endl
;
cerr
<<
"Tips: "
<<
endl
;
cerr
<<
"
\t
One can use the UCSC Genome Browser's MySQL database to extract"
<<
endl
;
cerr
<<
"
\t
chromosome sizes. For example, H. sapiens:"
<<
endl
<<
endl
;
cerr
<<
"
\t
mysql --user=genome --host=genome-mysql.cse.ucsc.edu -A -e
\\
"
<<
endl
;
cerr
<<
"
\t\"
select chrom, size from hg19.chromInfo
\"
> hg19.genome"
<<
endl
<<
endl
;
exit
(
1
);
}
src/complement
Bed
/Makefile
→
src/complement
File
/Makefile
View file @
b0bc5b7e
UTILITIES_DIR
=
../utils/
OBJ_DIR
=
../../obj/
BIN_DIR
=
../../bin/
TOOL_DIR
=
../../src/
# -------------------
# define our includes
# -------------------
INCLUDES
=
-I
$(UTILITIES_DIR)
/bedFile/
\
INCLUDES
=
-I
$(UTILITIES_DIR)
/Contexts/
\
-I
$(UTILITIES_DIR)
/general/
\
-I
$(UTILITIES_DIR)
/fileType/
\
-I
$(UTILITIES_DIR)
/gzstream/
\
-I
$(UTILITIES_DIR)
/GenomeFile/
\
-I
$(UTILITIES_DIR)
/lineFileUtilities/
\
-I
$(UTILITIES_DIR)
/gzstream/
\
-I
$(UTILITIES_DIR)
/fileType/
\
-I
$(UTILITIES_DIR)
/BamTools/include
\
-I
$(UTILITIES_DIR)
/version/
-I
$(UTILITIES_DIR)
/BamTools/src
\
-I
$(UTILITIES_DIR)
/BlockedIntervals
\
-I
$(UTILITIES_DIR)
/BamTools-Ancillary
\
-I
$(UTILITIES_DIR)
/FileRecordTools/
\
-I
$(UTILITIES_DIR)
/FileRecordTools/FileReaders/
\
-I
$(UTILITIES_DIR)
/FileRecordTools/Records/
\
-I
$(UTILITIES_DIR)
/KeyListOps/
\
-I
$(UTILITIES_DIR)
/RecordOutputMgr/
\
-I
$(UTILITIES_DIR)
/NewChromsweep
\
-I
$(UTILITIES_DIR)
/BinTree
\
-I
$(UTILITIES_DIR)
/version/
\
-I
$(UTILITIES_DIR)
/ToolBase/
\
-I
$(TOOL_DIR)
/intersectFile/
# ----------------------------------
# define our source and object files
# ----------------------------------
SOURCES
=
complement
Main
.cpp complement
Bed
.cpp complement
Bed
.h
OBJECTS
=
complement
Main
.o complement
Bed
.o
SOURCES
=
complement
Help
.cpp complement
File
.cpp complement
File
.h
OBJECTS
=
complement
Help
.o complement
File
.o
BUILT_OBJECTS
=
$(
patsubst
%,
$(OBJ_DIR)
/%,
$(OBJECTS)
)
PROGRAM
=
complementBed
PROGRAM
=
complementFile
all
:
$(BUILT_OBJECTS)
...
...
@@ -28,11 +41,10 @@ all: $(BUILT_OBJECTS)
$(BUILT_OBJECTS)
:
$(SOURCES)
@
echo
" * compiling"
$
(
*
F
)
.cpp
@
$(CXX)
-c
-o
$@
$
(
*
F
)
.cpp
$(LDFLAGS)
$(CXXFLAGS)
$(INCLUDES)
@
$(CXX)
-c
-o
$@
$
(
*
F
)
.cpp
$(LDFLAGS)
$(CXXFLAGS)
$(DFLAGS)
$(INCLUDES)
clean
:
@
echo
"Cleaning up."
@
rm
-f
$(OBJ_DIR)
/complement
Main
.o
$(OBJ_DIR)
/complement
Bed
.o
@
rm
-f
$(OBJ_DIR)
/complement
Help
.o
$(OBJ_DIR)
/complement
File
.o
.PHONY
:
clean
src/complementFile/complementFile.cpp
0 → 100644
View file @
b0bc5b7e
#include "complementFile.h"
#include "NewGenomeFile.h"
ComplementFile
::
ComplementFile
(
ContextComplement
*
context
)
:
ToolBase
(
context
),
_genomeFile
(
context
->
getGenomeFile
()),
_currStartPos
(
0
),
_outputMgr
(
NULL
),
_chromList
(
_genomeFile
->
getChromList
()),
_currPosInGenomeList
(
-
1
)
{
}
ComplementFile
::~
ComplementFile
()
{
}
bool
ComplementFile
::
init
()
{
_frm
=
static_cast
<
FileRecordMergeMgr
*>
(
upCast
(
_context
)
->
getFile
(
0
));
return
true
;
}
bool
ComplementFile
::
findNext
(
RecordKeyVector
&
hits
)
{
while
(
!
_frm
->
eof
())
{
_frm
->
getNextRecord
(
&
hits
);
if
(
hits
.
getKey
()
==
NULL
)
continue
;
return
true
;
}
return
false
;
}
void
ComplementFile
::
processHits
(
RecordOutputMgr
*
outputMgr
,
RecordKeyVector
&
hits
)
{
_outputMgr
=
outputMgr
;
const
Record
*
rec
=
hits
.
getKey
();
//test for chrom change.
const
QuickString
&
newChrom
=
rec
->
getChrName
();
if
(
_currChrom
!=
newChrom
)
{
outPutLastRecordInPrevChrom
();
//if record's chrom doesn't exist in the genome file, do
//nothing
if
(
!
fastForward
(
newChrom
))
return
;
//we've switched to a new chromosome that is in both the DB
//and genome file.
_currStartPos
=
0
;
_currChrom
=
newChrom
;
_outRecord
.
setChrName
(
newChrom
);
}
int
endPos
=
rec
->
getStartPos
();
printRecord
(
endPos
);
_currStartPos
=
rec
->
getEndPos
();
}
void
ComplementFile
::
cleanupHits
(
RecordKeyVector
&
hits
)
{
_frm
->
deleteMergedRecord
(
hits
);
}
bool
ComplementFile
::
finalizeCalculations
()
{
outPutLastRecordInPrevChrom
();
fastForward
(
""
);
return
true
;
}
void
ComplementFile
::
outPutLastRecordInPrevChrom
()
{
const
QuickString
&
chrom
=
_outRecord
.
getChrName
();
//do nothing if triggered by first record in DB. At this point,
//there was no prev chrom, so nothing is stored in the output Record yet.
if
(
chrom
.
empty
())
return
;
int
maxChromSize
=
_genomeFile
->
getChromSize
(
chrom
);
if
(
_currStartPos
>=
maxChromSize
)
return
;
//chrom already covered and reported.
printRecord
(
maxChromSize
);
}
bool
ComplementFile
::
fastForward
(
const
QuickString
&
newChrom
)
{
if
(
!
newChrom
.
empty
()
&&
!
_genomeFile
->
hasChrom
(
newChrom
))
return
false
;
int
i
=
_currPosInGenomeList
+
1
;
while
(
i
<
(
int
)
_chromList
.
size
()
&&
_chromList
[
i
]
!=
newChrom
)
{
_outRecord
.
setChrName
(
_chromList
[
i
]);
_currStartPos
=
0
;
int
endPos
=
_genomeFile
->
getChromSize
(
_chromList
[
i
]);
printRecord
(
endPos
);
i
++
;
}
if
(
newChrom
.
empty
())
return
true
;
if
(
i
==
(
int
)
_chromList
.
size
())
{
//reached end but didn't find new chrom. Genome and DB are not sorted in same order.
cerr
<<
"***** ERROR: genome file and input file are not sorted in same order. Exiting..."
<<
endl
;
exit
(
1
);
//this is where we'd return false if we weren't exiting.
}
_currChrom
=
newChrom
;
_currPosInGenomeList
=
i
;
return
true
;
}
void
ComplementFile
::
printRecord
(
int
endPos
)
{
_outRecord
.
setStartPos
(
_currStartPos
);
QuickString
startStr
;
startStr
.
append
(
_currStartPos
);
_outRecord
.
setStartPosStr
(
startStr
);
_outRecord
.
setEndPos
(
endPos
);
QuickString
endStr
;
endStr
.
append
(
endPos
);
_outRecord
.
setEndPosStr
(
endStr
);
_outputMgr
->
printRecord
(
&
_outRecord
);
_outputMgr
->
newline
();
}
src/complementFile/complementFile.h
0 → 100644
View file @
b0bc5b7e
/*
* complementFile.h
*
* Created on: Jun 16, 2015
* Author: nek3d
*/
#ifndef COMPLEMENTFILE_H_
#define COMPLEMENTFILE_H_
#include "ToolBase.h"
#include "ContextComplement.h"
class
BlockMgr
;
class
BinTree
;
class
NewGenomeFile
;
class
RecordOutputMgr
;
class
ComplementFile
:
public
ToolBase
{
public:
ComplementFile
(
ContextComplement
*
context
);
virtual
~
ComplementFile
();
virtual
bool
init
();
virtual
bool
findNext
(
RecordKeyVector
&
hits
);
virtual
void
processHits
(
RecordOutputMgr
*
outputMgr
,
RecordKeyVector
&
hits
);
virtual
void
cleanupHits
(
RecordKeyVector
&
hits
);
virtual
bool
finalizeCalculations
();
virtual
void
giveFinalReport
(
RecordOutputMgr
*
outputMgr
)
{}
protected:
FileRecordMergeMgr
*
_frm
;
Bed3Interval
_outRecord
;
QuickString
_currChrom
;
const
NewGenomeFile
*
_genomeFile
;
int
_currStartPos
;
RecordOutputMgr
*
_outputMgr
;
const
vector
<
QuickString
>
&
_chromList
;
int
_currPosInGenomeList
;
virtual
ContextComplement
*
upCast
(
ContextBase
*
context
)
{
return
static_cast
<
ContextComplement
*>
(
context
);
}
void
outPutLastRecordInPrevChrom
();
bool
fastForward
(
const
QuickString
&
newChrom
);
void
printRecord
(
int
endPos
);
};
#endif
/* COMPLEMENTFILE_H_ */
src/complementFile/complementHelp.cpp
0 → 100644
View file @
b0bc5b7e
/*
* subtractMain.cpp
*
* Created on: Feb 19, 2015
* Author: nek3d
*/
#include "CommonHelp.h"
void
complement_help
(
void
)
{
cerr
<<
"
\n
Tool: bedtools complement (aka complementBed)"
<<
endl
;
cerr
<<
"Version: "
<<
VERSION
<<
"
\n
"
;
cerr
<<
"Summary: Returns the base pair complement of a feature file."
<<
endl
<<
endl
;
cerr
<<
"Usage: "
<<
"bedtools complement"
<<
" [OPTIONS] -i <bed/gff/vcf> -g <genome>"
<<
endl
<<
endl
;
cerr
<<
"Notes: "
<<
endl
;
cerr
<<
"
\t
(1) The genome file should tab delimited and structured as follows:"
<<
endl
;
cerr
<<
"
\t
<chromName><TAB><chromSize>"
<<
endl
<<
endl
;
cerr
<<
"
\t
For example, Human (hg19):"
<<
endl
;
cerr
<<
"
\t
chr1
\t
249250621"
<<
endl
;
cerr
<<
"
\t
chr2
\t
243199373"
<<
endl
;
cerr
<<
"
\t
..."
<<
endl
;
cerr
<<
"
\t
chr18_gl000207_random
\t
4262"
<<
endl
<<
endl
;
cerr
<<
"Tips: "
<<
endl
;
cerr
<<
"
\t
One can use the UCSC Genome Browser's MySQL database to extract"
<<
endl
;
cerr
<<
"
\t
chromosome sizes. For example, H. sapiens:"
<<
endl
<<
endl
;
cerr
<<
"
\t
mysql --user=genome --host=genome-mysql.cse.ucsc.edu -A -e
\\
"
<<
endl
;
cerr
<<
"
\t\"
select chrom, size from hg19.chromInfo
\"
> hg19.genome"
<<
endl
<<
endl
;
exit
(
1
);
}
src/utils/Contexts/ContextComplement.cpp
0 → 100644
View file @
b0bc5b7e
/*
* ContextComplement.cpp
*
* Created on: Feb 19, 2015
* Author: nek3d
*/
#include "ContextComplement.h"
ContextComplement
::
ContextComplement
()
{
setSortedInput
(
true
);
setUseMergedIntervals
(
true
);
setExplicitBedOutput
(
true
);
}
ContextComplement
::~
ContextComplement
()
{
}
bool
ContextComplement
::
isValidState
()
{
if
(
!
hasGenomeFile
())
{
_errorMsg
=
"
\n
***** ERROR: no -g genome file provided. *****"
;
return
false
;
}
return
ContextBase
::
isValidState
();
}
src/utils/Contexts/ContextComplement.h
0 → 100644
View file @
b0bc5b7e
/*
* ContextComplement.h
*
* Created on: Feb 19, 2015
* Author: nek3d
*/
#ifndef CONTEXTCOMPLEMENT_H_
#define CONTEXTCOMPLEMENT_H_