Skip to content
GitLab
Projects
Groups
Snippets
/
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Sign in
Toggle navigation
Menu
Open sidebar
R3
legacy
bedtools2
Commits
85818904
Commit
85818904
authored
Jun 17, 2015
by
Neil Kindlon
Browse files
Converted complement to PFM with merge-based approach, sorted input enforced
parent
16d4db0d
Changes
18
Show whitespace changes
Inline
Side-by-side
Makefile
View file @
85818904
...
...
@@ -36,7 +36,7 @@ SUBDIRS = $(SRC_DIR)/annotateBed \
$(SRC_DIR)
/bed12ToBed6
\
$(SRC_DIR)
/closestFile
\
$(SRC_DIR)
/clusterBed
\
$(SRC_DIR)
/complement
Bed
\
$(SRC_DIR)
/complement
File
\
$(SRC_DIR)
/coverageFile
\
$(SRC_DIR)
/expand
\
$(SRC_DIR)
/fastaFromBed
\
...
...
src/bedtools.cpp
View file @
85818904
...
...
@@ -46,7 +46,7 @@ int bedtoigv_main(int argc, char* argv[]);//
int
bedpetobam_main
(
int
argc
,
char
*
argv
[]);
//
void
closest_help
();
int
cluster_main
(
int
argc
,
char
*
argv
[]);
//
int
complement_
main
(
int
argc
,
char
*
argv
[]);
//
void
complement_
help
();
void
coverage_help
();
int
regress_test_main
(
int
argc
,
char
**
argv
);
//
int
expand_main
(
int
argc
,
char
*
argv
[]);
//
...
...
@@ -106,7 +106,6 @@ int main(int argc, char *argv[])
else
if
(
subCmd
==
"window"
)
return
window_main
(
argc
-
1
,
argv
+
1
);
else
if
(
subCmd
==
"genomecov"
)
return
genomecoverage_main
(
argc
-
1
,
argv
+
1
);
else
if
(
subCmd
==
"cluster"
)
return
cluster_main
(
argc
-
1
,
argv
+
1
);
else
if
(
subCmd
==
"complement"
)
return
complement_main
(
argc
-
1
,
argv
+
1
);
else
if
(
subCmd
==
"slop"
)
return
slop_main
(
argc
-
1
,
argv
+
1
);
else
if
(
subCmd
==
"split"
)
return
split_main
(
argc
-
1
,
argv
+
1
);
else
if
(
subCmd
==
"flank"
)
return
flank_main
(
argc
-
1
,
argv
+
1
);
...
...
@@ -307,6 +306,8 @@ void showHelp(const QuickString &subCmd) {
fisher_help
();
}
else
if
(
subCmd
==
"coverage"
)
{
coverage_help
();
}
else
if
(
subCmd
==
"complement"
)
{
complement_help
();
}
}
src/complementBed/complementBed.cpp
deleted
100644 → 0
View file @
16d4db0d
/*****************************************************************************
complementBed.cpp
(c) 2009 - Aaron Quinlan
Hall Laboratory
Department of Biochemistry and Molecular Genetics
University of Virginia
aaronquinlan@gmail.com
Licenced under the GNU General Public License 2.0 license.
******************************************************************************/
#include
"lineFileUtilities.h"
#include
"complementBed.h"
BedComplement
::
BedComplement
(
string
&
bedFile
,
string
&
genomeFile
)
{
_bedFile
=
bedFile
;
_genomeFile
=
genomeFile
;
_bed
=
new
BedFile
(
bedFile
);
_genome
=
new
GenomeFile
(
genomeFile
);
}
BedComplement
::~
BedComplement
(
void
)
{
}
//
// Merge overlapping BED entries into a single entry
//
void
BedComplement
::
ComplementBed
()
{
// load the "B" bed file into a map so
// that we can easily compare "A" to it for overlaps
_bed
->
loadBedFileIntoMapNoBin
();
// get a list of the chroms in the user's genome
vector
<
string
>
chromList
=
_genome
->
getChromList
();
// process each chrom in the genome
for
(
size_t
c
=
0
;
c
<
chromList
.
size
();
++
c
)
{
string
currChrom
=
chromList
[
c
];
// create a "bit vector" for the chrom
CHRPOS
currChromSize
=
_genome
->
getChromSize
(
currChrom
);
vector
<
bool
>
chromMasks
(
currChromSize
,
0
);
// mask the chrom for every feature in the BED file
bedVector
::
const_iterator
bItr
=
_bed
->
bedMapNoBin
[
currChrom
].
begin
();
bedVector
::
const_iterator
bEnd
=
_bed
->
bedMapNoBin
[
currChrom
].
end
();
for
(;
bItr
!=
bEnd
;
++
bItr
)
{
if
(
bItr
->
end
>
currChromSize
)
{
cerr
<<
"Warning: end of BED entry exceeds chromosome length. "
<<
"Please correct."
<<
endl
;
_bed
->
reportBedNewLine
(
*
bItr
);
exit
(
1
);
}
// mask all of the positions spanned by this BED entry.
for
(
CHRPOS
b
=
bItr
->
start
;
b
<
bItr
->
end
;
b
++
)
chromMasks
[
b
]
=
1
;
}
// report the unmasked, that is, complemented parts of the chrom
CHRPOS
i
=
0
;
CHRPOS
start
;
while
(
i
<
chromMasks
.
size
())
{
if
(
chromMasks
[
i
]
==
0
)
{
start
=
i
;
while
((
chromMasks
[
i
]
==
0
)
&&
(
i
<
chromMasks
.
size
()))
i
++
;
if
(
start
>
0
)
cout
<<
currChrom
<<
"
\t
"
<<
start
<<
"
\t
"
<<
i
<<
endl
;
else
cout
<<
currChrom
<<
"
\t
"
<<
0
<<
"
\t
"
<<
i
<<
endl
;
}
i
++
;
}
}
}
src/complementBed/complementBed.h
deleted
100644 → 0
View file @
16d4db0d
/*****************************************************************************
complementBed.h
(c) 2009 - Aaron Quinlan
Hall Laboratory
Department of Biochemistry and Molecular Genetics
University of Virginia
aaronquinlan@gmail.com
Licenced under the GNU General Public License 2.0 license.
******************************************************************************/
#include
"bedFile.h"
#include
"GenomeFile.h"
#include
<vector>
#include
<bitset>
#include
<algorithm>
#include
<iostream>
#include
<fstream>
#include
<limits.h>
#include
<stdlib.h>
using
namespace
std
;
//************************************************
// Class methods and elements
//************************************************
class
BedComplement
{
public:
// constructor
BedComplement
(
string
&
bedFile
,
string
&
genomeFile
);
// destructor
~
BedComplement
(
void
);
void
ComplementBed
();
private:
string
_bedFile
;
string
_genomeFile
;
BedFile
*
_bed
;
GenomeFile
*
_genome
;
};
src/complementBed/complementMain.cpp
deleted
100644 → 0
View file @
16d4db0d
/*****************************************************************************
complementBedMain.cpp
(c) 2009 - Aaron Quinlan
Hall Laboratory
Department of Biochemistry and Molecular Genetics
University of Virginia
aaronquinlan@gmail.com
Licenced under the GNU General Public License 2.0 license.
******************************************************************************/
#include
"complementBed.h"
#include
"version.h"
using
namespace
std
;
// define our program name
#define PROGRAM_NAME "bedtools complement"
// define our parameter checking macro
#define PARAMETER_CHECK(param, paramLen, actualLen) (strncmp(argv[i], param, min(actualLen, paramLen))== 0) && (actualLen == paramLen)
// function declarations
void
complement_help
(
void
);
int
complement_main
(
int
argc
,
char
*
argv
[])
{
// our configuration variables
bool
showHelp
=
false
;
// input files
string
bedFile
=
"stdin"
;
string
genomeFile
;
bool
haveBed
=
true
;
bool
haveGenome
=
false
;
for
(
int
i
=
1
;
i
<
argc
;
i
++
)
{
int
parameterLength
=
(
int
)
strlen
(
argv
[
i
]);
if
((
PARAMETER_CHECK
(
"-h"
,
2
,
parameterLength
))
||
(
PARAMETER_CHECK
(
"--help"
,
5
,
parameterLength
)))
{
showHelp
=
true
;
}
}
if
(
showHelp
)
complement_help
();
// do some parsing (all of these parameters require 2 strings)
for
(
int
i
=
1
;
i
<
argc
;
i
++
)
{
int
parameterLength
=
(
int
)
strlen
(
argv
[
i
]);
if
(
PARAMETER_CHECK
(
"-i"
,
2
,
parameterLength
))
{
if
((
i
+
1
)
<
argc
)
{
bedFile
=
argv
[
i
+
1
];
i
++
;
}
}
else
if
(
PARAMETER_CHECK
(
"-g"
,
2
,
parameterLength
))
{
if
((
i
+
1
)
<
argc
)
{
haveGenome
=
true
;
genomeFile
=
argv
[
i
+
1
];
i
++
;
}
}
else
{
cerr
<<
endl
<<
"*****ERROR: Unrecognized parameter: "
<<
argv
[
i
]
<<
" *****"
<<
endl
<<
endl
;
showHelp
=
true
;
}
}
// make sure we have both input files
if
(
!
haveBed
||
!
haveGenome
)
{
cerr
<<
endl
<<
"*****"
<<
endl
<<
"*****ERROR: Need -i BED file and -g genome file. "
<<
endl
<<
"*****"
<<
endl
;
showHelp
=
true
;
}
if
(
!
showHelp
)
{
BedComplement
*
bc
=
new
BedComplement
(
bedFile
,
genomeFile
);
bc
->
ComplementBed
();
}
else
{
complement_help
();
}
return
0
;
}
void
complement_help
(
void
)
{
cerr
<<
"
\n
Tool: bedtools complement (aka complementBed)"
<<
endl
;
cerr
<<
"Version: "
<<
VERSION
<<
"
\n
"
;
cerr
<<
"Summary: Returns the base pair complement of a feature file."
<<
endl
<<
endl
;
cerr
<<
"Usage: "
<<
PROGRAM_NAME
<<
" [OPTIONS] -i <bed/gff/vcf> -g <genome>"
<<
endl
<<
endl
;
cerr
<<
"Notes: "
<<
endl
;
cerr
<<
"
\t
(1) The genome file should tab delimited and structured as follows:"
<<
endl
;
cerr
<<
"
\t
<chromName><TAB><chromSize>"
<<
endl
<<
endl
;
cerr
<<
"
\t
For example, Human (hg19):"
<<
endl
;
cerr
<<
"
\t
chr1
\t
249250621"
<<
endl
;
cerr
<<
"
\t
chr2
\t
243199373"
<<
endl
;
cerr
<<
"
\t
..."
<<
endl
;
cerr
<<
"
\t
chr18_gl000207_random
\t
4262"
<<
endl
<<
endl
;
cerr
<<
"Tips: "
<<
endl
;
cerr
<<
"
\t
One can use the UCSC Genome Browser's MySQL database to extract"
<<
endl
;
cerr
<<
"
\t
chromosome sizes. For example, H. sapiens:"
<<
endl
<<
endl
;
cerr
<<
"
\t
mysql --user=genome --host=genome-mysql.cse.ucsc.edu -A -e
\\
"
<<
endl
;
cerr
<<
"
\t\"
select chrom, size from hg19.chromInfo
\"
> hg19.genome"
<<
endl
<<
endl
;
exit
(
1
);
}
src/complement
Bed
/Makefile
→
src/complement
File
/Makefile
View file @
85818904
UTILITIES_DIR
=
../utils/
OBJ_DIR
=
../../obj/
BIN_DIR
=
../../bin/
TOOL_DIR
=
../../src/
# -------------------
# define our includes
# -------------------
INCLUDES
=
-I
$(UTILITIES_DIR)
/bedFile/
\
-I
$(UTILITIES_DIR)
/GenomeFile/
\
-I
$(UTILITIES_DIR)
/lineFileUtilities/
\
-I
$(UTILITIES_DIR)
/gzstream/
\
INCLUDES
=
-I
$(UTILITIES_DIR)
/Contexts/
\
-I
$(UTILITIES_DIR)
/general/
\
-I
$(UTILITIES_DIR)
/fileType/
\
-I
$(UTILITIES_DIR)
/gzstream/
\
-I
$(UTILITIES_DIR)
/GenomeFile/
\
-I
$(UTILITIES_DIR)
/BamTools/include
\
-I
$(UTILITIES_DIR)
/version/
-I
$(UTILITIES_DIR)
/BamTools/src
\
-I
$(UTILITIES_DIR)
/BlockedIntervals
\
-I
$(UTILITIES_DIR)
/BamTools-Ancillary
\
-I
$(UTILITIES_DIR)
/FileRecordTools/
\
-I
$(UTILITIES_DIR)
/FileRecordTools/FileReaders/
\
-I
$(UTILITIES_DIR)
/FileRecordTools/Records/
\
-I
$(UTILITIES_DIR)
/KeyListOps/
\
-I
$(UTILITIES_DIR)
/RecordOutputMgr/
\
-I
$(UTILITIES_DIR)
/NewChromsweep
\
-I
$(UTILITIES_DIR)
/BinTree
\
-I
$(UTILITIES_DIR)
/version/
\
-I
$(UTILITIES_DIR)
/ToolBase/
\
-I
$(TOOL_DIR)
/intersectFile/
# ----------------------------------
# define our source and object files
# ----------------------------------
SOURCES
=
complement
Main
.cpp complement
Bed
.cpp complement
Bed
.h
OBJECTS
=
complement
Main
.o complement
Bed
.o
SOURCES
=
complement
Help
.cpp complement
File
.cpp complement
File
.h
OBJECTS
=
complement
Help
.o complement
File
.o
BUILT_OBJECTS
=
$(
patsubst
%,
$(OBJ_DIR)
/%,
$(OBJECTS)
)
PROGRAM
=
complementBed
PROGRAM
=
complementFile
all
:
$(BUILT_OBJECTS)
...
...
@@ -28,11 +41,10 @@ all: $(BUILT_OBJECTS)
$(BUILT_OBJECTS)
:
$(SOURCES)
@
echo
" * compiling"
$
(
*
F
)
.cpp
@
$(CXX)
-c
-o
$@
$
(
*
F
)
.cpp
$(LDFLAGS)
$(CXXFLAGS)
$(INCLUDES)
@
$(CXX)
-c
-o
$@
$
(
*
F
)
.cpp
$(LDFLAGS)
$(CXXFLAGS)
$(DFLAGS)
$(INCLUDES)
clean
:
@
echo
"Cleaning up."
@
rm
-f
$(OBJ_DIR)
/complement
Main
.o
$(OBJ_DIR)
/complement
Bed
.o
@
rm
-f
$(OBJ_DIR)
/complement
Help
.o
$(OBJ_DIR)
/complement
File
.o
.PHONY
:
clean
src/complementFile/complementFile.cpp
0 → 100644
View file @
85818904
#include
"complementFile.h"
#include
"NewGenomeFile.h"
ComplementFile
::
ComplementFile
(
ContextComplement
*
context
)
:
ToolBase
(
context
),
_genomeFile
(
context
->
getGenomeFile
()),
_currStartPos
(
0
),
_outputMgr
(
NULL
),
_chromList
(
_genomeFile
->
getChromList
()),
_currPosInGenomeList
(
-
1
)
{
}
ComplementFile
::~
ComplementFile
()
{
}
bool
ComplementFile
::
init
()
{
_frm
=
static_cast
<
FileRecordMergeMgr
*>
(
upCast
(
_context
)
->
getFile
(
0
));
return
true
;
}
bool
ComplementFile
::
findNext
(
RecordKeyVector
&
hits
)
{
while
(
!
_frm
->
eof
())
{
_frm
->
getNextRecord
(
&
hits
);
if
(
hits
.
getKey
()
==
NULL
)
continue
;
return
true
;
}
return
false
;
}
void
ComplementFile
::
processHits
(
RecordOutputMgr
*
outputMgr
,
RecordKeyVector
&
hits
)
{
_outputMgr
=
outputMgr
;
const
Record
*
rec
=
hits
.
getKey
();
//test for chrom change.
const
QuickString
&
newChrom
=
rec
->
getChrName
();
if
(
_currChrom
!=
newChrom
)
{
outPutLastRecordInPrevChrom
();
//if record's chrom doesn't exist in the genome file, do
//nothing
if
(
!
fastForward
(
newChrom
))
return
;
//we've switched to a new chromosome that is in both the DB
//and genome file.
_currStartPos
=
0
;
_currChrom
=
newChrom
;
_outRecord
.
setChrName
(
newChrom
);
}
int
endPos
=
rec
->
getStartPos
();
printRecord
(
endPos
);
_currStartPos
=
rec
->
getEndPos
();
}
void
ComplementFile
::
cleanupHits
(
RecordKeyVector
&
hits
)
{
_frm
->
deleteMergedRecord
(
hits
);
}
bool
ComplementFile
::
finalizeCalculations
()
{
outPutLastRecordInPrevChrom
();
fastForward
(
""
);
return
true
;
}
void
ComplementFile
::
outPutLastRecordInPrevChrom
()
{
const
QuickString
&
chrom
=
_outRecord
.
getChrName
();
//do nothing if triggered by first record in DB. At this point,
//there was no prev chrom, so nothing is stored in the output Record yet.
if
(
chrom
.
empty
())
return
;
int
maxChromSize
=
_genomeFile
->
getChromSize
(
chrom
);
if
(
_currStartPos
>=
maxChromSize
)
return
;
//chrom already covered and reported.
printRecord
(
maxChromSize
);
}
bool
ComplementFile
::
fastForward
(
const
QuickString
&
newChrom
)
{
if
(
!
newChrom
.
empty
()
&&
!
_genomeFile
->
hasChrom
(
newChrom
))
return
false
;
int
i
=
_currPosInGenomeList
+
1
;
while
(
i
<
(
int
)
_chromList
.
size
()
&&
_chromList
[
i
]
!=
newChrom
)
{
_outRecord
.
setChrName
(
_chromList
[
i
]);
_currStartPos
=
0
;
int
endPos
=
_genomeFile
->
getChromSize
(
_chromList
[
i
]);
printRecord
(
endPos
);
i
++
;
}
if
(
newChrom
.
empty
())
return
true
;
if
(
i
==
(
int
)
_chromList
.
size
())
{
//reached end but didn't find new chrom. Genome and DB are not sorted in same order.
cerr
<<
"***** ERROR: genome file and input file are not sorted in same order. Exiting..."
<<
endl
;
exit
(
1
);
//this is where we'd return false if we weren't exiting.
}
_currChrom
=
newChrom
;
_currPosInGenomeList
=
i
;
return
true
;
}
void
ComplementFile
::
printRecord
(
int
endPos
)
{
_outRecord
.
setStartPos
(
_currStartPos
);
QuickString
startStr
;
startStr
.
append
(
_currStartPos
);
_outRecord
.
setStartPosStr
(
startStr
);
_outRecord
.
setEndPos
(
endPos
);
QuickString
endStr
;
endStr
.
append
(
endPos
);
_outRecord
.
setEndPosStr
(
endStr
);
_outputMgr
->
printRecord
(
&
_outRecord
);
_outputMgr
->
newline
();
}
src/complementFile/complementFile.h
0 → 100644
View file @
85818904
/*
* complementFile.h
*
* Created on: Jun 16, 2015
* Author: nek3d
*/
#ifndef COMPLEMENTFILE_H_
#define COMPLEMENTFILE_H_
#include
"ToolBase.h"
#include
"ContextComplement.h"
class
BlockMgr
;
class
BinTree
;
class
NewGenomeFile
;
class
RecordOutputMgr
;
class
ComplementFile
:
public
ToolBase
{
public:
ComplementFile
(
ContextComplement
*
context
);
virtual
~
ComplementFile
();
virtual
bool
init
();
virtual
bool
findNext
(
RecordKeyVector
&
hits
);
virtual
void
processHits
(
RecordOutputMgr
*
outputMgr
,
RecordKeyVector
&
hits
);
virtual
void
cleanupHits
(
RecordKeyVector
&
hits
);
virtual
bool
finalizeCalculations
();
virtual
void
giveFinalReport
(
RecordOutputMgr
*
outputMgr
)
{}
protected:
FileRecordMergeMgr
*
_frm
;
Bed3Interval
_outRecord
;
QuickString
_currChrom
;
const
NewGenomeFile
*
_genomeFile
;
int
_currStartPos
;
RecordOutputMgr
*
_outputMgr
;
const
vector
<
QuickString
>
&
_chromList
;
int
_currPosInGenomeList
;
virtual
ContextComplement
*
upCast
(
ContextBase
*
context
)
{
return
static_cast
<
ContextComplement
*>
(
context
);
}
void
outPutLastRecordInPrevChrom
();
bool
fastForward
(
const
QuickString
&
newChrom
);
void
printRecord
(
int
endPos
);
};
#endif
/* COMPLEMENTFILE_H_ */
src/complementFile/complementHelp.cpp
0 → 100644
View file @
85818904
/*
* subtractMain.cpp
*
* Created on: Feb 19, 2015
* Author: nek3d
*/
#include
"CommonHelp.h"
void
complement_help
(
void
)
{
cerr
<<
"
\n
Tool: bedtools complement (aka complementBed)"
<<
endl
;
cerr
<<
"Version: "
<<
VERSION
<<
"
\n
"
;
cerr
<<
"Summary: Returns the base pair complement of a feature file."
<<
endl
<<
endl
;
cerr
<<
"Usage: "
<<
"bedtools complement"
<<
" [OPTIONS] -i <bed/gff/vcf> -g <genome>"
<<
endl
<<
endl
;
cerr
<<
"Notes: "
<<
endl
;
cerr
<<
"
\t
(1) The genome file should tab delimited and structured as follows:"
<<
endl
;
cerr
<<
"
\t
<chromName><TAB><chromSize>"
<<
endl
<<
endl
;
cerr
<<
"
\t
For example, Human (hg19):"
<<
endl
;
cerr
<<
"
\t
chr1
\t
249250621"
<<
endl
;
cerr
<<
"
\t
chr2
\t
243199373"
<<
endl
;
cerr
<<
"
\t
..."
<<
endl
;
cerr
<<
"
\t
chr18_gl000207_random
\t
4262"
<<
endl
<<
endl
;
cerr
<<
"Tips: "
<<
endl
;
cerr
<<
"
\t
One can use the UCSC Genome Browser's MySQL database to extract"
<<
endl
;
cerr
<<
"
\t
chromosome sizes. For example, H. sapiens:"
<<
endl
<<
endl
;
cerr
<<
"
\t
mysql --user=genome --host=genome-mysql.cse.ucsc.edu -A -e
\\
"
<<
endl
;
cerr
<<
"
\t\"
select chrom, size from hg19.chromInfo
\"
> hg19.genome"
<<
endl
<<
endl
;
exit
(
1
);
}
src/utils/Contexts/ContextComplement.cpp
0 → 100644
View file @
85818904
/*
* ContextComplement.cpp
*
* Created on: Feb 19, 2015
* Author: nek3d
*/
#include
"ContextComplement.h"
ContextComplement
::
ContextComplement
()
{
setSortedInput
(
true
);
setUseMergedIntervals
(
true
);
setExplicitBedOutput
(
true
);
}
ContextComplement
::~
ContextComplement
()
{
}
bool
ContextComplement
::
isValidState
()
{
if
(
!
hasGenomeFile
())
{
_errorMsg
=
"
\n
***** ERROR: no -g genome file provided. *****"
;
return
false
;
}
return
ContextBase
::
isValidState
();
}
src/utils/Contexts/ContextComplement.h
0 → 100644
View file @
85818904
/*
* ContextComplement.h
*
* Created on: Feb 19, 2015
* Author: nek3d
*/
#ifndef CONTEXTCOMPLEMENT_H_
#define CONTEXTCOMPLEMENT_H_