Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Sign in
Toggle navigation
Menu
Open sidebar
R3
legacy
bedtools2
Commits
484419a9
Commit
484419a9
authored
Jul 01, 2010
by
Aaron
Browse files
Minor stylistic tweaks.
parent
9cb00210
Changes
5
Hide whitespace changes
Inline
Side-by-side
Makefile
View file @
484419a9
...
...
@@ -12,7 +12,7 @@ export CXXFLAGS = -Wall -O2
export
LIBS
=
-lz
# define our source subdirectories
SUBDIRS
=
$(SRC_DIR)
/bamToBed
$(SRC_DIR)
/bedToBam
$(SRC_DIR)
/bedToIgv
$(SRC_DIR)
/bed12ToBed6
$(SRC_DIR)
/closestBed
$(SRC_DIR)
/complementBed
$(SRC_DIR)
/coverageBed
$(SRC_DIR)
/fastaFromBed
$(SRC_DIR)
/genomeCoverageBed
$(SRC_DIR)
/intersectBed
$(SRC_DIR)
/linksBed
$(SRC_DIR)
/maskFastaFromBed
$(SRC_DIR)
/mergeBed
$(SRC_DIR)
/overlap
$(SRC_DIR)
/pairToBed
$(SRC_DIR)
/pairToPair
$(SRC_DIR)
/shuffleBed
$(SRC_DIR)
/slopBed
$(SRC_DIR)
/sortBed
$(SRC_DIR)
/subtractBed
$(SRC_DIR)
/windowBed
SUBDIRS
=
$(SRC_DIR)
/bamToBed
$(SRC_DIR)
/bedToBam
$(SRC_DIR)
/bedToIgv
$(SRC_DIR)
/bed12ToBed6
$(SRC_DIR)
/closestBed
$(SRC_DIR)
/complementBed
$(SRC_DIR)
/coverageBed
$(SRC_DIR)
/fastaFromBed
$(SRC_DIR)
/genomeCoverageBed
$(SRC_DIR)
/groupBy
$(SRC_DIR)
/intersectBed
$(SRC_DIR)
/linksBed
$(SRC_DIR)
/maskFastaFromBed
$(SRC_DIR)
/mergeBed
$(SRC_DIR)
/overlap
$(SRC_DIR)
/pairToBed
$(SRC_DIR)
/pairToPair
$(SRC_DIR)
/shuffleBed
$(SRC_DIR)
/slopBed
$(SRC_DIR)
/sortBed
$(SRC_DIR)
/subtractBed
$(SRC_DIR)
/windowBed
UTIL_SUBDIRS
=
$(SRC_DIR)
/utils/lineFileUtilities
$(SRC_DIR)
/utils/bedFile
$(SRC_DIR)
/utils/genomeFile
$(SRC_DIR)
/utils/gzstream
$(SRC_DIR)
/utils/bedFilePE
$(SRC_DIR)
/utils/sequenceUtilities
$(SRC_DIR)
/utils/BamTools
...
...
src/bedToIgv/bedToIgv.cpp
View file @
484419a9
...
...
@@ -226,29 +226,28 @@ void ProcessBed(istream &bedInput, BedFile *bed, string path, string sortType, s
if
(
session
!=
"none"
)
cout
<<
"load "
<<
session
<<
endl
;
// process each BED entry and convert to an IGV request
BED
bedEntry
,
nullBed
;
int
lineNum
=
0
;
BedLineStatus
bedStatus
;
bed
->
Open
();
// process each BED entry and convert to an IGV request
while
((
bedStatus
=
bed
->
GetNextBed
(
bedEntry
,
lineNum
))
!=
BED_INVALID
)
{
if
(
bedStatus
==
BED_VALID
)
{
string
locus
,
filename
;
if
(
useNames
==
false
)
{
locus
=
bedEntry
.
chrom
+
":"
+
ToString
(
bedEntry
.
start
-
slop
)
+
"-"
+
ToString
(
bedEntry
.
end
+
slop
);
locus
=
bedEntry
.
chrom
+
":"
+
ToString
(
bedEntry
.
start
-
slop
)
+
"-"
+
ToString
(
bedEntry
.
end
+
slop
);
if
(
useNames
==
false
)
filename
=
locus
;
}
else
{
locus
=
bedEntry
.
chrom
+
":"
+
ToString
(
bedEntry
.
start
-
slop
)
+
"-"
+
ToString
(
bedEntry
.
end
+
slop
);
if
(
bedEntry
.
name
.
empty
()
==
false
)
filename
=
bedEntry
.
name
;
else
{
cerr
<<
"Error: You requested that filenames be based upon the name field. However, it appears to be empty. Exiting!"
<<
endl
;
exit
(
1
);
}
}
}
// goto
cout
<<
"goto "
<<
locus
<<
endl
;
...
...
@@ -264,6 +263,7 @@ void ProcessBed(istream &bedInput, BedFile *bed, string path, string sortType, s
// snapshot
cout
<<
"snapshot "
<<
filename
<<
"."
<<
imageType
<<
endl
;
// reset
bedEntry
=
nullBed
;
}
}
...
...
src/genomeCoverageBed/genomeCoverageMain.cpp
View file @
484419a9
...
...
@@ -32,7 +32,7 @@ int main(int argc, char* argv[]) {
// input files
string
bedFile
;
string
genomeFile
;
int
max
=
999999999
;
int
max
=
INT_MAX
;
bool
haveBed
=
false
;
bool
bamInput
=
false
;
...
...
src/groupBy/groupBy.cpp
0 → 100755
View file @
484419a9
/*****************************************************************************
overlap.cpp
(c) 2009 - Aaron Quinlan
Hall Laboratory
Department of Biochemistry and Molecular Genetics
University of Virginia
aaronquinlan@gmail.com
Licenced under the GNU General Public License 2.0+ license.
******************************************************************************/
#include <vector>
#include <map>
#include <numeric>
#include <iterator>
#include <iostream>
#include <iomanip>
#include <fstream>
#include <stdlib.h>
#include <math.h>
#include "version.h"
#include "lineFileUtilities.h"
using
namespace
std
;
// define our program name
#define PROGRAM_NAME "groupBy"
// define our parameter checking macro
#define PARAMETER_CHECK(param, paramLen, actualLen) (strncmp(argv[i], param, min(actualLen, paramLen))== 0) && (actualLen == paramLen)
// function declarations
void
ShowHelp
(
void
);
void
DetermineInput
(
string
&
inFile
,
const
vector
<
int
>
&
groupColumns
,
int
opColumn
,
string
op
);
void
GroupBy
(
istream
&
input
,
const
vector
<
int
>
&
groupColumns
,
int
opColumn
,
string
op
);
void
ReportSummary
(
const
vector
<
string
>
&
group
,
const
vector
<
string
>
&
data
,
string
op
);
float
ToFloat
(
string
element
);
void
TabPrint
(
string
element
);
int
main
(
int
argc
,
char
*
argv
[])
{
// input files
string
inFile
;
string
groupColumnsString
=
"1,2,3"
;
string
opColumnString
;
string
op
=
"sum"
;
// our configuration variables
bool
showHelp
=
false
;
bool
haveInFile
=
false
;
bool
haveGroupColumns
=
false
;
bool
haveOpColumn
=
false
;
bool
haveOp
=
true
;
// check to see if we should print out some help
if
(
argc
<=
1
)
showHelp
=
true
;
for
(
int
i
=
1
;
i
<
argc
;
i
++
)
{
int
parameterLength
=
(
int
)
strlen
(
argv
[
i
]);
if
((
PARAMETER_CHECK
(
"-h"
,
2
,
parameterLength
))
||
(
PARAMETER_CHECK
(
"--help"
,
5
,
parameterLength
)))
{
showHelp
=
true
;
}
}
if
(
showHelp
)
ShowHelp
();
// do some parsing (all of these parameters require 2 strings)
for
(
int
i
=
1
;
i
<
argc
;
i
++
)
{
int
parameterLength
=
(
int
)
strlen
(
argv
[
i
]);
if
(
PARAMETER_CHECK
(
"-i"
,
2
,
parameterLength
))
{
if
((
i
+
1
)
<
argc
)
{
haveInFile
=
true
;
inFile
=
argv
[
i
+
1
];
i
++
;
}
}
else
if
(
PARAMETER_CHECK
(
"-grp"
,
4
,
parameterLength
))
{
haveGroupColumns
=
true
;
groupColumnsString
=
argv
[
i
+
1
];
i
++
;
}
else
if
(
PARAMETER_CHECK
(
"-opCol"
,
6
,
parameterLength
))
{
haveOpColumn
=
true
;
opColumnString
=
argv
[
i
+
1
];
i
++
;
}
else
if
(
PARAMETER_CHECK
(
"-op"
,
3
,
parameterLength
))
{
haveOp
=
true
;
op
=
argv
[
i
+
1
];
i
++
;
}
else
{
cerr
<<
endl
<<
"*****ERROR: Unrecognized parameter: "
<<
argv
[
i
]
<<
" *****"
<<
endl
<<
endl
;
showHelp
=
true
;
}
}
// make sure we have an input files
if
(
!
haveInFile
)
{
cerr
<<
endl
<<
"*****"
<<
endl
<<
"*****ERROR: Need -i file. "
<<
endl
<<
"*****"
<<
endl
;
showHelp
=
true
;
}
if
(
!
haveOpColumn
)
{
cerr
<<
endl
<<
"*****"
<<
endl
<<
"*****ERROR: Need -opCol."
<<
endl
<<
"*****"
<<
endl
;
showHelp
=
true
;
}
if
(
!
showHelp
)
{
// Split the column string sent by the user into discrete column numbers
// A comma separated string is expected.
vector
<
int
>
groupColumnsInt
;
Tokenize
(
groupColumnsString
,
groupColumnsInt
,
","
);
int
opColumnInt
=
atoi
(
opColumnString
.
c_str
());
DetermineInput
(
inFile
,
groupColumnsInt
,
opColumnInt
,
op
);
}
else
{
ShowHelp
();
}
}
void
ShowHelp
(
void
)
{
cerr
<<
endl
<<
"Program: "
<<
PROGRAM_NAME
<<
" (v"
<<
VERSION
<<
")"
<<
endl
;
cerr
<<
"Author: Aaron Quinlan (aaronquinlan@gmail.com)"
<<
endl
;
cerr
<<
"Summary: Summarizes a dataset column based upon"
<<
endl
;
cerr
<<
"
\t
common column groupings. Akin to the SQL
\"
group by
\"
command."
<<
endl
<<
endl
;
cerr
<<
"Usage: "
<<
PROGRAM_NAME
<<
" [OPTIONS] -i <input> -opCol <> "
<<
endl
<<
endl
;
cerr
<<
"Options: "
<<
endl
;
cerr
<<
"
\t
-i
\t
"
<<
"Input file. Use
\"
stdin
\"
for pipes."
<<
endl
<<
endl
;
cerr
<<
"
\t
-grp
\t
"
<<
"Specify the columns (1-based) for the grouping."
<<
endl
;
cerr
<<
"
\t\t
The columns must be comma separated."
<<
endl
;
cerr
<<
"
\t\t
Default: 1,2,3"
<<
endl
<<
endl
;
cerr
<<
"
\t
-opCol
\t
"
<<
"Specify the column (1-based) that should be summarized."
<<
endl
;
cerr
<<
"
\t\t
Required."
<<
endl
<<
endl
;
cerr
<<
"
\t
-op
\t
"
<<
"Specify the operation that should be applied to opCol."
<<
endl
;
cerr
<<
"
\t\t
Valid operations: sum, count, min, max, mean, median,"
<<
endl
;
cerr
<<
"
\t\t
mode, antimode, stdev"
<<
endl
;
cerr
<<
"
\t\t
Default: sum"
<<
endl
;
cerr
<<
"Examples: "
<<
endl
;
cerr
<<
"
\t
$ cat test.out"
<<
endl
;
cerr
<<
"
\t
chr1 10 20 A chr1 15 25 1000"
<<
endl
;
cerr
<<
"
\t
chr1 10 20 A chr1 25 35 10000"
<<
endl
<<
endl
;
cerr
<<
"
\t
$ cat test.out | groupBy -i stdin -grpCols 1,2,3,4 -opCol 8 -op sum"
<<
endl
;
cerr
<<
"
\t
chr1 10 20 A 11000"
<<
endl
<<
endl
;
cerr
<<
"
\t
$ cat test.out | groupBy -i stdin -grpCols 1,2,3,4 -opCol 8 -op max"
<<
endl
;
cerr
<<
"
\t
chr1 10 20 A 1000"
<<
endl
<<
endl
;
cerr
<<
"
\t
$ cat test.out | groupBy -i stdin -grpCols 1,2,3,4 -opCol 8 -op mean"
<<
endl
;
cerr
<<
"
\t
chr1 10 20 A 5500"
<<
endl
<<
endl
;
cerr
<<
"Notes: "
<<
endl
;
cerr
<<
"
\t
(1) The input file/stream should be sorted/grouped by the -grpCols."
<<
endl
<<
endl
;
// end the program here
exit
(
1
);
}
void
DetermineInput
(
string
&
inFile
,
const
vector
<
int
>
&
groupColumns
,
int
opColumn
,
string
op
)
{
if
(
inFile
!=
"stdin"
)
{
// process a file
ifstream
in
(
inFile
.
c_str
(),
ios
::
in
);
if
(
!
in
)
{
cerr
<<
"Error: The requested input file ("
<<
inFile
<<
") could not be opened. Exiting!"
<<
endl
;
exit
(
1
);
}
GroupBy
(
in
,
groupColumns
,
opColumn
,
op
);
}
else
GroupBy
(
cin
,
groupColumns
,
opColumn
,
op
);
}
void
GroupBy
(
istream
&
input
,
const
vector
<
int
>
&
groupColumns
,
int
opColumn
,
string
op
)
{
int
lineNum
=
0
;
string
inLine
;
vector
<
string
>
inFields
;
vector
<
string
>
prevGroup
(
0
);
vector
<
string
>
currGroup
(
0
);
vector
<
string
>
values
;
values
.
reserve
(
100000
);
while
(
getline
(
input
,
inLine
))
{
lineNum
++
;
Tokenize
(
inLine
,
inFields
);
currGroup
.
clear
();
vector
<
int
>::
const_iterator
gIt
=
groupColumns
.
begin
();
vector
<
int
>::
const_iterator
gEnd
=
groupColumns
.
end
();
for
(;
gIt
!=
gEnd
;
++
gIt
)
currGroup
.
push_back
(
inFields
[
*
gIt
-
1
]);
if
((
currGroup
!=
prevGroup
)
&&
(
prevGroup
.
size
()
>
0
))
{
// Summarize this group
ReportSummary
(
prevGroup
,
values
,
op
);
values
.
clear
();
values
.
push_back
(
inFields
[
opColumn
-
1
].
c_str
());
}
else
values
.
push_back
(
inFields
[
opColumn
-
1
].
c_str
());
prevGroup
=
currGroup
;
inFields
.
clear
();
}
values
.
clear
();
values
.
push_back
(
inFields
[
opColumn
-
1
].
c_str
());
ReportSummary
(
currGroup
,
values
,
op
);
}
void
ReportSummary
(
const
vector
<
string
>
&
group
,
const
vector
<
string
>
&
data
,
string
op
)
{
vector
<
double
>
dataF
;
// convert to floats
transform
(
data
.
begin
(),
data
.
end
(),
back_inserter
(
dataF
),
ToFloat
);
if
(
op
==
"sum"
)
{
// sum them up
double
total
=
accumulate
(
dataF
.
begin
(),
dataF
.
end
(),
0.0
);
for_each
(
group
.
begin
(),
group
.
end
(),
TabPrint
);
cout
<<
setprecision
(
7
)
<<
total
<<
endl
;
}
else
if
(
op
==
"min"
)
{
for_each
(
group
.
begin
(),
group
.
end
(),
TabPrint
);
cout
<<
setprecision
(
7
)
<<
*
min_element
(
dataF
.
begin
(),
dataF
.
end
()
)
<<
endl
;
}
else
if
(
op
==
"max"
)
{
for_each
(
group
.
begin
(),
group
.
end
(),
TabPrint
);
cout
<<
setprecision
(
7
)
<<
*
max_element
(
dataF
.
begin
(),
dataF
.
end
()
)
<<
endl
;
}
else
if
(
op
==
"mean"
)
{
double
total
=
accumulate
(
dataF
.
begin
(),
dataF
.
end
(),
0.0
);
double
mean
=
total
/
dataF
.
size
();
for_each
(
group
.
begin
(),
group
.
end
(),
TabPrint
);
cout
<<
setprecision
(
7
)
<<
mean
<<
endl
;
}
else
if
(
op
==
"median"
)
{
double
median
=
0.0
;
sort
(
dataF
.
begin
(),
dataF
.
end
());
int
totalLines
=
dataF
.
size
();
if
((
totalLines
%
2
)
>
0
)
{
long
mid
;
mid
=
totalLines
/
2
;
median
=
dataF
[
mid
];
}
else
{
long
midLow
,
midHigh
;
midLow
=
(
totalLines
/
2
)
-
1
;
midHigh
=
(
totalLines
/
2
);
median
=
(
dataF
[
midLow
]
+
dataF
[
midHigh
])
/
2.0
;
}
for_each
(
group
.
begin
(),
group
.
end
(),
TabPrint
);
cout
<<
setprecision
(
7
)
<<
median
<<
endl
;
}
else
if
(
op
==
"count"
)
{
for_each
(
group
.
begin
(),
group
.
end
(),
TabPrint
);
cout
<<
setprecision
(
7
)
<<
data
.
size
()
<<
endl
;
}
else
if
((
op
==
"mode"
)
||
(
op
==
"antimode"
))
{
// compute the frequency of each unique value
map
<
string
,
int
>
freqs
;
vector
<
string
>::
const_iterator
dIt
=
data
.
begin
();
vector
<
string
>::
const_iterator
dEnd
=
data
.
end
();
for
(;
dIt
!=
dEnd
;
++
dIt
)
{
freqs
[
*
dIt
]
++
;
}
// grab the mode and the anti mode
string
mode
,
antiMode
;
int
count
=
0
;
int
minCount
=
INT_MAX
;
for
(
map
<
string
,
int
>::
const_iterator
iter
=
freqs
.
begin
();
iter
!=
freqs
.
end
();
++
iter
)
{
if
(
iter
->
second
>
count
)
{
mode
=
iter
->
first
;
count
=
iter
->
second
;
}
if
(
iter
->
second
<
minCount
)
{
antiMode
=
iter
->
first
;
minCount
=
iter
->
second
;
}
}
// report
for_each
(
group
.
begin
(),
group
.
end
(),
TabPrint
);
if
(
op
==
"mode"
)
cout
<<
setprecision
(
7
)
<<
mode
<<
endl
;
else
if
(
op
==
"antimode"
)
cout
<<
setprecision
(
7
)
<<
antiMode
<<
endl
;
}
else
if
(
op
==
"stdev"
)
{
// get the mean
double
total
=
accumulate
(
dataF
.
begin
(),
dataF
.
end
(),
0.0
);
double
mean
=
total
/
dataF
.
size
();
// get the variance
double
totalVariance
=
0.0
;
vector
<
double
>::
const_iterator
dIt
=
dataF
.
begin
();
vector
<
double
>::
const_iterator
dEnd
=
dataF
.
end
();
for
(;
dIt
!=
dEnd
;
++
dIt
)
{
totalVariance
+=
pow
((
*
dIt
-
mean
),
2
);
}
double
variance
=
totalVariance
/
dataF
.
size
();
double
stddev
=
sqrt
(
variance
);
// report
for_each
(
group
.
begin
(),
group
.
end
(),
TabPrint
);
cout
<<
setprecision
(
7
)
<<
stddev
<<
endl
;
}
}
float
ToFloat
(
string
element
)
{
return
atof
(
element
.
c_str
());
}
void
TabPrint
(
string
element
)
{
cout
<<
element
<<
"
\t
"
;
}
src/overlap/overlap.cpp
View file @
484419a9
...
...
@@ -124,7 +124,7 @@ void ShowHelp(void) {
cerr
<<
"Usage: "
<<
PROGRAM_NAME
<<
" [OPTIONS] -i <input> -cols s1,e1,s2,e2 "
<<
endl
<<
endl
;
cerr
<<
"
Note
s: "
<<
endl
;
cerr
<<
"
Option
s: "
<<
endl
;
cerr
<<
"
\t
-i
\t
"
<<
"Input file. Use
\"
stdin
\"
for pipes."
<<
endl
<<
endl
;
cerr
<<
"
\t
-cols
\t
"
<<
"Specify the columns (1-based) for the starts and ends of the"
<<
endl
;
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment