Skip to content
GitLab
Projects
Groups
Snippets
/
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Sign in
Toggle navigation
Menu
Open sidebar
R3
legacy
bedtools2
Commits
43f88db6
Commit
43f88db6
authored
Apr 20, 2015
by
Aaron Quinlan
Browse files
Merge pull request #219 from lindenb/sortOnRef
sort BED on faidx file
parents
ada04b66
83c6faa9
Changes
3
Hide whitespace changes
Inline
Side-by-side
src/sortBed/sortBed.cpp
View file @
43f88db6
...
...
@@ -9,13 +9,14 @@
Licenced under the GNU General Public License 2.0 license.
******************************************************************************/
#include
<set>
#include
"lineFileUtilities.h"
#include
"sortBed.h"
//
// Constructor
//
BedSort
::
BedSort
(
string
&
bedFile
,
bool
printHeader
)
{
BedSort
::
BedSort
(
string
&
bedFile
,
bool
printHeader
,
string
&
faidxFile
)
:
_faidxFile
(
faidxFile
)
{
_bedFile
=
bedFile
;
_bed
=
new
BedFile
(
bedFile
);
...
...
@@ -47,6 +48,72 @@ void BedSort::SortBed() {
}
}
void
BedSort
::
SortBedOnFaidx
()
{
set
<
string
>
all_chromosomes
;
if
(
_faidxFile
.
empty
())
{
cerr
<<
"[sortBed] File for fasta index undefined."
<<
endl
;
exit
(
EXIT_FAILURE
);
}
/* read FAIDX file */
ifstream
faidx
(
_faidxFile
.
c_str
(),
ios
::
in
);
if
(
!
faidx
.
is_open
())
{
cerr
<<
"Cannot open
\"
"
<<
_faidxFile
<<
"
\"
"
<<
strerror
(
errno
)
<<
endl
;
exit
(
EXIT_FAILURE
);
}
string
line
;
while
(
getline
(
faidx
,
line
,
'\n'
))
{
if
(
line
.
empty
())
continue
;
string
::
size_type
tab
=
line
.
find
(
'\t'
);
if
(
tab
!=
string
::
npos
)
{
line
.
erase
(
tab
);
}
if
(
all_chromosomes
.
find
(
line
)
!=
all_chromosomes
.
end
())
{
cerr
<<
"Chromosome
\"
"
<<
line
<<
"
\"
defined twice in "
<<
_faidxFile
<<
endl
;
exit
(
EXIT_FAILURE
);
}
_tid2chrom
[
_tid2chrom
.
size
()]
=
line
;
all_chromosomes
.
insert
(
line
);
}
faidx
.
close
();
/** end read FAIDX */
//check BED chromosomes
for
(
masterBedMapNoBin
::
iterator
m
=
_bed
->
bedMapNoBin
.
begin
();
m
!=
_bed
->
bedMapNoBin
.
end
();
++
m
)
{
if
(
all_chromosomes
.
find
(
m
->
first
)
==
all_chromosomes
.
end
())
{
cerr
<<
"Chromosome
\"
"
<<
m
->
first
<<
"
\"
undefined in "
<<
_faidxFile
<<
endl
;
exit
(
EXIT_FAILURE
);
}
}
//loop over each chromosome using the faidx index
for
(
size_t
tid
=
0
;
tid
<
_tid2chrom
.
size
();
++
tid
)
{
string
chrom
=
_tid2chrom
[
tid
];
masterBedMapNoBin
::
iterator
m
=
_bed
->
bedMapNoBin
.
find
(
chrom
);
if
(
m
==
_bed
->
bedMapNoBin
.
end
()
)
continue
;
//this chromosome is not present in BED
// bedList is already sorted by start position.
vector
<
BED
>
bedList
=
m
->
second
;
for
(
unsigned
int
i
=
0
;
i
<
bedList
.
size
();
++
i
)
{
_bed
->
reportBedNewLine
(
bedList
[
i
]);
}
}
}
void
BedSort
::
SortBedBySizeAsc
()
{
...
...
src/sortBed/sortBed.h
View file @
43f88db6
...
...
@@ -14,6 +14,7 @@
#include
<algorithm>
#include
<iostream>
#include
<fstream>
#include
<map>
using
namespace
std
;
...
...
@@ -26,12 +27,13 @@ class BedSort {
public:
// constructor
BedSort
(
string
&
bedFile
,
bool
printHeader
);
BedSort
(
string
&
bedFile
,
bool
printHeader
,
string
&
faidxFile
);
// destructor
~
BedSort
(
void
);
void
SortBed
();
// the default. sorts by chrom (asc.) then by start (asc.)
void
SortBedOnFaidx
();
// sort BED using a given Reference (e.g: faidx file )
void
SortBedBySizeAsc
();
void
SortBedBySizeDesc
();
void
SortBedByChromThenSizeAsc
();
...
...
@@ -41,7 +43,8 @@ public:
private:
string
_bedFile
;
string
_faidxFile
;
/* genome index, generated by samtools faidx 1st col is chromosome */
map
<
size_t
,
string
>
_tid2chrom
;
/** map chromosome name to sort order */
// instance of a bed file class.
BedFile
*
_bed
;
};
src/sortBed/sortMain.cpp
View file @
43f88db6
...
...
@@ -31,6 +31,7 @@ int sort_main(int argc, char* argv[]) {
// input files
string
bedFile
=
"stdin"
;
string
faidxFile
;
bool
haveBed
=
true
;
int
sortChoices
=
0
;
...
...
@@ -40,6 +41,7 @@ int sort_main(int argc, char* argv[]) {
bool
sortByChromThenSizeDesc
=
false
;
bool
sortByChromThenScoreAsc
=
false
;
bool
sortByChromThenScoreDesc
=
false
;
bool
sortByFaidx
=
false
;
bool
printHeader
=
false
;
for
(
int
i
=
1
;
i
<
argc
;
i
++
)
{
...
...
@@ -88,6 +90,14 @@ int sort_main(int argc, char* argv[]) {
sortByChromThenScoreDesc
=
true
;
sortChoices
++
;
}
else
if
(
PARAMETER_CHECK
(
"-faidx"
,
6
,
parameterLength
))
{
if
((
i
+
1
)
<
argc
)
{
faidxFile
=
argv
[
i
+
1
];
i
++
;
}
sortByFaidx
=
true
;
sortChoices
++
;
}
else
if
(
PARAMETER_CHECK
(
"-header"
,
7
,
parameterLength
))
{
printHeader
=
true
;
}
...
...
@@ -109,7 +119,7 @@ int sort_main(int argc, char* argv[]) {
if
(
!
showHelp
)
{
BedSort
*
bm
=
new
BedSort
(
bedFile
,
printHeader
);
BedSort
*
bm
=
new
BedSort
(
bedFile
,
printHeader
,
faidxFile
);
if
(
sortBySizeAsc
)
{
bm
->
SortBedBySizeAsc
();
...
...
@@ -129,6 +139,9 @@ int sort_main(int argc, char* argv[]) {
else
if
(
sortByChromThenScoreDesc
)
{
bm
->
SortBedByChromThenScoreDesc
();
}
else
if
(
sortByFaidx
)
{
bm
->
SortBedOnFaidx
();
}
else
{
bm
->
SortBed
();
}
...
...
@@ -148,13 +161,14 @@ void sort_help(void) {
cerr
<<
"Usage: "
<<
PROGRAM_NAME
<<
" [OPTIONS] -i <bed/gff/vcf>"
<<
endl
<<
endl
;
cerr
<<
"Options: "
<<
endl
;
cerr
<<
"
\t
"
<<
"-sizeA
\t\t
"
<<
"Sort by feature size in ascending order."
<<
endl
;
cerr
<<
"
\t
"
<<
"-sizeD
\t\t
"
<<
"Sort by feature size in descending order."
<<
endl
;
cerr
<<
"
\t
"
<<
"-chrThenSizeA
\t
"
<<
"Sort by chrom (asc), then feature size (asc)."
<<
endl
;
cerr
<<
"
\t
"
<<
"-chrThenSizeD
\t
"
<<
"Sort by chrom (asc), then feature size (desc)."
<<
endl
;
cerr
<<
"
\t
"
<<
"-chrThenScoreA
\t
"
<<
"Sort by chrom (asc), then score (asc)."
<<
endl
;
cerr
<<
"
\t
"
<<
"-chrThenScoreD
\t
"
<<
"Sort by chrom (asc), then score (desc)."
<<
endl
;
cerr
<<
"
\t
"
<<
"-sizeA
\t\t\t
"
<<
"Sort by feature size in ascending order."
<<
endl
;
cerr
<<
"
\t
"
<<
"-sizeD
\t\t\t
"
<<
"Sort by feature size in descending order."
<<
endl
;
cerr
<<
"
\t
"
<<
"-chrThenSizeA
\t\t
"
<<
"Sort by chrom (asc), then feature size (asc)."
<<
endl
;
cerr
<<
"
\t
"
<<
"-chrThenSizeD
\t\t
"
<<
"Sort by chrom (asc), then feature size (desc)."
<<
endl
;
cerr
<<
"
\t
"
<<
"-chrThenScoreA
\t\t
"
<<
"Sort by chrom (asc), then score (asc)."
<<
endl
;
cerr
<<
"
\t
"
<<
"-chrThenScoreD
\t\t
"
<<
"Sort by chrom (asc), then score (desc)."
<<
endl
;
cerr
<<
"
\t
"
<<
"-faidx (names.txt)
\t
"
<<
"Sort according to the chromosomes declared in
\"
names.txt
\"
"
<<
endl
;
cerr
<<
"
\t
-header
\t
"
<<
"Print the header from the A file prior to results."
<<
endl
<<
endl
;
exit
(
1
);
...
...
Write
Preview
Supports
Markdown
0%
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment