Skip to content
GitLab
Projects
Groups
Snippets
/
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Sign in
Toggle navigation
Menu
Open sidebar
R3
legacy
bedtools2
Commits
83c6faa9
Commit
83c6faa9
authored
Apr 17, 2015
by
lindenb
Browse files
sort BED on faidx file
parent
ada04b66
Changes
3
Hide whitespace changes
Inline
Side-by-side
src/sortBed/sortBed.cpp
View file @
83c6faa9
...
...
@@ -9,13 +9,14 @@
Licenced under the GNU General Public License 2.0 license.
******************************************************************************/
#include
<set>
#include
"lineFileUtilities.h"
#include
"sortBed.h"
//
// Constructor
//
BedSort
::
BedSort
(
string
&
bedFile
,
bool
printHeader
)
{
BedSort
::
BedSort
(
string
&
bedFile
,
bool
printHeader
,
string
&
faidxFile
)
:
_faidxFile
(
faidxFile
)
{
_bedFile
=
bedFile
;
_bed
=
new
BedFile
(
bedFile
);
...
...
@@ -47,6 +48,72 @@ void BedSort::SortBed() {
}
}
void
BedSort
::
SortBedOnFaidx
()
{
set
<
string
>
all_chromosomes
;
if
(
_faidxFile
.
empty
())
{
cerr
<<
"[sortBed] File for fasta index undefined."
<<
endl
;
exit
(
EXIT_FAILURE
);
}
/* read FAIDX file */
ifstream
faidx
(
_faidxFile
.
c_str
(),
ios
::
in
);
if
(
!
faidx
.
is_open
())
{
cerr
<<
"Cannot open
\"
"
<<
_faidxFile
<<
"
\"
"
<<
strerror
(
errno
)
<<
endl
;
exit
(
EXIT_FAILURE
);
}
string
line
;
while
(
getline
(
faidx
,
line
,
'\n'
))
{
if
(
line
.
empty
())
continue
;
string
::
size_type
tab
=
line
.
find
(
'\t'
);
if
(
tab
!=
string
::
npos
)
{
line
.
erase
(
tab
);
}
if
(
all_chromosomes
.
find
(
line
)
!=
all_chromosomes
.
end
())
{
cerr
<<
"Chromosome
\"
"
<<
line
<<
"
\"
defined twice in "
<<
_faidxFile
<<
endl
;
exit
(
EXIT_FAILURE
);
}
_tid2chrom
[
_tid2chrom
.
size
()]
=
line
;
all_chromosomes
.
insert
(
line
);
}
faidx
.
close
();
/** end read FAIDX */
//check BED chromosomes
for
(
masterBedMapNoBin
::
iterator
m
=
_bed
->
bedMapNoBin
.
begin
();
m
!=
_bed
->
bedMapNoBin
.
end
();
++
m
)
{
if
(
all_chromosomes
.
find
(
m
->
first
)
==
all_chromosomes
.
end
())
{
cerr
<<
"Chromosome
\"
"
<<
m
->
first
<<
"
\"
undefined in "
<<
_faidxFile
<<
endl
;
exit
(
EXIT_FAILURE
);
}
}
//loop over each chromosome using the faidx index
for
(
size_t
tid
=
0
;
tid
<
_tid2chrom
.
size
();
++
tid
)
{
string
chrom
=
_tid2chrom
[
tid
];
masterBedMapNoBin
::
iterator
m
=
_bed
->
bedMapNoBin
.
find
(
chrom
);
if
(
m
==
_bed
->
bedMapNoBin
.
end
()
)
continue
;
//this chromosome is not present in BED
// bedList is already sorted by start position.
vector
<
BED
>
bedList
=
m
->
second
;
for
(
unsigned
int
i
=
0
;
i
<
bedList
.
size
();
++
i
)
{
_bed
->
reportBedNewLine
(
bedList
[
i
]);
}
}
}
void
BedSort
::
SortBedBySizeAsc
()
{
...
...
src/sortBed/sortBed.h
View file @
83c6faa9
...
...
@@ -14,6 +14,7 @@
#include
<algorithm>
#include
<iostream>
#include
<fstream>
#include
<map>
using
namespace
std
;
...
...
@@ -26,12 +27,13 @@ class BedSort {
public:
// constructor
BedSort
(
string
&
bedFile
,
bool
printHeader
);
BedSort
(
string
&
bedFile
,
bool
printHeader
,
string
&
faidxFile
);
// destructor
~
BedSort
(
void
);
void
SortBed
();
// the default. sorts by chrom (asc.) then by start (asc.)
void
SortBedOnFaidx
();
// sort BED using a given Reference (e.g: faidx file )
void
SortBedBySizeAsc
();
void
SortBedBySizeDesc
();
void
SortBedByChromThenSizeAsc
();
...
...
@@ -41,7 +43,8 @@ public:
private:
string
_bedFile
;
string
_faidxFile
;
/* genome index, generated by samtools faidx 1st col is chromosome */
map
<
size_t
,
string
>
_tid2chrom
;
/** map chromosome name to sort order */
// instance of a bed file class.
BedFile
*
_bed
;
};
src/sortBed/sortMain.cpp
View file @
83c6faa9
...
...
@@ -31,6 +31,7 @@ int sort_main(int argc, char* argv[]) {
// input files
string
bedFile
=
"stdin"
;
string
faidxFile
;
bool
haveBed
=
true
;
int
sortChoices
=
0
;
...
...
@@ -40,6 +41,7 @@ int sort_main(int argc, char* argv[]) {
bool
sortByChromThenSizeDesc
=
false
;
bool
sortByChromThenScoreAsc
=
false
;
bool
sortByChromThenScoreDesc
=
false
;
bool
sortByFaidx
=
false
;
bool
printHeader
=
false
;
for
(
int
i
=
1
;
i
<
argc
;
i
++
)
{
...
...
@@ -88,6 +90,14 @@ int sort_main(int argc, char* argv[]) {
sortByChromThenScoreDesc
=
true
;
sortChoices
++
;
}
else
if
(
PARAMETER_CHECK
(
"-faidx"
,
6
,
parameterLength
))
{
if
((
i
+
1
)
<
argc
)
{
faidxFile
=
argv
[
i
+
1
];
i
++
;
}
sortByFaidx
=
true
;
sortChoices
++
;
}
else
if
(
PARAMETER_CHECK
(
"-header"
,
7
,
parameterLength
))
{
printHeader
=
true
;
}
...
...
@@ -109,7 +119,7 @@ int sort_main(int argc, char* argv[]) {
if
(
!
showHelp
)
{
BedSort
*
bm
=
new
BedSort
(
bedFile
,
printHeader
);
BedSort
*
bm
=
new
BedSort
(
bedFile
,
printHeader
,
faidxFile
);
if
(
sortBySizeAsc
)
{
bm
->
SortBedBySizeAsc
();
...
...
@@ -129,6 +139,9 @@ int sort_main(int argc, char* argv[]) {
else
if
(
sortByChromThenScoreDesc
)
{
bm
->
SortBedByChromThenScoreDesc
();
}
else
if
(
sortByFaidx
)
{
bm
->
SortBedOnFaidx
();
}
else
{
bm
->
SortBed
();
}
...
...
@@ -148,13 +161,14 @@ void sort_help(void) {
cerr
<<
"Usage: "
<<
PROGRAM_NAME
<<
" [OPTIONS] -i <bed/gff/vcf>"
<<
endl
<<
endl
;
cerr
<<
"Options: "
<<
endl
;
cerr
<<
"
\t
"
<<
"-sizeA
\t\t
"
<<
"Sort by feature size in ascending order."
<<
endl
;
cerr
<<
"
\t
"
<<
"-sizeD
\t\t
"
<<
"Sort by feature size in descending order."
<<
endl
;
cerr
<<
"
\t
"
<<
"-chrThenSizeA
\t
"
<<
"Sort by chrom (asc), then feature size (asc)."
<<
endl
;
cerr
<<
"
\t
"
<<
"-chrThenSizeD
\t
"
<<
"Sort by chrom (asc), then feature size (desc)."
<<
endl
;
cerr
<<
"
\t
"
<<
"-chrThenScoreA
\t
"
<<
"Sort by chrom (asc), then score (asc)."
<<
endl
;
cerr
<<
"
\t
"
<<
"-chrThenScoreD
\t
"
<<
"Sort by chrom (asc), then score (desc)."
<<
endl
;
cerr
<<
"
\t
"
<<
"-sizeA
\t\t\t
"
<<
"Sort by feature size in ascending order."
<<
endl
;
cerr
<<
"
\t
"
<<
"-sizeD
\t\t\t
"
<<
"Sort by feature size in descending order."
<<
endl
;
cerr
<<
"
\t
"
<<
"-chrThenSizeA
\t\t
"
<<
"Sort by chrom (asc), then feature size (asc)."
<<
endl
;
cerr
<<
"
\t
"
<<
"-chrThenSizeD
\t\t
"
<<
"Sort by chrom (asc), then feature size (desc)."
<<
endl
;
cerr
<<
"
\t
"
<<
"-chrThenScoreA
\t\t
"
<<
"Sort by chrom (asc), then score (asc)."
<<
endl
;
cerr
<<
"
\t
"
<<
"-chrThenScoreD
\t\t
"
<<
"Sort by chrom (asc), then score (desc)."
<<
endl
;
cerr
<<
"
\t
"
<<
"-faidx (names.txt)
\t
"
<<
"Sort according to the chromosomes declared in
\"
names.txt
\"
"
<<
endl
;
cerr
<<
"
\t
-header
\t
"
<<
"Print the header from the A file prior to results."
<<
endl
<<
endl
;
exit
(
1
);
...
...
Write
Preview
Supports
Markdown
0%
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment