Skip to content
GitLab
Menu
Projects
Groups
Snippets
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Sign in
Toggle navigation
Menu
Open sidebar
R3
legacy
bedtools2
Commits
4a64280e
Commit
4a64280e
authored
Jan 13, 2011
by
Aaron
Browse files
Added support for zero length features (start = end, e.g. insertions in ref genome).
parent
4ac343ba
Changes
1
Hide whitespace changes
Inline
Side-by-side
src/utils/bedFile/bedFile.h
View file @
4a64280e
...
...
@@ -91,6 +91,7 @@ struct BED {
vector
<
string
>
otherFields
;
// experimental fields for the FJOIN approach.
bool
zeroLength
;
bool
added
;
bool
finished
;
// list of hits from another file.
...
...
@@ -108,6 +109,7 @@ public:
score
(
""
),
strand
(
""
),
otherFields
(),
zeroLength
(
false
),
added
(
false
),
finished
(
false
),
overlaps
()
...
...
@@ -182,6 +184,9 @@ struct BEDCOV {
// Add'l fields for BED12 and/or custom BED annotations
vector
<
string
>
otherFields
;
// flag a zero-length feature
bool
zeroLength
;
// Additional fields specific to computing coverage
map
<
unsigned
int
,
DEPTH
>
depthMap
;
unsigned
int
count
;
...
...
@@ -206,6 +211,9 @@ struct BEDCOVLIST {
// Add'l fields for BED12 and/or custom BED annotations
vector
<
string
>
otherFields
;
// flag a zero-length feature
bool
zeroLength
;
// Additional fields specific to computing coverage
vector
<
map
<
unsigned
int
,
DEPTH
>
>
depthMapList
;
...
...
@@ -499,7 +507,14 @@ private:
bed
.
chrom
=
lineVector
[
0
];
bed
.
start
=
atoi
(
lineVector
[
1
].
c_str
());
bed
.
end
=
atoi
(
lineVector
[
2
].
c_str
());
// handle starts == end (e.g., insertions in reference genome)
if
(
bed
.
start
==
bed
.
end
)
{
bed
.
start
--
;
bed
.
end
++
;
bed
.
zeroLength
=
true
;
}
if
(
this
->
bedType
==
4
)
{
bed
.
name
=
lineVector
[
3
];
}
...
...
@@ -615,7 +630,7 @@ private:
if
(
this
->
bedType
>=
8
&&
_isGff
)
{
bed
.
chrom
=
lineVector
[
0
];
// substract 1 to force the start to be BED-style
bed
.
start
=
atoi
(
lineVector
[
3
].
c_str
())
-
1
;
bed
.
start
=
atoi
(
lineVector
[
3
].
c_str
());
bed
.
end
=
atoi
(
lineVector
[
4
].
c_str
());
bed
.
name
=
lineVector
[
2
];
bed
.
score
=
lineVector
[
5
];
...
...
@@ -625,6 +640,14 @@ private:
// handle the optional 9th field.
if
(
this
->
bedType
==
9
)
bed
.
otherFields
.
push_back
(
lineVector
[
8
]);
// add GFF "group". unused in BED
// handle starts == end (e.g., insertions in reference genome)
if
(
bed
.
start
==
bed
.
end
)
{
bed
.
end
++
;
bed
.
zeroLength
=
true
;
}
// GFF uses 1-based starts
bed
.
start
--
;
}
else
{
cerr
<<
"Error: unexpected number of fields at line: "
<<
lineNum
<<
...
...
@@ -668,24 +691,36 @@ public:
*/
template
<
typename
T
>
inline
void
reportBedTab
(
const
T
&
bed
)
{
// if it is azeroLength feature, we need to
// correct the start and end coords to what they were
// in the original file
CHRPOS
start
=
bed
.
start
;
CHRPOS
end
=
bed
.
end
;
if
(
bed
.
zeroLength
)
{
if
(
_isGff
==
false
)
start
++
;
end
--
;
}
// BED
if
(
_isGff
==
false
&&
_isVcf
==
false
)
{
if
(
this
->
bedType
==
3
)
{
printf
(
"%s
\t
%d
\t
%d
\t
"
,
bed
.
chrom
.
c_str
(),
bed
.
start
,
bed
.
end
);
printf
(
"%s
\t
%d
\t
%d
\t
"
,
bed
.
chrom
.
c_str
(),
start
,
end
);
}
else
if
(
this
->
bedType
==
4
)
{
printf
(
"%s
\t
%d
\t
%d
\t
%s
\t
"
,
bed
.
chrom
.
c_str
(),
bed
.
start
,
bed
.
end
,
bed
.
name
.
c_str
());
printf
(
"%s
\t
%d
\t
%d
\t
%s
\t
"
,
bed
.
chrom
.
c_str
(),
start
,
end
,
bed
.
name
.
c_str
());
}
else
if
(
this
->
bedType
==
5
)
{
printf
(
"%s
\t
%d
\t
%d
\t
%s
\t
%s
\t
"
,
bed
.
chrom
.
c_str
(),
bed
.
start
,
bed
.
end
,
bed
.
name
.
c_str
(),
printf
(
"%s
\t
%d
\t
%d
\t
%s
\t
%s
\t
"
,
bed
.
chrom
.
c_str
(),
start
,
end
,
bed
.
name
.
c_str
(),
bed
.
score
.
c_str
());
}
else
if
(
this
->
bedType
==
6
)
{
printf
(
"%s
\t
%d
\t
%d
\t
%s
\t
%s
\t
%s
\t
"
,
bed
.
chrom
.
c_str
(),
bed
.
start
,
bed
.
end
,
bed
.
name
.
c_str
(),
printf
(
"%s
\t
%d
\t
%d
\t
%s
\t
%s
\t
%s
\t
"
,
bed
.
chrom
.
c_str
(),
start
,
end
,
bed
.
name
.
c_str
(),
bed
.
score
.
c_str
(),
bed
.
strand
.
c_str
());
}
else
if
(
this
->
bedType
>
6
)
{
printf
(
"%s
\t
%d
\t
%d
\t
%s
\t
%s
\t
%s
\t
"
,
bed
.
chrom
.
c_str
(),
bed
.
start
,
bed
.
end
,
bed
.
name
.
c_str
(),
printf
(
"%s
\t
%d
\t
%d
\t
%s
\t
%s
\t
%s
\t
"
,
bed
.
chrom
.
c_str
(),
start
,
end
,
bed
.
name
.
c_str
(),
bed
.
score
.
c_str
(),
bed
.
strand
.
c_str
());
vector
<
string
>::
const_iterator
othIt
=
bed
.
otherFields
.
begin
();
...
...
@@ -697,7 +732,7 @@ public:
}
// VCF
else
if
(
_isGff
==
false
&&
_isVcf
==
true
)
{
printf
(
"%s
\t
%d
\t
"
,
bed
.
chrom
.
c_str
(),
bed
.
start
+
1
);
printf
(
"%s
\t
%d
\t
"
,
bed
.
chrom
.
c_str
(),
start
+
1
);
vector
<
string
>::
const_iterator
othIt
=
bed
.
otherFields
.
begin
();
vector
<
string
>::
const_iterator
othEnd
=
bed
.
otherFields
.
end
();
...
...
@@ -710,14 +745,14 @@ public:
// "GFF-8"
if
(
this
->
bedType
==
8
)
{
printf
(
"%s
\t
%s
\t
%s
\t
%d
\t
%d
\t
%s
\t
%s
\t
%s
\t
"
,
bed
.
chrom
.
c_str
(),
bed
.
otherFields
[
0
].
c_str
(),
bed
.
name
.
c_str
(),
bed
.
start
+
1
,
bed
.
end
,
bed
.
name
.
c_str
(),
start
+
1
,
end
,
bed
.
score
.
c_str
(),
bed
.
strand
.
c_str
(),
bed
.
otherFields
[
1
].
c_str
());
}
// "GFF-9"
else
if
(
this
->
bedType
==
9
)
{
printf
(
"%s
\t
%s
\t
%s
\t
%d
\t
%d
\t
%s
\t
%s
\t
%s
\t
%s
\t
"
,
bed
.
chrom
.
c_str
(),
bed
.
otherFields
[
0
].
c_str
(),
bed
.
name
.
c_str
(),
bed
.
start
+
1
,
bed
.
end
,
bed
.
name
.
c_str
(),
start
+
1
,
end
,
bed
.
score
.
c_str
(),
bed
.
strand
.
c_str
(),
bed
.
otherFields
[
1
].
c_str
(),
bed
.
otherFields
[
2
].
c_str
());
}
...
...
@@ -735,24 +770,36 @@ public:
*/
template
<
typename
T
>
inline
void
reportBedNewLine
(
const
T
&
bed
)
{
// if it is azeroLength feature, we need to
// correct the start and end coords to what they were
// in the original file
CHRPOS
start
=
bed
.
start
;
CHRPOS
end
=
bed
.
end
;
if
(
bed
.
zeroLength
)
{
if
(
_isGff
==
false
)
start
++
;
end
--
;
}
//BED
if
(
_isGff
==
false
&&
_isVcf
==
false
)
{
if
(
this
->
bedType
==
3
)
{
printf
(
"%s
\t
%d
\t
%d
\n
"
,
bed
.
chrom
.
c_str
(),
bed
.
start
,
bed
.
end
);
printf
(
"%s
\t
%d
\t
%d
\n
"
,
bed
.
chrom
.
c_str
(),
start
,
end
);
}
else
if
(
this
->
bedType
==
4
)
{
printf
(
"%s
\t
%d
\t
%d
\t
%s
\n
"
,
bed
.
chrom
.
c_str
(),
bed
.
start
,
bed
.
end
,
bed
.
name
.
c_str
());
printf
(
"%s
\t
%d
\t
%d
\t
%s
\n
"
,
bed
.
chrom
.
c_str
(),
start
,
end
,
bed
.
name
.
c_str
());
}
else
if
(
this
->
bedType
==
5
)
{
printf
(
"%s
\t
%d
\t
%d
\t
%s
\t
%s
\n
"
,
bed
.
chrom
.
c_str
(),
bed
.
start
,
bed
.
end
,
bed
.
name
.
c_str
(),
printf
(
"%s
\t
%d
\t
%d
\t
%s
\t
%s
\n
"
,
bed
.
chrom
.
c_str
(),
start
,
end
,
bed
.
name
.
c_str
(),
bed
.
score
.
c_str
());
}
else
if
(
this
->
bedType
==
6
)
{
printf
(
"%s
\t
%d
\t
%d
\t
%s
\t
%s
\t
%s
\n
"
,
bed
.
chrom
.
c_str
(),
bed
.
start
,
bed
.
end
,
bed
.
name
.
c_str
(),
printf
(
"%s
\t
%d
\t
%d
\t
%s
\t
%s
\t
%s
\n
"
,
bed
.
chrom
.
c_str
(),
start
,
end
,
bed
.
name
.
c_str
(),
bed
.
score
.
c_str
(),
bed
.
strand
.
c_str
());
}
else
if
(
this
->
bedType
>
6
)
{
printf
(
"%s
\t
%d
\t
%d
\t
%s
\t
%s
\t
%s"
,
bed
.
chrom
.
c_str
(),
bed
.
start
,
bed
.
end
,
bed
.
name
.
c_str
(),
printf
(
"%s
\t
%d
\t
%d
\t
%s
\t
%s
\t
%s"
,
bed
.
chrom
.
c_str
(),
start
,
end
,
bed
.
name
.
c_str
(),
bed
.
score
.
c_str
(),
bed
.
strand
.
c_str
());
vector
<
string
>::
const_iterator
othIt
=
bed
.
otherFields
.
begin
();
...
...
@@ -765,7 +812,7 @@ public:
}
// VCF
else
if
(
_isGff
==
false
&&
_isVcf
==
true
)
{
printf
(
"%s
\t
%d
\t
"
,
bed
.
chrom
.
c_str
(),
bed
.
start
+
1
);
printf
(
"%s
\t
%d
\t
"
,
bed
.
chrom
.
c_str
(),
start
+
1
);
vector
<
string
>::
const_iterator
othIt
=
bed
.
otherFields
.
begin
();
vector
<
string
>::
const_iterator
othEnd
=
bed
.
otherFields
.
end
();
...
...
@@ -779,14 +826,14 @@ public:
// "GFF-8"
if
(
this
->
bedType
==
8
)
{
printf
(
"%s
\t
%s
\t
%s
\t
%d
\t
%d
\t
%s
\t
%s
\t
%s
\n
"
,
bed
.
chrom
.
c_str
(),
bed
.
otherFields
[
0
].
c_str
(),
bed
.
name
.
c_str
(),
bed
.
start
+
1
,
bed
.
end
,
bed
.
name
.
c_str
(),
start
+
1
,
end
,
bed
.
score
.
c_str
(),
bed
.
strand
.
c_str
(),
bed
.
otherFields
[
1
].
c_str
());
}
// "GFF-9"
else
if
(
this
->
bedType
==
9
)
{
printf
(
"%s
\t
%s
\t
%s
\t
%d
\t
%d
\t
%s
\t
%s
\t
%s
\t
%s
\n
"
,
bed
.
chrom
.
c_str
(),
bed
.
otherFields
[
0
].
c_str
(),
bed
.
name
.
c_str
(),
bed
.
start
+
1
,
bed
.
end
,
bed
.
name
.
c_str
(),
start
+
1
,
end
,
bed
.
score
.
c_str
(),
bed
.
strand
.
c_str
(),
bed
.
otherFields
[
1
].
c_str
(),
bed
.
otherFields
[
2
].
c_str
());
}
...
...
@@ -805,6 +852,16 @@ public:
*/
template
<
typename
T
>
inline
void
reportBedRangeTab
(
const
T
&
bed
,
CHRPOS
start
,
CHRPOS
end
)
{
// if it is azeroLength feature, we need to
// correct the start and end coords to what they were
// in the original file
if
(
bed
.
zeroLength
)
{
if
(
_isGff
==
false
)
start
++
;
end
--
;
}
// BED
if
(
_isGff
==
false
&&
_isVcf
==
false
)
{
if
(
this
->
bedType
==
3
)
{
...
...
@@ -873,6 +930,16 @@ public:
*/
template
<
typename
T
>
inline
void
reportBedRangeNewLine
(
const
T
&
bed
,
CHRPOS
start
,
CHRPOS
end
)
{
// if it is azeroLength feature, we need to
// correct the start and end coords to what they were
// in the original file
if
(
bed
.
zeroLength
)
{
if
(
_isGff
==
false
)
start
++
;
end
--
;
}
// BED
if
(
_isGff
==
false
&&
_isVcf
==
false
)
{
if
(
this
->
bedType
==
3
)
{
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment