Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Sign in
Toggle navigation
Menu
Open sidebar
R3
legacy
bedtools2
Commits
b6f8ff74
Commit
b6f8ff74
authored
Jun 22, 2015
by
Neil Kindlon
Browse files
Fixed 44: race condition in bgzip for vcf files.
parent
85818904
Changes
10
Hide whitespace changes
Inline
Side-by-side
src/utils/FileRecordTools/FileReaders/BufferedStreamMgr.cpp
View file @
b6f8ff74
...
...
@@ -68,7 +68,7 @@ bool BufferedStreamMgr::getTypeData()
_currScanBuffer
=
_inputStreamMgr
->
getSavedData
();
_typeChecker
.
setFilename
(
_filename
);
do
{
if
(
!
_typeChecker
.
scanBuffer
(
_currScanBuffer
.
c_str
(),
_currScanBuffer
.
size
(),
_inputStreamMgr
->
getEofHit
())
&&
!
_typeChecker
.
needsMoreData
())
{
if
(
!
_typeChecker
.
scanBuffer
(
_currScanBuffer
.
c_str
(),
_currScanBuffer
.
size
(),
_inputStreamMgr
->
getEofHit
()
,
_inputStreamMgr
->
isCompressed
()
)
&&
!
_typeChecker
.
needsMoreData
())
{
return
false
;
}
else
if
(
_typeChecker
.
needsMoreData
())
{
if
(
!
_inputStreamMgr
->
populateScanBuffer
())
{
...
...
src/utils/FileRecordTools/FileReaders/InputStreamMgr.h
View file @
b6f8ff74
...
...
@@ -33,9 +33,12 @@ public:
bool
populateScanBuffer
();
const
QuickString
&
getSavedData
()
const
{
return
_saveDataStr
;
}
bool
isGzipped
()
const
{
return
_isGzipped
;
}
bool
isBGzipped
()
const
{
return
_isBgzipped
;
}
bool
isBam
()
const
{
return
_isBam
;
}
bool
isCompressed
()
const
{
return
_isGzipped
||
_isBgzipped
||
_isBam
;
}
PushBackStreamBuf
*
getPushBackStreamBuf
()
const
{
return
_pushBackStreamBuf
;
}
// void getSavedData(QuickString &str) const { str = _saveDataStr; }
bool
isBam
()
const
{
return
_isBam
;
}
BamTools
::
BamReader
*
getBamReader
()
{
return
_bamReader
;
}
bool
resetStream
();
bool
getEofHit
()
{
return
_eofHit
;
}
...
...
src/utils/fileType/FileRecordTypeChecker.cpp
View file @
b6f8ff74
...
...
@@ -20,6 +20,7 @@ FileRecordTypeChecker::FileRecordTypeChecker()
_isGFF
=
false
;
_isGFFplus
=
false
;
_isGzipped
=
false
;
_isCompressed
=
false
;
_insufficientData
=
false
;
_fourthFieldNumeric
=
false
;
_givenEmptyBuffer
=
false
;
...
...
@@ -80,9 +81,10 @@ FileRecordTypeChecker::FileRecordTypeChecker()
}
bool
FileRecordTypeChecker
::
scanBuffer
(
const
char
*
buffer
,
size_t
len
,
bool
eofHit
)
bool
FileRecordTypeChecker
::
scanBuffer
(
const
char
*
buffer
,
size_t
len
,
bool
eofHit
,
bool
isCompressed
)
{
_eofHit
=
eofHit
;
_isCompressed
=
isCompressed
;
_numBytesInBuffer
=
len
;
if
(
_numBytesInBuffer
==
0
)
{
_fileType
=
EMPTY_FILE_TYPE
;
...
...
@@ -281,7 +283,7 @@ bool FileRecordTypeChecker::isTextDelimtedFormat(const char *buffer, size_t len)
{
//Break single string buffer into vector of QuickStrings. Delimiter is newline.
_tokenizer
.
setKeepFinalIncompleteElem
(
Tokenizer
::
IGNORE
);
int
numLines
=
_tokenizer
.
tokenize
(
buffer
,
'\n'
,
_eofHit
);
int
numLines
=
_tokenizer
.
tokenize
(
buffer
,
'\n'
,
_eofHit
,
_isCompressed
);
//anticipated delimiter characters are tab, comma, and semi-colon.
//If we need new ones, they must be added in this method.
...
...
src/utils/fileType/FileRecordTypeChecker.h
View file @
b6f8ff74
...
...
@@ -35,7 +35,7 @@ public:
GFF_PLUS_RECORD_TYPE
}
RECORD_TYPE
;
void
setFilename
(
const
QuickString
&
filename
)
{
_filename
=
filename
;
}
bool
scanBuffer
(
const
char
*
buf
,
size_t
len
,
bool
eofHit
);
bool
scanBuffer
(
const
char
*
buf
,
size_t
len
,
bool
eofHit
,
bool
isCompressed
=
false
);
bool
needsMoreData
()
const
{
return
_insufficientData
;
}
bool
recordTypeHasName
(
RECORD_TYPE
type
)
const
{
return
_hasName
.
find
(
type
)
!=
_hasName
.
end
();
}
...
...
@@ -105,6 +105,7 @@ private:
bool
_isGFF
;
bool
_isGFFplus
;
bool
_isGzipped
;
bool
_isCompressed
;
bool
_insufficientData
;
//set to true if scan buffer had only header lines.
bool
_fourthFieldNumeric
;
//this is just to distinguish between Bed4 and BedGraph files.
bool
_givenEmptyBuffer
;
...
...
src/utils/general/Makefile
View file @
b6f8ff74
...
...
@@ -11,7 +11,7 @@ INCLUDES = -I$(UTILITIES_DIR)/lineFileUtilities/ \
# define our source and object files
# ----------------------------------
SOURCES
=
QuickString.h QuickString.cpp ParseTools.h ParseTools.cpp PushBackStreamBuf.cpp PushBackStreamBuf.h CompressionTools.h CompressionTools.cpp
\
Tokenizer.h Tokenizer.
h
CommonHelp.h CommonHelp.cpp ErrorMsg.h ErrorMsg.cpp
Tokenizer.h Tokenizer.
cpp
CommonHelp.h CommonHelp.cpp ErrorMsg.h ErrorMsg.cpp
OBJECTS
=
QuickString.o ParseTools.o PushBackStreamBuf.o CompressionTools.o Tokenizer.o CommonHelp.o
BUILT_OBJECTS
=
$(
patsubst
%,
$(OBJ_DIR)
/%,
$(OBJECTS)
)
...
...
src/utils/general/Tokenizer.cpp
View file @
b6f8ff74
...
...
@@ -27,7 +27,7 @@ void Tokenizer::setNumExpectedItems(int newSize) {
resize
(
newSize
);
}
int
Tokenizer
::
tokenize
(
const
QuickString
&
str
,
char
delimiter
,
bool
eofHit
)
{
int
Tokenizer
::
tokenize
(
const
QuickString
&
str
,
char
delimiter
,
bool
eofHit
,
bool
isCompressed
)
{
int
strLen
=
(
int
)
str
.
size
();
...
...
@@ -42,7 +42,7 @@ int Tokenizer::tokenize(const QuickString &str, char delimiter, bool eofHit) {
}
if
(
currPos
>
startPos
)
{
if
((
currPos
==
strLen
&&
_keepFinalIncElem
!=
USE_NOW
)
&&
(
!
(
delimiter
==
'\n'
&&
eofHit
)))
{
(
(
!
(
delimiter
==
'\n'
&&
eofHit
))
||
isCompressed
)
)
{
//we found an incomplete final element.
// if we're ignoring incomplete elems, do nothing with it.
currIdx
--
;
//make sure it's not included in the final count of valid elems.
...
...
src/utils/general/Tokenizer.h
View file @
b6f8ff74
...
...
@@ -22,7 +22,7 @@ public:
// If not, don't worry about it.
void
setNumExpectedItems
(
int
val
);
int
tokenize
(
const
QuickString
&
str
,
char
delimiter
=
'\t'
,
bool
eofHit
=
false
);
int
tokenize
(
const
QuickString
&
str
,
char
delimiter
=
'\t'
,
bool
eofHit
=
false
,
bool
isCompressed
=
true
);
// If the final element ends before a delim char, that means
// the buffer passed in ends mid-element. The last, incomplete
...
...
test/intersect/bug44_a.vcf.gz
0 → 100644
View file @
b6f8ff74
File added
test/intersect/bug44_b.bed
0 → 100644
View file @
b6f8ff74
9 141151174 141151213 Low_complexity
9 141151355 141151475 LINE
9 141151514 141151815 SINE
9 141151817 141151937 SINE
9 141152314 141152378 Simple_repeat
9 141152592 141152787 SINE
9 141152832 141153431 LTR
MT 2591 2747 rRNA
MT 3230 3308 tRNA
MT 4329 4401 tRNA
MT 7448 7515 tRNA
X 67103806 67109934 LINE
X 134217438 134217916 LTR
X 8388422 8388715 SINE
X 16777024 16777338 SINE
X 25165534 25165834 LINE
X 41942860 41943542 LINE
X 50331587 50331685 SINE
X 75496377 75497894 LINE
test/intersect/test-intersect.sh
View file @
b6f8ff74
...
...
@@ -624,6 +624,18 @@ $BT intersect -a bug223_f.vcf -b bug223_f.vcf | cut -f1-6 > obs
check exp obs
rm
exp obs
##################################################################
# Bug 44: test that bgzipped vcf file works correctly
# with race condition
##################################################################
echo
" intersect.t51...
\c
"
echo
\
"MT 2706 . A G 2965 PASS BRF=0.05;FR=1;HP=1;HapScore=1;MGOF=17;MMLQ=30;MQ=62.05;NF=7607;NR=8147;PP=2965;QD=20;SC=AGGCGGGCATAACACAGCAAG;SbPval=0.52;Source=Platypus;TC=15840;TCF=7679;TCR=8161;TR=15754;WE=2749;WS=2693;CSQ=G|ENSG00000198763|ENST00000361453|Transcript|upstream_gene_variant||||||rs2854128|1764|1|MT-ND2|HGNC|7456|protein_coding|YES||ENSP00000355046|NU2M_HUMAN|Q7GXY9_HUMAN&Q5Q3P5_HUMAN&Q14X33_HUMAN&Q14WT3_HUMAN&A6ZH82_HUMAN&A6ZGN8_HUMAN&A6ZGG3_HUMAN|UPI0000000AA2||||||A:0.1656|||||||||||||,G|ENSG00000210151|ENST00000387416|Transcript|downstream_gene_variant||||||rs2854128|4740|-1|MT-TS1|HGNC|7497|Mt_tRNA|YES|||||||||||A:0.1656|||||||||||||,G|ENSG00000210077|ENST00000387342|Transcript|downstream_gene_variant||||||rs2854128|1036|1|MT-TV|HGNC|7500|Mt_tRNA|YES|||||||||||A:0.1656|||||||||||||,G|ENSG00000210144|ENST00000387409|Transcript|downstream_gene_variant||||||rs2854128|3120|-1|MT-TY|HGNC|7502|Mt_tRNA|YES|||||||||||A:0.1656|||||||||||||,G|ENSG00000210117|ENST00000387382|Transcript|upstream_gene_variant||||||rs2854128|2806|1|MT-TW|HGNC|7501|Mt_tRNA|YES|||||||||||A:0.1656|||||||||||||,G|ENSG00000210107|ENST00000387372|Transcript|downstream_gene_variant||||||rs2854128|1623|-1|MT-TQ|HGNC|7495|Mt_tRNA|YES|||||||||||A:0.1656|||||||||||||,G|ENSG00000210140|ENST00000387405|Transcript|downstream_gene_variant||||||rs2854128|3055|-1|MT-TC|HGNC|7477|Mt_tRNA|YES|||||||||||A:0.1656|||||||||||||,G|ENSG00000211459|ENST00000389680|Transcript|downstream_gene_variant||||||rs2854128|1105|1|MT-RNR1|HGNC|7470|Mt_rRNA|YES|||||||||||A:0.1656|||||||||||||,G|ENSG00000210082|ENST00000387347|Transcript|non_coding_transcript_exon_variant&non_coding_transcript_variant|1036|||||rs2854128||1|MT-RNR2|HGNC|7471|Mt_rRNA|YES||||||||1/1|||A:0.1656|||||||||||||,G|ENSG00000210127|ENST00000387392|Transcript|downstream_gene_variant||||||rs2854128|2881|-1|MT-TA|HGNC|7475|Mt_tRNA|YES|||||||||||A:0.1656|||||||||||||,G|ENSG00000198712|ENST00000361739|Transcript|upstream_gene_variant||||||rs2854128|4880|1|MT-CO2|HGNC|7421|protein_coding|YES||ENSP00000354876|COX2_HUMAN|Q7GXZ8_HUMAN&Q4R1L5_HUMAN&Q4R1L3_HUMAN&Q14XT3_HUMAN&K7WVJ5_HUMAN&H9E7W2_HUMAN&H9E7T7_HUMAN&H9E7P8_HUMAN&H9E7F7_HUMAN&E2DTL8_HUMAN&D3WYY9_HUMAN&D2Y6Y2_HUMAN&D2Y6Y1_HUMAN&B2YKU2_HUMAN|UPI0000000AA4||||||A:0.1656|||||||||||||,G|ENSG00000210049|ENST00000387314|Transcript|downstream_gene_variant||||||rs2854128|2059|1|MT-TF|HGNC|7481|Mt_tRNA|YES|||||||||||A:0.1656|||||||||||||,G|ENSG00000198888|ENST00000361390|Transcript|upstream_gene_variant||||||rs2854128|601|1|MT-ND1|HGNC|7455|protein_coding|YES||ENSP00000354687|NU1M_HUMAN|Q85KV6_HUMAN&Q8WCX9_HUMAN&Q5Q757_HUMAN&Q14WI3_HUMAN&G3EBI1_HUMAN&D2Y6X8_HUMAN&D2Y6X6_HUMAN&A6ZHG8_HUMAN|UPI0000000AA1||||||A:0.1656|||||||||||||,G|ENSG00000209082|ENST00000386347|Transcript|upstream_gene_variant||||||rs2854128|524|1|MT-TL1|HGNC|7490|Mt_tRNA|YES|||||||||||A:0.1656|||||||||||||,G|ENSG00000198804|ENST00000361624|Transcript|upstream_gene_variant||||||rs2854128|3198|1|MT-CO1|HGNC|7419|protein_coding|YES||ENSP00000354499|COX1_HUMAN|Q957U9_HUMAN&Q7GXY8_HUMAN&M9Z2G2_HUMAN&Q8HBX8_HUMAN&Q5Q1W2_HUMAN&Q4R1L4_HUMAN&Q14XD3_HUMAN&Q14X83_HUMAN&F8U4W0_HUMAN&D3WYY6_HUMAN&D3WYY5_HUMAN&D3WYY4_HUMAN&D2Y6W4_HUMAN&C8YAE4_HUMAN&C3UPN2_HUMAN&B7TCT8_HUMAN&B2Y9D8_HUMAN&A5YMT3_HUMAN&A1XP63_HUMAN&A0S1I7_HUMAN|UPI0000000AA3||||||A:0.1656|||||||||||||,G|ENSG00000210154|ENST00000387419|Transcript|upstream_gene_variant||||||rs2854128|4812|1|MT-TD|HGNC|7478|Mt_tRNA|YES|||||||||||A:0.1656|||||||||||||,G|ENSG00000210112|ENST00000387377|Transcript|upstream_gene_variant||||||rs2854128|1696|1|MT-TM|HGNC|7492|Mt_tRNA|YES|||||||||||A:0.1656|||||||||||||,G|ENSG00000210135|ENST00000387400|Transcript|downstream_gene_variant||||||rs2854128|2951|-1|MT-TN|HGNC|7493|Mt_tRNA|YES|||||||||||A:0.1656|||||||||||||,G|ENSG00000210100|ENST00000387365|Transcript|upstream_gene_variant||||||rs2854128|1557|1|MT-TI|HGNC|7488|Mt_tRNA|YES|||||||||||A:0.1656|||||||||||||;GR=3.07;PH=0.654;PS=0.002 GT:GL:GOF:GQ:NR:NV 1/1:-300,-298.01,0:3:99:2733:2718 1/1:-300,-298.01,0:17:99:6509:6461 1/1:-300,-298.01,0:2:99:6598:6575 MT 2591 2747 rRNA"
>
exp
$BT
intersect
-a
bug44_a.vcf.gz
-b
bug44_b.bed
-wa
-wb
>
obs
check exp obs
rm
exp obs
cd
multi_intersect
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment