Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Sign in
Toggle navigation
Menu
Open sidebar
R3
legacy
bedtools2
Commits
509cc91d
Commit
509cc91d
authored
Jul 02, 2014
by
nkindlon
Browse files
Multiple DB intersect support with unit tests and updated help messages.
parent
ccd125c1
Changes
56
Hide whitespace changes
Inline
Side-by-side
src/intersectFile/intersectMain.cpp
View file @
509cc91d
...
...
@@ -13,6 +13,7 @@ using namespace std;
#include "intersectFile.h"
#include "ContextIntersect.h"
#include "CommonHelp.h"
// define our program name
#define PROGRAM_NAME "bedtools intersect"
...
...
@@ -46,6 +47,9 @@ void intersect_help(void) {
cerr
<<
"Usage: "
<<
PROGRAM_NAME
<<
" [OPTIONS] -a <bed/gff/vcf> -b <bed/gff/vcf>"
<<
endl
<<
endl
;
cerr
<<
"
\t\t
"
<<
"Note: -b may be followed with multiple databases and/or "
<<
endl
;
cerr
<<
"
\t\t
"
"wildcard (*) character(s). "
<<
endl
;
cerr
<<
"Options: "
<<
endl
;
cerr
<<
"
\t
-abam
\t
"
<<
"The A input file is in BAM format. Output will be BAM as well."
<<
endl
<<
endl
;
...
...
@@ -119,6 +123,17 @@ void intersect_help(void) {
cerr
<<
"
\t\t
other software tools and scripts that need to process one"
<<
endl
;
cerr
<<
"
\t\t
line of bedtools output at a time."
<<
endl
<<
endl
;
cerr
<<
"
\t
-names
\t
"
<<
"When using multiple databases, provide an alias for each that"
<<
endl
;
cerr
<<
"
\t\t
will appear instead of a fileId when also printing the DB record."
<<
endl
<<
endl
;
cerr
<<
"
\t
-filenames"
<<
"
\t
When using multiple databases, show each complete filename"
<<
endl
;
cerr
<<
"
\t\t\t
instead of a fileId when also printing the DB record."
<<
endl
<<
endl
;
cerr
<<
"
\t
-sortout
\t
"
<<
"When using multiple databases, sort the output DB hits"
<<
endl
;
cerr
<<
"
\t\t\t
for each record."
<<
endl
<<
endl
;
CommonHelp
();
cerr
<<
"Notes: "
<<
endl
;
cerr
<<
"
\t
(1) When a BAM file is used for the A file, the alignment is retained if overlaps exist,"
<<
endl
;
cerr
<<
"
\t
and exlcuded if an overlap cannot be found. If multiple overlaps exist, they are not"
<<
endl
;
...
...
src/utils/BinTree/BinTree.cpp
View file @
509cc91d
...
...
@@ -3,8 +3,7 @@
BinTree
::
BinTree
(
ContextIntersect
*
context
)
:
_databaseFile
(
NULL
),
_context
(
context
),
:
_context
(
context
),
_binOffsetsExtended
(
NULL
),
_showBinMetrics
(
false
),
_maxBinNumFound
(
0
)
...
...
@@ -36,7 +35,7 @@ BinTree::~BinTree() {
}
for
(
innerListIterType
listIter
=
bin
->
begin
();
listIter
!=
bin
->
end
();
listIter
=
bin
->
next
())
{
const
Record
*
record
=
listIter
->
value
();
_
databaseFile
->
deleteRecord
(
record
);
_
context
->
getFile
(
record
->
getFileIdx
())
->
deleteRecord
(
record
);
}
delete
bin
;
bin
=
NULL
;
...
...
@@ -70,20 +69,22 @@ BinTree::~BinTree() {
void
BinTree
::
loadDB
()
{
_databaseFile
=
_context
->
getFile
(
_context
->
getDatabaseFileIdx
());
Record
*
record
=
NULL
;
while
(
!
_databaseFile
->
eof
())
{
record
=
_databaseFile
->
getNextRecord
();
//In addition to NULL records, we also don't want to add unmapped reads.
if
(
record
==
NULL
||
record
->
isUnmapped
())
{
continue
;
}
for
(
int
i
=
0
;
i
<
_context
->
getNumDatabaseFiles
();
i
++
)
{
FileRecordMgr
*
databaseFile
=
_context
->
getDatabaseFile
(
i
);
Record
*
record
=
NULL
;
while
(
!
databaseFile
->
eof
())
{
record
=
databaseFile
->
getNextRecord
();
//In addition to NULL records, we also don't want to add unmapped reads.
if
(
record
==
NULL
||
record
->
isUnmapped
())
{
continue
;
}
if
(
!
addRecordToTree
(
record
))
{
fprintf
(
stderr
,
"ERROR: Unable to add record to tree.
\n
"
);
_databaseFile
->
close
();
exit
(
1
);
if
(
!
addRecordToTree
(
record
))
{
fprintf
(
stderr
,
"ERROR: Unable to add record to tree.
\n
"
);
databaseFile
->
close
();
exit
(
1
);
}
}
}
}
...
...
@@ -149,6 +150,9 @@ void BinTree::getHits(Record *record, RecordKeyList &hitSet)
startBin
>>=
_binNextShift
;
endBin
>>=
_binNextShift
;
}
if
(
_context
->
getSortOutput
())
{
hitSet
.
sort
();
}
}
bool
BinTree
::
addRecordToTree
(
const
Record
*
record
)
...
...
src/utils/BinTree/BinTree.h
View file @
509cc91d
...
...
@@ -32,7 +32,6 @@ public:
private:
FileRecordMgr
*
_databaseFile
;
ContextIntersect
*
_context
;
//
...
...
@@ -52,8 +51,8 @@ private:
static
const
uint32_t
_binFirstShift
=
14
;
/* How much to shift to get to finest bin. */
static
const
uint32_t
_binNextShift
=
3
;
/* How much to shift to get to next larger bin. */
typedef
BTlist
<
const
Record
*>
innerListType
;
typedef
const
BTlistNode
<
const
Record
*>
*
innerListIterType
;
typedef
Record
List
innerListType
;
typedef
const
RecordListNode
*
innerListIterType
;
typedef
innerListType
*
binType
;
typedef
binType
*
allBinsType
;
typedef
QuickString
mainKeyType
;
...
...
src/utils/Contexts/ContextBase.cpp
View file @
509cc91d
...
...
@@ -36,18 +36,17 @@ ContextBase::ContextBase()
_reciprocal
(
false
),
_sameStrand
(
false
),
_diffStrand
(
false
),
_sortedInput
(
false
),
_sortedInput
(
false
),
_sortOutput
(
false
),
_reportDBnameTags
(
false
),
_reportDBfileNames
(
false
),
_printHeader
(
false
),
_printable
(
true
),
_explicitBedOutput
(
false
),
_queryFileIdx
(
-
1
),
_databaseFileIdx
(
-
1
),
_bamHeaderAndRefIdx
(
-
1
),
_maxNumDatabaseFields
(
0
),
_useFullBamTags
(
false
),
_reportCount
(
false
),
_reportNames
(
false
),
_reportScores
(
false
),
_numOutputRecords
(
0
),
_hasConstantSeed
(
false
),
_seed
(
0
),
...
...
@@ -193,6 +192,9 @@ bool ContextBase::parseCmdArgs(int argc, char **argv, int skipFirstArgs) {
else
if
(
strcmp
(
_argv
[
_i
],
"-delim"
)
==
0
)
{
if
(
!
handle_delim
())
return
false
;
}
else
if
(
strcmp
(
_argv
[
_i
],
"-sortout"
)
==
0
)
{
if
(
!
handle_sortout
())
return
false
;
}
}
return
true
;
...
...
@@ -210,7 +212,11 @@ bool ContextBase::isValidState()
return
false
;
}
if
(
hasColumnOpsMethods
())
{
FileRecordMgr
*
dbFile
=
getFile
(
hasIntersectMethods
()
?
_databaseFileIdx
:
0
);
//TBD: Adjust column ops for multiple databases.
//For now, use last file.
// FileRecordMgr *dbFile = getFile(hasIntersectMethods() ? _databaseFileIdx : 0);
FileRecordMgr
*
dbFile
=
getFile
(
getNumInputFiles
()
-
1
);
_keyListOps
->
setDBfileType
(
dbFile
->
getFileType
());
if
(
!
_keyListOps
->
isValidColumnOps
(
dbFile
))
{
return
false
;
...
...
@@ -251,7 +257,7 @@ bool ContextBase::openFiles() {
_files
.
resize
(
_fileNames
.
size
());
for
(
int
i
=
0
;
i
<
(
int
)
_fileNames
.
size
();
i
++
)
{
FileRecordMgr
*
frm
=
getNewFRM
(
_fileNames
[
i
]);
FileRecordMgr
*
frm
=
getNewFRM
(
_fileNames
[
i
]
,
i
);
if
(
hasGenomeFile
())
{
frm
->
setGenomeFile
(
_genomeFile
);
}
...
...
@@ -281,7 +287,7 @@ int ContextBase::getBamHeaderAndRefIdx() {
if
(
_files
[
_queryFileIdx
]
->
getFileType
()
==
FileRecordTypeChecker
::
BAM_FILE_TYPE
)
{
_bamHeaderAndRefIdx
=
_queryFileIdx
;
}
else
{
_bamHeaderAndRefIdx
=
_d
atabase
FileIdx
;
_bamHeaderAndRefIdx
=
_d
b
FileIdx
s
[
0
]
;
}
return
_bamHeaderAndRefIdx
;
}
...
...
@@ -492,6 +498,13 @@ bool ContextBase::handle_delim()
return
true
;
}
// Handler for the "-sortout" command line flag.
// Turns on sorting of output hits and marks the flag's argument slot as
// consumed. Always succeeds, so it always returns true.
bool ContextBase::handle_sortout()
{
    setSortOutput(true);

    // Positions in the "used" bookkeeping are relative to the first
    // argument that this parser is responsible for.
    const int flagPos = _i - _skipFirstArgs;
    markUsed(flagPos);

    return true;
}
void
ContextBase
::
setColumnOpsMethods
(
bool
val
)
{
if
(
val
&&
!
_hasColumnOpsMethods
)
{
...
...
@@ -508,13 +521,17 @@ const QuickString &ContextBase::getColumnOpsVal(RecordKeyList &keyList) const {
return
_keyListOps
->
getOpVals
(
keyList
);
}
FileRecordMgr
*
ContextBase
::
getNewFRM
(
const
QuickString
&
filename
)
{
if
(
!
_useMergedIntervals
)
{
return
new
FileRecordMgr
(
filename
);
}
else
{
// Creates the record manager for one input file and stamps it with the
// file's index.
//
// filename - name of the input file to manage.
// fileIdx  - index assigned to the file; stored on the manager via
//            setFileIdx().
//
// Returns a heap-allocated manager. When merged intervals are requested
// (_useMergedIntervals) the manager is a FileRecordMergeMgr configured with
// the desired strand type and maximum merge distance; otherwise it is a
// plain FileRecordMgr.
FileRecordMgr *ContextBase::getNewFRM(const QuickString &filename, int fileIdx)
{
    // Common case first: no interval merging requested.
    if (!_useMergedIntervals) {
        FileRecordMgr *plainMgr = new FileRecordMgr(filename);
        plainMgr->setFileIdx(fileIdx);
        return plainMgr;
    }

    FileRecordMergeMgr *mergeMgr = new FileRecordMergeMgr(filename);
    mergeMgr->setStrandType(_desiredStrand);
    mergeMgr->setMaxDistance(_maxDistance);
    mergeMgr->setFileIdx(fileIdx);
    return mergeMgr;
}
...
...
src/utils/Contexts/ContextBase.h
View file @
509cc91d
...
...
@@ -98,6 +98,15 @@ public:
virtual
bool
getSortedInput
()
const
{
return
_sortedInput
;
}
virtual
void
setSortedInput
(
bool
val
)
{
_sortedInput
=
val
;
}
// Whether output hits should be sorted (set by the -sortout option).
virtual bool getSortOutput() const
{
    return _sortOutput;
}
virtual void setSortOutput(bool val)
{
    _sortOutput = val;
}

// Whether user supplied database name tags are in use
// (set once the -names option has been parsed).
virtual bool getUseDBnameTags() const
{
    return _reportDBnameTags;
}
virtual void setUseDBnameTags(bool val)
{
    _reportDBnameTags = val;
}

// Whether database file names are in use (set by the -filenames option).
virtual bool getUseDBfileNames() const
{
    return _reportDBfileNames;
}
virtual void setUseDBfileNames(bool val)
{
    _reportDBfileNames = val;
}
virtual
bool
getPrintHeader
()
const
{
return
_printHeader
;
}
virtual
void
setPrintHeader
(
bool
val
)
{
_printHeader
=
val
;
}
...
...
@@ -107,24 +116,6 @@ public:
virtual
bool
getUseFullBamTags
()
const
{
return
_useFullBamTags
;
}
virtual
void
setUseFullBamTags
(
bool
val
)
{
_useFullBamTags
=
val
;
}
// //
// // MERGE METHODS
// //
// virtual bool getReportCount() const { return _reportCount; }
// virtual void setReportCount(bool val) { _reportCount = val; }
//
// virtual int getMaxDistance() const { return _maxDistance; }
// virtual void setMaxDistance(int distance) { _maxDistance = distance; }
//
// virtual bool getReportNames() const { return _reportNames; }
// virtual void setReportNames(bool val) { _reportNames = val; }
//
// virtual bool getReportScores() const { return _reportScores; }
// virtual void setReportScores(bool val) { _reportScores = val; }
//
// virtual const QuickString &getScoreOp() const { return _scoreOp; }
// virtual void setScoreOp(const QuickString &op) { _scoreOp = op; }
// METHODS FOR PROGRAMS WITH USER_SPECIFIED NUMBER
// OF OUTPUT RECORDS.
...
...
@@ -192,18 +183,19 @@ protected:
bool
_sameStrand
;
bool
_diffStrand
;
bool
_sortedInput
;
bool
_sortOutput
;
bool
_reportDBnameTags
;
bool
_reportDBfileNames
;
bool
_printHeader
;
bool
_printable
;
bool
_explicitBedOutput
;
int
_queryFileIdx
;
int
_databaseFileIdx
;
vector
<
int
>
_dbFileIdxs
;
vector
<
QuickString
>
_dbNameTags
;
map
<
int
,
int
>
_fileIdsToDbIdxs
;
int
_bamHeaderAndRefIdx
;
int
_maxNumDatabaseFields
;
bool
_useFullBamTags
;
bool
_reportCount
;
bool
_reportNames
;
bool
_reportScores
;
QuickString
_scoreOp
;
int
_numOutputRecords
;
...
...
@@ -227,7 +219,7 @@ protected:
bool
isUsed
(
int
i
)
const
{
return
_argsProcessed
[
i
];
}
bool
cmdArgsValid
();
bool
openFiles
();
virtual
FileRecordMgr
*
getNewFRM
(
const
QuickString
&
filename
);
virtual
FileRecordMgr
*
getNewFRM
(
const
QuickString
&
filename
,
int
fileIdx
);
//set cmd line params and counter, i, as members so code
//is more readable (as opposed to passing all 3 everywhere).
...
...
@@ -256,6 +248,7 @@ protected:
virtual
bool
handle_o
();
virtual
bool
handle_null
();
virtual
bool
handle_delim
();
virtual
bool
handle_sortout
();
bool
parseIoBufSize
(
QuickString
bufStr
);
...
...
src/utils/Contexts/ContextIntersect.cpp
View file @
509cc91d
...
...
@@ -44,6 +44,12 @@ bool ContextIntersect::parseCmdArgs(int argc, char **argv, int skipFirstArgs) {
else
if
(
strcmp
(
_argv
[
_i
],
"-b"
)
==
0
)
{
if
(
!
handle_b
())
return
false
;
}
else
if
(
strcmp
(
_argv
[
_i
],
"-names"
)
==
0
)
{
if
(
!
handle_names
())
return
false
;
}
else
if
(
strcmp
(
_argv
[
_i
],
"-filenames"
)
==
0
)
{
if
(
!
handle_filenames
())
return
false
;
}
else
if
(
strcmp
(
_argv
[
_i
],
"-u"
)
==
0
)
{
if
(
!
handle_u
())
return
false
;
}
...
...
@@ -92,7 +98,7 @@ bool ContextIntersect::isValidState()
return
false
;
}
if
(
_queryFileIdx
==
-
1
||
_d
atabaseFileIdx
==
-
1
)
{
if
(
_queryFileIdx
==
-
1
||
_d
bFileIdxs
.
size
()
==
-
0
)
{
_errorMsg
=
"
\n
***** ERROR: query and database files not specified. *****"
;
return
false
;
}
...
...
@@ -113,6 +119,11 @@ bool ContextIntersect::isValidState()
return
false
;
}
}
if
(
getUseDBnameTags
()
&&
_dbNameTags
.
size
()
!=
_dbFileIdxs
.
size
())
{
_errorMsg
=
"
\n
***** ERROR: Number of database name tags given does not match number of databases. *****"
;
return
false
;
}
if
(
getWriteOverlap
())
{
if
(
getWriteA
())
{
...
...
@@ -149,7 +160,7 @@ bool ContextIntersect::isValidState()
if
(
getAnyHit
()
||
getNoHit
()
||
getWriteCount
())
{
setPrintable
(
false
);
}
if
(
_files
.
size
()
!=
2
)
{
if
(
_files
.
size
()
<
2
)
{
return
false
;
}
return
true
;
...
...
@@ -161,8 +172,8 @@ bool ContextIntersect::determineOutputType() {
}
//determine the maximum number of database fields.
for
(
int
i
=
0
;
i
<
(
int
)
_files
.
size
();
i
++
)
{
int
numFields
=
_files
[
i
]
->
getNumFields
();
for
(
int
i
=
0
;
i
<
getNumDatabaseFiles
();
i
++
)
{
int
numFields
=
getDatabaseFile
(
i
)
->
getNumFields
();
if
(
numFields
>
_maxNumDatabaseFields
)
{
_maxNumDatabaseFields
=
numFields
;
}
...
...
@@ -211,19 +222,46 @@ bool ContextIntersect::handle_abam()
bool
ContextIntersect
::
handle_b
()
{
if
(
_argc
<=
_i
+
1
)
{
_errorMsg
=
"
\n
***** ERROR: -b option given, but no
query
file specified. *****"
;
_errorMsg
=
"
\n
***** ERROR: -b option given, but no
database
file specified. *****"
;
return
false
;
}
addInputFile
(
_argv
[
_i
+
1
]);
_databaseFileIdx
=
getNumInputFiles
()
-
1
;
markUsed
(
_i
-
_skipFirstArgs
);
_i
++
;
markUsed
(
_i
-
_skipFirstArgs
);
do
{
addInputFile
(
_argv
[
_i
+
1
]);
int
fileId
=
getNumInputFiles
()
-
1
;
_dbFileIdxs
.
push_back
(
fileId
);
_fileIdsToDbIdxs
[
fileId
]
=
_dbFileIdxs
.
size
()
-
1
;
markUsed
(
_i
-
_skipFirstArgs
);
_i
++
;
markUsed
(
_i
-
_skipFirstArgs
);
}
while
(
_argc
>
_i
+
1
&&
_argv
[
_i
+
1
][
0
]
!=
'-'
);
return
true
;
}
bool
ContextIntersect
::
handle_names
()
{
if
(
_argc
<=
_i
+
1
)
{
_errorMsg
=
"
\n
***** ERROR: -b option given, but no database names specified. *****"
;
return
false
;
}
do
{
addDatabaseNameTag
(
_argv
[
_i
+
1
]);
markUsed
(
_i
-
_skipFirstArgs
);
_i
++
;
markUsed
(
_i
-
_skipFirstArgs
);
}
while
(
_argc
>
_i
+
1
&&
_argv
[
_i
+
1
][
0
]
!=
'-'
);
setUseDBnameTags
(
true
);
return
true
;
}
// Handler for the "-filenames" flag.
// Marks the flag's argument slot as consumed and switches on the use of
// database file names. Takes no value, so it always returns true.
bool ContextIntersect::handle_filenames()
{
    const int flagPos = _i - _skipFirstArgs;
    markUsed(flagPos);

    setUseDBfileNames(true);
    return true;
}
bool
ContextIntersect
::
handle_c
()
{
...
...
src/utils/Contexts/ContextIntersect.h
View file @
509cc91d
...
...
@@ -22,19 +22,22 @@ public:
//NOTE: Query and database files will only be marked as such by either the
//parseCmdArgs method, or by explicitly setting them.
FileRecordMgr
*
getQueryFile
()
{
return
getFile
(
_queryFileIdx
);
}
FileRecordMgr
*
getDatabaseFile
()
{
return
getFile
(
_d
atabase
FileIdx
);
}
FileRecordMgr
*
getDatabaseFile
(
int
idx
)
{
return
getFile
(
_d
b
FileIdx
s
[
idx
]
);
}
int
getQueryFileIdx
()
const
{
return
_queryFileIdx
;
}
void
setQueryFileIdx
(
int
idx
)
{
_queryFileIdx
=
idx
;
}
int
getDatabaseFile
Idx
()
const
{
return
_databaseFileIdx
;
}
void
setDatabaseFileIdx
(
int
idx
)
{
_database
FileIdx
=
idx
;
}
int
get
Num
DatabaseFile
s
()
{
return
(
int
)
_dbFileIdxs
.
size
()
;
}
const
vector
<
int
>
&
getDbFileIdxs
()
const
{
return
_db
FileIdx
s
;
}
const
QuickString
&
getQueryFileName
()
const
{
return
_files
[
_queryFileIdx
]
->
getFileName
();
}
const
QuickString
&
getDatabaseFileName
()
const
{
return
_files
[
_d
atabase
FileIdx
]
->
getFileName
();
}
const
QuickString
&
getDatabaseFileName
(
int
idx
)
const
{
return
_files
[
_d
b
FileIdx
s
[
idx
]
]
->
getFileName
();
}
ContextFileType
getQueryFileType
()
const
{
return
_files
[
_queryFileIdx
]
->
getFileType
();
}
ContextFileType
getDatabaseFileType
()
const
{
return
_files
[
_d
atabase
FileIdx
]
->
getFileType
();
}
ContextFileType
getDatabaseFileType
(
int
idx
)
const
{
return
_files
[
_d
b
FileIdx
s
[
idx
]
]
->
getFileType
();
}
ContextRecordType
getQueryRecordType
()
const
{
return
_files
[
_queryFileIdx
]
->
getRecordType
();
}
ContextRecordType
getDatabaseRecordType
()
const
{
return
_files
[
_d
atabase
FileIdx
]
->
getRecordType
();
}
ContextRecordType
getDatabaseRecordType
(
int
idx
)
const
{
return
_files
[
_d
b
FileIdx
s
[
idx
]
]
->
getRecordType
();
}
int
getMaxNumDatabaseFields
()
const
{
return
_maxNumDatabaseFields
;
}
void
setMaxNumDatabaseFields
(
int
val
)
{
_maxNumDatabaseFields
=
val
;
}
int
getDbIdx
(
int
fileId
)
{
return
_fileIdsToDbIdxs
.
find
(
fileId
)
->
second
;
}
// Appends one database alias to the end of the name tag list (_dbNameTags).
void addDatabaseNameTag(const QuickString &tag)
{
    _dbNameTags.push_back(tag);
}
// Returns the name tag stored at position dbIdx of _dbNameTags.
// No bounds check is performed here.
const QuickString &getDatabaseNameTag(int dbIdx) const
{
    return _dbNameTags[dbIdx];
}
bool
getAnyHit
()
const
{
return
_anyHit
;
}
void
setAnyHit
(
bool
val
)
{
_anyHit
=
val
;
}
...
...
@@ -83,6 +86,9 @@ private:
virtual
bool
handle_a
();
virtual
bool
handle_abam
();
virtual
bool
handle_b
();
virtual
bool
handle_names
();
virtual
bool
handle_filenames
();
virtual
bool
handle_c
();
virtual
bool
handle_f
();
virtual
bool
handle_loj
();
...
...
src/utils/Contexts/ContextMap.cpp
View file @
509cc91d
...
...
@@ -47,3 +47,17 @@ bool ContextMap::parseCmdArgs(int argc, char **argv, int skipFirstArgs) {
}
return
ContextIntersect
::
parseCmdArgs
(
argc
,
argv
,
_skipFirstArgs
);
}
bool
ContextMap
::
isValidState
()
{
if
(
!
ContextIntersect
::
isValidState
())
{
return
false
;
}
// Multiple databases are currently not supported
if
(
getNumDatabaseFiles
()
>
1
)
{
_errorMsg
=
"
\n
***** ERROR: multiple database files currently not supported for map. *****"
;
return
false
;
}
return
true
;
}
src/utils/Contexts/ContextMap.h
View file @
509cc91d
...
...
@@ -16,6 +16,8 @@ public:
virtual
~
ContextMap
();
virtual
bool
parseCmdArgs
(
int
argc
,
char
**
argv
,
int
skipFirstArgs
);
virtual
bool
hasIntersectMethods
()
const
{
return
true
;
}
virtual
bool
isValidState
();
private:
...
...
src/utils/FileRecordTools/FileReaders/FileReader.cpp
View file @
509cc91d
...
...
@@ -4,7 +4,7 @@
#include "BufferedStreamMgr.h"
FileReader
::
FileReader
()
:
:
_fileIdx
(
-
1
),
_bufStreamMgr
(
NULL
),
_isFileOpen
(
false
),
_currChromId
(
-
1
)
...
...
src/utils/FileRecordTools/FileReaders/FileReader.h
View file @
509cc91d
...
...
@@ -16,6 +16,9 @@ public:
FileReader
();
virtual
~
FileReader
();
void
setFileName
(
const
string
&
filename
)
{
_filename
=
filename
;
}
// Index of the input file this reader belongs to. Record types copy it
// from the reader in their initFromFile() methods.
virtual int getFileIdx() const
{
    return _fileIdx;
}

// Stores the index of the input file this reader belongs to.
virtual void setFileIdx(int fileIdx)
{
    _fileIdx = fileIdx;
}
void
setInputStream
(
BufferedStreamMgr
*
bufStreamMgr
)
{
_bufStreamMgr
=
bufStreamMgr
;
_isFileOpen
=
true
;
...
...
@@ -31,6 +34,7 @@ public:
virtual
const
QuickString
&
getHeader
()
const
=
0
;
virtual
int
getNumFields
()
const
=
0
;
protected:
int
_fileIdx
;
string
_filename
;
BufferedStreamMgr
*
_bufStreamMgr
;
...
...
src/utils/FileRecordTools/FileRecordMgr.cpp
View file @
509cc91d
...
...
@@ -5,7 +5,7 @@
#include "NewGenomeFile.h"
FileRecordMgr
::
FileRecordMgr
(
const
QuickString
&
filename
)
:
:
_fileIdx
(
-
1
),
_filename
(
filename
),
_bufStreamMgr
(
NULL
),
_fileReader
(
NULL
),
...
...
@@ -224,6 +224,7 @@ void FileRecordMgr::allocateFileReader()
default:
break
;
}
_fileReader
->
setFileIdx
(
_fileIdx
);
}
const
BamTools
::
RefVector
&
FileRecordMgr
::
getBamReferences
()
{
...
...
src/utils/FileRecordTools/FileRecordMgr.h
View file @
509cc91d
...
...
@@ -37,6 +37,9 @@ public:
bool
open
();
void
close
();
virtual
bool
eof
();
// Stores the index of the input file handled by this manager. The value is
// later handed to the file reader in allocateFileReader().
void setFileIdx(int fileIdx)
{
    _fileIdx = fileIdx;
}

// Index of the input file handled by this manager.
int getFileIdx() const
{
    return _fileIdx;
}
//This is an all-in-one method to give the user a new record that is initialized with
//the next entry in the data file.
...
...
@@ -46,6 +49,7 @@ public:
void
deleteRecord
(
const
Record
*
);
virtual
void
deleteRecord
(
RecordKeyList
*
keyList
);
const
QuickString
&
getFileName
()
const
{
return
_filename
;}
bool
hasHeader
()
const
{
return
_fileReader
->
hasHeader
();
}
const
QuickString
&
getHeader
()
const
{
return
_fileReader
->
getHeader
();
}
...
...
@@ -103,6 +107,7 @@ public:
void
setIoBufSize
(
int
val
)
{
_ioBufSize
=
val
;
}
protected:
int
_fileIdx
;
QuickString
_filename
;
BufferedStreamMgr
*
_bufStreamMgr
;
...
...
src/utils/FileRecordTools/Records/BamRecord.cpp
View file @
509cc91d
...
...
@@ -45,6 +45,7 @@ bool BamRecord::initFromFile(FileReader *fileReader)
bool
BamRecord
::
initFromFile
(
BamFileReader
*
bamFileReader
)
{
setFileIdx
(
bamFileReader
->
getFileIdx
());
_bamAlignment
=
bamFileReader
->
getAlignment
();
bamFileReader
->
getChrName
(
_chrName
);
...
...
src/utils/FileRecordTools/Records/Bed3Interval.cpp
View file @
509cc91d
...
...
@@ -23,6 +23,7 @@ bool Bed3Interval::initFromFile(FileReader *fileReader)
bool
Bed3Interval
::
initFromFile
(
SingleLineDelimTextFileReader
*
fileReader
)
{
setFileIdx
(
fileReader
->
getFileIdx
());
fileReader
->
getField
(
0
,
_chrName
);
fileReader
->
getField
(
1
,
_startPosStr
);
fileReader
->
getField
(
2
,
_endPosStr
);
...
...
src/utils/FileRecordTools/Records/GffRecord.cpp
View file @
509cc91d
...
...
@@ -23,6 +23,7 @@ void GffRecord::clear()
bool
GffRecord
::
initFromFile
(
SingleLineDelimTextFileReader
*
fileReader
)
{
setFileIdx
(
fileReader
->
getFileIdx
());
fileReader
->
getField
(
0
,
_chrName
);
fileReader
->
getField
(
3
,
_startPosStr
);
_startPos
=
str2chrPos
(
_startPosStr
);
...
...
src/utils/FileRecordTools/Records/Makefile
View file @
509cc91d
...
...
@@ -17,12 +17,12 @@ INCLUDES = -I$(UTILITIES_DIR)/FileRecordTools/FileReaders/ \
# ----------------------------------
# define our source and object files
# ----------------------------------
SOURCES
=
RecordMgr.cpp RecordMgr.h Record.h Record.cpp EmptyRecord.h EmptyRecord.cpp Bed3Interval.h Bed3Interval.cpp
\
SOURCES
=
RecordMgr.cpp RecordMgr.h
RecordList.h RecordList.cpp
Record.h Record.cpp EmptyRecord.h EmptyRecord.cpp Bed3Interval.h Bed3Interval.cpp
\
Bed4Interval.h Bed4Interval.cpp BedGraphInterval.h BedGraphInterval.cpp Bed5Interval.h Bed5Interval.cpp
\
Bed6Interval.h Bed6Interval.cpp
\
BedPlusInterval.h BedPlusInterval.cpp Bed12Interval.h Bed12Interval.cpp BamRecord.h BamRecord.cpp VcfRecord.h VcfRecord.cpp
\
GffRecord.h GffRecord.cpp RecordKeyList.h RecordKeyList.cpp BlockMgr.h BlockMgr.cpp StrandQueue.h StrandQueue.cpp
OBJECTS
=
RecordMgr.o Record.o EmptyRecord.o Bed3Interval.o Bed4Interval.o BedGraphInterval.o Bed5Interval.o Bed6Interval.o BedPlusInterval.o Bed12Interval.o BamRecord.o
\
OBJECTS
=
RecordMgr.o
RecordList.o
Record.o EmptyRecord.o Bed3Interval.o Bed4Interval.o BedGraphInterval.o Bed5Interval.o Bed6Interval.o BedPlusInterval.o Bed12Interval.o BamRecord.o
\
VcfRecord.o GffRecord.o RecordKeyList.o BlockMgr.o StrandQueue.o
_EXT_OBJECTS
=
ParseTools.o QuickString.o ChromIdLookup.o
EXT_OBJECTS
=
$(
patsubst
%,
$(OBJ_DIR)
/%,
$(_EXT_OBJECTS)
)
...
...
@@ -38,7 +38,7 @@ $(BUILT_OBJECTS): $(SOURCES)
clean
:
@
echo
"Cleaning up."
@
rm
-f
$(OBJ_DIR)
/RecordMgr.o
$(OBJ_DIR)
/Record.o
$(OBJ_DIR)
/EmptyRecord.o
$(OBJ_DIR)
/Bed3Interval.o
$(OBJ_DIR)
/Bed4Interval.o
\
@
rm
-f
$(OBJ_DIR)
/RecordMgr.o
$(OBJ_DIR)
/RecordList.o
$(OBJ_DIR)
/Record.o
$(OBJ_DIR)
/EmptyRecord.o
$(OBJ_DIR)
/Bed3Interval.o
$(OBJ_DIR)
/Bed4Interval.o
\
$(OBJ_DIR)
/BedGraphInterval.o
$(OBJ_DIR)
/Bed5Interval.o
$(OBJ_DIR)
/Bed6Interval.o
\
$(OBJ_DIR)
/BedPlusInterval.o
$(OBJ_DIR)
/Bed12Interval.o
$(OBJ_DIR)
/BamRecord.o
$(OBJ_DIR)
/VcfRecord.o
$(OBJ_DIR)
/GffRecord.o
$(OBJ_DIR)
/BlockMgr.o
$(OBJ_DIR)
/StrandQueue.o
...
...
src/utils/FileRecordTools/Records/Record.cpp
View file @
509cc91d
...
...
@@ -3,7 +3,8 @@
#include <cstdio>
Record
::
Record
()
:
_chrId
(
-
1
),
:
_fileIdx
(
-
1
),
_chrId
(
-
1
),
_startPos
(
-
1
),