Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
B
bedtools2
Manage
Activity
Members
Labels
Plan
Issues
Issue boards
Milestones
Iterations
Wiki
Requirements
External wiki
Code
Merge requests
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Locked files
Build
Pipelines
Jobs
Pipeline schedules
Test cases
Artifacts
Deploy
Releases
Model registry
Operate
Environments
Monitor
Incidents
Service Desk
Analyze
Value stream analytics
Contributor analytics
CI/CD analytics
Repository analytics
Code review analytics
Issue analytics
Insights
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
R3
legacy
bedtools2
Commits
4f6253a0
Commit
4f6253a0
authored
13 years ago
by
Aaron
Browse files
Options
Downloads
Patches
Plain Diff
chromsweep is now a python-style generator. Big smile\!
parent
b4928906
No related branches found
No related tags found
No related merge requests found
Changes
3
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
src/chromsweep/chromsweep.cpp
+157
-64
157 additions, 64 deletions
src/chromsweep/chromsweep.cpp
src/chromsweep/chromsweep.h
+22
-6
22 additions, 6 deletions
src/chromsweep/chromsweep.h
src/chromsweep/chromsweepMain.cpp
+12
-2
12 additions, 2 deletions
src/chromsweep/chromsweepMain.cpp
with
191 additions
and
72 deletions
src/chromsweep/chromsweep.cpp
+
157
−
64
View file @
4f6253a0
...
...
@@ -51,8 +51,14 @@ ChromSweep::ChromSweep(string bedAFile, string bedBFile, bool anyHit,
// create new BED file objects for A and B
_bedA
=
new
BedFile
(
bedAFile
);
_bedB
=
new
BedFile
(
bedBFile
);
Sweep
();
// prime the results pump.
_qy_lineNum
=
0
;
_db_lineNum
=
0
;
_bedA
->
Open
();
_bedB
->
Open
();
_qy_status
=
_bedA
->
GetNextBed
(
_curr_qy
,
_qy_lineNum
);
_db_status
=
_bedB
->
GetNextBed
(
_curr_db
,
_db_lineNum
);
}
...
...
@@ -67,65 +73,120 @@ bool after(const BED &a, const BED &b) {
return
(
a
.
start
>=
b
.
end
);
}
void
ChromSweep
::
ScanCache
(
const
BED
&
curr_qy
,
BedLineStatus
qy_status
,
vector
<
BED
>
&
db_cache
,
vector
<
BED
>
&
hits
)
{
if
(
qy_status
!=
BED_INVALID
)
{
vector
<
BED
>::
iterator
c
=
db
_cache
.
begin
();
while
(
c
!=
db
_cache
.
end
())
void
ChromSweep
::
ScanCache
()
{
if
(
_
qy_status
!=
BED_INVALID
)
{
vector
<
BED
>::
iterator
c
=
_cache
.
begin
();
while
(
c
!=
_cache
.
end
())
{
if
((
curr_qy
.
chrom
==
c
->
chrom
)
&&
!
(
after
(
curr_qy
,
*
c
)))
{
if
(
overlaps
(
curr_qy
.
start
,
curr_qy
.
end
,
c
->
start
,
c
->
end
)
>
0
)
{
hits
.
push_back
(
*
c
);
if
((
_
curr_qy
.
chrom
==
c
->
chrom
)
&&
!
(
after
(
_
curr_qy
,
*
c
)))
{
if
(
overlaps
(
_
curr_qy
.
start
,
_
curr_qy
.
end
,
c
->
start
,
c
->
end
)
>
0
)
{
_
hits
.
push_back
(
*
c
);
}
++
c
;
}
else
{
c
=
db
_cache
.
erase
(
c
);
c
=
_cache
.
erase
(
c
);
}
}
}
}
// void ChromSweep::ScanCache(const BED &curr_qy, BedLineStatus qy_status, vector<BED> &db_cache, vector<BED> &hits) {
// if (qy_status != BED_INVALID) {
// vector<BED>::iterator c = db_cache.begin();
// while (c != db_cache.end())
// {
// if ((curr_qy.chrom == c->chrom) && !(after(curr_qy, *c))) {
// if (overlaps(curr_qy.start, curr_qy.end, c->start, c->end) > 0) {
// hits.push_back(*c);
// }
// ++c;
// }
// else {
// c = db_cache.erase(c);
// }
// }
// }
// }
void
ChromSweep
::
ChromCheck
(
BED
&
curr_qy
,
BED
&
curr_db
,
BedLineStatus
&
qy_status
,
BedLineStatus
&
db_status
,
int
&
qy_lineNum
,
int
&
db_lineNum
,
vector
<
BED
>
&
db_cache
,
vector
<
BED
>
&
hits
)
void
ChromSweep
::
ChromCheck
()
{
if
((
curr_qy
.
chrom
==
curr_db
.
chrom
)
||
(
db_status
==
BED_INVALID
)
||
(
qy_status
==
BED_INVALID
))
{
if
((
_
curr_qy
.
chrom
==
_
curr_db
.
chrom
)
||
(
_
db_status
==
BED_INVALID
)
||
(
_
qy_status
==
BED_INVALID
))
{
return
;
}
if
(
curr_qy
.
chrom
>
curr_db
.
chrom
)
{
while
(
!
_bedB
->
Empty
()
&&
curr_db
.
chrom
<
curr_qy
.
chrom
)
if
(
_
curr_qy
.
chrom
>
_
curr_db
.
chrom
)
{
while
(
!
_bedB
->
Empty
()
&&
_
curr_db
.
chrom
<
_
curr_qy
.
chrom
)
{
db_status
=
_bedB
->
GetNextBed
(
curr_db
,
db_lineNum
);
_
db_status
=
_bedB
->
GetNextBed
(
_
curr_db
,
_
db_lineNum
);
}
db
_cache
.
clear
();
_cache
.
clear
();
}
else
if
(
curr_qy
.
chrom
<
curr_db
.
chrom
)
{
else
if
(
_
curr_qy
.
chrom
<
_
curr_db
.
chrom
)
{
// report hits for the remaining queries on this chrom
BED
tmp_curr_qy
=
curr_qy
;
while
(
!
_bedA
->
Empty
()
&&
tmp_curr_qy
.
chrom
==
curr_qy
.
chrom
)
BED
tmp_curr_qy
=
_
curr_qy
;
while
(
!
_bedA
->
Empty
()
&&
tmp_curr_qy
.
chrom
==
_
curr_qy
.
chrom
)
{
//db_cache = ScanCache(tmp_curr_qy, qy_status, db_cache, hits);
ScanCache
(
tmp_curr_qy
,
qy_status
,
db_cache
,
hits
);
ReportHits
(
tmp_curr_qy
,
hits
);
qy_status
=
_bedA
->
GetNextBed
(
tmp_curr_qy
,
qy_lineNum
);
hits
.
clear
();
ScanCache
();
_results
.
push
(
make_pair
(
tmp_curr_qy
,
_hits
));
_qy_status
=
_bedA
->
GetNextBed
(
tmp_curr_qy
,
_qy_lineNum
);
_hits
.
clear
();
}
// now fast forward query to catch up to database
while
(
!
_bedA
->
Empty
()
&&
tmp_curr_qy
.
chrom
<
curr_db
.
chrom
)
while
(
!
_bedA
->
Empty
()
&&
tmp_curr_qy
.
chrom
<
_
curr_db
.
chrom
)
{
// hits is empty to reflect the fact that no hits are found in catch-up mode
ReportHits
(
tmp_curr_qy
,
hits
);
qy_status
=
_bedA
->
GetNextBed
(
tmp_curr_qy
,
qy_lineNum
);
_results
.
push
(
make_pair
(
tmp_curr_qy
,
_
hits
)
)
;
_
qy_status
=
_bedA
->
GetNextBed
(
tmp_curr_qy
,
_
qy_lineNum
);
}
curr_qy
=
tmp_curr_qy
;
db
_cache
.
clear
();
_
curr_qy
=
tmp_curr_qy
;
_cache
.
clear
();
}
}
//
// void ChromSweep::ChromCheck(BED &curr_qy, BED &curr_db,
// BedLineStatus &qy_status, BedLineStatus &db_status,
// int &qy_lineNum, int &db_lineNum,
// vector<BED> &db_cache, vector<BED> &hits)
// {
// if ((curr_qy.chrom == curr_db.chrom) || (db_status == BED_INVALID) || (qy_status == BED_INVALID)) {
// return;
// }
//
// if (curr_qy.chrom > curr_db.chrom) {
// while (!_bedB->Empty() && curr_db.chrom < curr_qy.chrom)
// {
// db_status = _bedB->GetNextBed(curr_db, db_lineNum);
// }
// db_cache.clear();
// }
// else if (curr_qy.chrom < curr_db.chrom) {
// // report hits for the remaining queries on this chrom
// BED tmp_curr_qy = curr_qy;
// while (!_bedA->Empty() && tmp_curr_qy.chrom == curr_qy.chrom)
// {
// //db_cache = ScanCache(tmp_curr_qy, qy_status, db_cache, hits);
// ScanCache(tmp_curr_qy, qy_status, db_cache, hits);
//
// //ReportHits(tmp_curr_qy, hits);
// _results.push(make_pair(tmp_curr_qy, hits));
// qy_status = _bedA->GetNextBed(tmp_curr_qy, qy_lineNum);
// hits.clear();
// }
// // now fast forward query to catch up to database
// while (!_bedA->Empty() && tmp_curr_qy.chrom < curr_db.chrom)
// {
// // hits is empty to reflect the fact that no hits are found in catch-up mode
// ReportHits(tmp_curr_qy, hits);
// qy_status = _bedA->GetNextBed(tmp_curr_qy, qy_lineNum);
// }
// curr_qy = tmp_curr_qy;
// db_cache.clear();
// }
// }
void
ChromSweep
::
ReportHits
(
const
BED
&
curr_qy
,
const
vector
<
BED
>
&
hits
)
{
_bedA
->
reportBedTab
(
curr_qy
);
...
...
@@ -133,44 +194,76 @@ void ChromSweep::ReportHits(const BED &curr_qy, const vector<BED> &hits) {
}
void
ChromSweep
::
Sweep
()
{
int
qy_lineNum
=
0
;
int
db_lineNum
=
0
;
// current feature from each file
BED
curr_qy
,
curr_db
;
// status of the current lines
BedLineStatus
qy_status
,
db_status
;
vector
<
BED
>
db_cache
;
vector
<
BED
>
hits
;
// open the files; get the first line from each
_bedA
->
Open
();
_bedB
->
Open
();
qy_status
=
_bedA
->
GetNextBed
(
curr_qy
,
qy_lineNum
);
db_status
=
_bedB
->
GetNextBed
(
curr_db
,
db_lineNum
);
while
(
!
_bedA
->
Empty
())
{
bool
ChromSweep
::
Next
(
pair
<
BED
,
vector
<
BED
>
>
&
next
)
{
if
(
!
_bedA
->
Empty
())
{
// have we changed chromosomes?
ChromCheck
(
curr_qy
,
curr_db
,
qy_status
,
db_status
,
qy_lineNum
,
db_lineNum
,
db_cache
,
hits
);
ChromCheck
();
// scan the database cache for hits
//db_cache = ScanCache(curr_qy, qy_status, db_cache, hits);
ScanCache
(
curr_qy
,
qy_status
,
db_cache
,
hits
);
ScanCache
();
// advance the db until we are ahead of the query. update hits and cache as necessary
while
(
!
_bedB
->
Empty
()
&&
curr_qy
.
chrom
==
curr_db
.
chrom
&&
!
(
after
(
curr_db
,
curr_qy
)))
_
curr_qy
.
chrom
==
_
curr_db
.
chrom
&&
!
(
after
(
_
curr_db
,
_
curr_qy
)))
{
if
(
overlaps
(
curr_qy
.
start
,
curr_qy
.
end
,
curr_db
.
start
,
curr_db
.
end
)
>
0
)
{
hits
.
push_back
(
curr_db
);
if
(
overlaps
(
_
curr_qy
.
start
,
_
curr_qy
.
end
,
_
curr_db
.
start
,
_
curr_db
.
end
)
>
0
)
{
_
hits
.
push_back
(
_
curr_db
);
}
db
_cache
.
push_back
(
curr_db
);
db_status
=
_bedB
->
GetNextBed
(
curr_db
,
db_lineNum
);
_cache
.
push_back
(
_
curr_db
);
_
db_status
=
_bedB
->
GetNextBed
(
_
curr_db
,
_
db_lineNum
);
}
// report the hits for this query and reset for the next query
ReportHits
(
curr_qy
,
hits
);
hits
.
clear
();
qy_status
=
_bedA
->
GetNextBed
(
curr_qy
,
qy_lineNum
);
_results
.
push
(
make_pair
(
_curr_qy
,
_hits
));
_hits
.
clear
();
_qy_status
=
_bedA
->
GetNextBed
(
_curr_qy
,
_qy_lineNum
);
}
if
(
!
_results
.
empty
())
{
next
=
_results
.
front
();
_results
.
pop
();
return
true
;
}
else
{
return
false
;}
}
// void ChromSweep::Sweep() {
//
// int qy_lineNum = 0;
// int db_lineNum = 0;
//
// // current feature from each file
// BED curr_qy, curr_db;
//
// // status of the current lines
// BedLineStatus qy_status, db_status;
// vector<BED> db_cache;
// vector<BED> hits;
//
// // open the files; get the first line from each
// _bedA->Open();
// _bedB->Open();
// qy_status = _bedA->GetNextBed(_curr_qy, _qy_lineNum);
// db_status = _bedB->GetNextBed(_curr_db, _db_lineNum);
// while (!_bedA->Empty()) {
// // have we changed chromosomes?
// ChromCheck(curr_qy, curr_db, qy_status, db_status, qy_lineNum, db_lineNum, db_cache, hits);
// // scan the database cache for hits
// //db_cache = ScanCache(curr_qy, qy_status, db_cache, hits);
// ScanCache(curr_qy, qy_status, db_cache, hits);
// // advance the db until we are ahead of the query. update hits and cache as necessary
// while (!_bedB->Empty() &&
// curr_qy.chrom == curr_db.chrom &&
// !(after(curr_db, curr_qy)))
// {
// if (overlaps(curr_qy.start, curr_qy.end, curr_db.start, curr_db.end) > 0) {
// hits.push_back(curr_db);
// }
// db_cache.push_back(curr_db);
// db_status = _bedB->GetNextBed(curr_db, db_lineNum);
// }
// // report the hits for this query and reset for the next query
// //ReportHits(curr_qy, hits);
// _results.push(make_pair(curr_qy, hits));
// hits.clear();
// qy_status = _bedA->GetNextBed(curr_qy, qy_lineNum);
// }
// }
This diff is collapsed.
Click to expand it.
src/chromsweep/chromsweep.h
+
22
−
6
View file @
4f6253a0
...
...
@@ -41,6 +41,10 @@ public:
// destructor
~
ChromSweep
(
void
);
bool
Next
(
pair
<
BED
,
vector
<
BED
>
>
&
next
);
//pair<BED, vector<BED> > GetNextResult(void);
private:
...
...
@@ -77,15 +81,27 @@ private:
// instance of a bed file class.
BedFile
*
_bedA
,
*
_bedB
;
vector
<
BED
>
_cache
;
vector
<
BED
>
_hits
;
queue
<
pair
<
BED
,
vector
<
BED
>
>
>
_results
;
// variables for the current query and db entries.
BED
_curr_qy
,
_curr_db
;
BedLineStatus
_qy_status
,
_db_status
;
int
_qy_lineNum
,
_db_lineNum
;
//------------------------------------------------
// private methods
//------------------------------------------------
void
ScanCache
(
const
BED
&
curr_qy
,
BedLineStatus
qy_status
,
vector
<
BED
>
&
db_cache
,
vector
<
BED
>
&
hits
);
void
ChromCheck
(
BED
&
curr_qy
,
BED
&
curr_db
,
BedLineStatus
&
qy_status
,
BedLineStatus
&
db_status
,
int
&
qy_lineNum
,
int
&
db_lineNum
,
vector
<
BED
>
&
db_cache
,
vector
<
BED
>
&
hits
);
// void ScanCache(const BED &curr_qy, BedLineStatus qy_status, vector<BED> &db_cache, vector<BED> &hits);
//
// void ChromCheck(BED &curr_qy, BED &curr_db,
// BedLineStatus &qy_status, BedLineStatus &db_status,
// int &qy_lineNum, int &db_lineNum,
// vector<BED> &db_cache, vector<BED> &hits);
void
ScanCache
();
void
ChromCheck
();
void
Sweep
();
void
ReportHits
(
const
BED
&
curr_qy
,
const
vector
<
BED
>
&
hits
);
...
...
This diff is collapsed.
Click to expand it.
src/chromsweep/chromsweepMain.cpp
+
12
−
2
View file @
4f6253a0
...
...
@@ -195,10 +195,20 @@ int main(int argc, char* argv[]) {
if
(
!
showHelp
)
{
ChromSweep
*
bi
=
new
ChromSweep
(
bedAFile
,
bedBFile
,
anyHit
,
writeA
,
writeB
,
writeOverlap
,
ChromSweep
*
sweep
=
new
ChromSweep
(
bedAFile
,
bedBFile
,
anyHit
,
writeA
,
writeB
,
writeOverlap
,
writeAllOverlap
,
overlapFraction
,
noHit
,
writeCount
,
forceStrand
,
reciprocalFraction
,
obeySplits
,
inputIsBam
,
outputIsBam
);
delete
bi
;
pair
<
BED
,
vector
<
BED
>
>
hit_set
;
while
(
sweep
->
Next
(
hit_set
))
{
cout
<<
hit_set
.
first
.
chrom
<<
"
\t
"
<<
hit_set
.
first
.
start
<<
"
\t
"
<<
hit_set
.
first
.
end
<<
"
\t
"
<<
hit_set
.
second
.
size
()
<<
"
\n
"
;
}
delete
sweep
;
return
0
;
}
else
{
...
...
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment