Commit 83c6faa9 authored by lindenb's avatar lindenb
Browse files

sort BED on faidx file

parent ada04b66
......@@ -9,13 +9,14 @@
Licenced under the GNU General Public License 2.0 license.
******************************************************************************/
#include <set>
#include "lineFileUtilities.h"
#include "sortBed.h"
//
// Constructor
//
BedSort::BedSort(string &bedFile, bool printHeader) {
BedSort::BedSort(string &bedFile, bool printHeader,string &faidxFile):_faidxFile(faidxFile) {
_bedFile = bedFile;
_bed = new BedFile(bedFile);
......@@ -47,6 +48,72 @@ void BedSort::SortBed() {
}
}
void BedSort::SortBedOnFaidx()
{
set<string> all_chromosomes;
if(_faidxFile.empty())
{
cerr << "[sortBed] File for fasta index undefined." << endl;
exit(EXIT_FAILURE);
}
/* read FAIDX file */
ifstream faidx(_faidxFile.c_str(),ios::in);
if(!faidx.is_open()) {
cerr << "Cannot open \""<< _faidxFile << "\""
<< strerror(errno)
<< endl;
exit(EXIT_FAILURE);
}
string line;
while(getline(faidx,line,'\n')) {
if(line.empty()) continue;
string::size_type tab= line.find('\t');
if(tab!=string::npos) {
line.erase(tab);
}
if(all_chromosomes.find(line) != all_chromosomes.end()) {
cerr << "Chromosome \"" << line
<<"\" defined twice in "
<< _faidxFile
<< endl;
exit(EXIT_FAILURE);
}
_tid2chrom[_tid2chrom.size()]=line;
all_chromosomes.insert(line);
}
faidx.close();
/** end read FAIDX */
//check BED chromosomes
for (masterBedMapNoBin::iterator m = _bed->bedMapNoBin.begin(); m != _bed->bedMapNoBin.end(); ++m) {
if( all_chromosomes.find(m->first) == all_chromosomes.end()) {
cerr << "Chromosome \"" << m->first
<<"\" undefined in "
<< _faidxFile
<< endl;
exit(EXIT_FAILURE);
}
}
//loop over each chromosome using the faidx index
for(size_t tid=0; tid <_tid2chrom.size(); ++tid)
{
string chrom = _tid2chrom[tid];
masterBedMapNoBin::iterator m = _bed->bedMapNoBin.find(chrom);
if( m == _bed->bedMapNoBin.end() ) continue; //this chromosome is not present in BED
// bedList is already sorted by start position.
vector<BED> bedList = m->second;
for (unsigned int i = 0; i < bedList.size(); ++i) {
_bed->reportBedNewLine(bedList[i]);
}
}
}
void BedSort::SortBedBySizeAsc() {
......
......@@ -14,6 +14,7 @@
#include <algorithm>
#include <iostream>
#include <fstream>
#include <map>
using namespace std;
......@@ -26,12 +27,13 @@ class BedSort {
public:
// constructor
BedSort(string &bedFile, bool printHeader);
BedSort(string &bedFile, bool printHeader,string &faidxFile);
// destructor
~BedSort(void);
void SortBed(); // the default. sorts by chrom (asc.) then by start (asc.)
void SortBedOnFaidx(); // sort BED using a given Reference (e.g: faidx file )
void SortBedBySizeAsc();
void SortBedBySizeDesc();
void SortBedByChromThenSizeAsc();
......@@ -41,7 +43,8 @@ public:
private:
string _bedFile;
string _faidxFile;/* genome index, generated by samtools faidx 1st col is chromosome */
map<size_t,string> _tid2chrom; /** map chromosome name to sort order */
// instance of a bed file class.
BedFile *_bed;
};
......@@ -31,6 +31,7 @@ int sort_main(int argc, char* argv[]) {
// input files
string bedFile = "stdin";
string faidxFile;
bool haveBed = true;
int sortChoices = 0;
......@@ -40,6 +41,7 @@ int sort_main(int argc, char* argv[]) {
bool sortByChromThenSizeDesc = false;
bool sortByChromThenScoreAsc = false;
bool sortByChromThenScoreDesc = false;
bool sortByFaidx = false;
bool printHeader = false;
for(int i = 1; i < argc; i++) {
......@@ -88,6 +90,14 @@ int sort_main(int argc, char* argv[]) {
sortByChromThenScoreDesc = true;
sortChoices++;
}
else if(PARAMETER_CHECK("-faidx", 6, parameterLength)) {
if ((i+1) < argc) {
faidxFile = argv[i + 1];
i++;
}
sortByFaidx = true;
sortChoices++;
}
else if(PARAMETER_CHECK("-header", 7, parameterLength)) {
printHeader = true;
}
......@@ -109,7 +119,7 @@ int sort_main(int argc, char* argv[]) {
if (!showHelp) {
BedSort *bm = new BedSort(bedFile, printHeader);
BedSort *bm = new BedSort(bedFile, printHeader,faidxFile);
if (sortBySizeAsc) {
bm->SortBedBySizeAsc();
......@@ -129,6 +139,9 @@ int sort_main(int argc, char* argv[]) {
else if (sortByChromThenScoreDesc) {
bm->SortBedByChromThenScoreDesc();
}
else if (sortByFaidx) {
bm->SortBedOnFaidx();
}
else {
bm->SortBed();
}
......@@ -148,13 +161,14 @@ void sort_help(void) {
cerr << "Usage: " << PROGRAM_NAME << " [OPTIONS] -i <bed/gff/vcf>" << endl << endl;
cerr << "Options: " << endl;
cerr << "\t" << "-sizeA\t\t" << "Sort by feature size in ascending order." << endl;
cerr << "\t" << "-sizeD\t\t" << "Sort by feature size in descending order." << endl;
cerr << "\t" << "-chrThenSizeA\t" << "Sort by chrom (asc), then feature size (asc)." << endl;
cerr << "\t" << "-chrThenSizeD\t" << "Sort by chrom (asc), then feature size (desc)." << endl;
cerr << "\t" << "-chrThenScoreA\t" << "Sort by chrom (asc), then score (asc)." << endl;
cerr << "\t" << "-chrThenScoreD\t" << "Sort by chrom (asc), then score (desc)." << endl;
cerr << "\t" << "-sizeA\t\t\t" << "Sort by feature size in ascending order." << endl;
cerr << "\t" << "-sizeD\t\t\t" << "Sort by feature size in descending order." << endl;
cerr << "\t" << "-chrThenSizeA\t\t" << "Sort by chrom (asc), then feature size (asc)." << endl;
cerr << "\t" << "-chrThenSizeD\t\t" << "Sort by chrom (asc), then feature size (desc)." << endl;
cerr << "\t" << "-chrThenScoreA\t\t" << "Sort by chrom (asc), then score (asc)." << endl;
cerr << "\t" << "-chrThenScoreD\t\t" << "Sort by chrom (asc), then score (desc)." << endl;
cerr << "\t" << "-faidx (names.txt)\t" << "Sort according to the chromosomes declared in \"names.txt\"" << endl;
cerr << "\t-header\t" << "Print the header from the A file prior to results." << endl << endl;
exit(1);
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment