-
Notifications
You must be signed in to change notification settings - Fork 3
/
base_bwt.h
64 lines (48 loc) · 1.33 KB
/
base_bwt.h
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
#ifndef BASE_BWT_H
#define BASE_BWT_H
//C headers
#include <stdint.h>
//C++ headers
#include <string>
#include <vector>
using namespace std;
// Compile-time constants shared by the BWT code.
// NOTE(review): LETTER_BITS + NUMBER_BITS == 8, which suggests one byte split
// into a symbol field and a count field - confirm against the implementation.
// The derived values are written as expressions so they stay in sync with
// LETTER_BITS.
enum {
    VC_LEN      = 6,                  // number of symbols: $ A C G N T
    LETTER_BITS = 3,                  // defined
    NUMBER_BITS = 8 - LETTER_BITS,    // 5
    NUM_POWER   = 1 << NUMBER_BITS,   // 32, i.e. 2**NUMBER_BITS
    MASK        = 255 >> NUMBER_BITS  // 7
    // These used to be pre-defined, but are set up as user options now
    // BIT_POWER = 8,  // defined
    // BIN_SIZE = 256  // 2**self.bitPower
};
// A pair of 64-bit values; constrainRange() takes one and returns one.
// NOTE(review): l and h look like the low and high bounds of an interval in
// the BWT. Whether h is inclusive is not visible in this header - confirm.
struct bwtRange {
    uint64_t l;  // presumably the low bound
    uint64_t h;  // presumably the high bound
};
// Abstract base class for BWT-backed k-mer queries.
//
// The class holds the BWT bytes plus auxiliary count/index vectors and
// declares the query entry points. It cannot be instantiated directly:
// constrainRange() is pure virtual and must be supplied by a derived class.
// Member function bodies are not part of this header.
class BaseBWT {
protected:
    //loaded from disk
    std::string bwtFN;          // NOTE(review): presumably the BWT file name - confirm
    std::vector<uint8_t> bwt;   // raw BWT bytes

    //constructTotalCounts()
    std::vector<uint64_t> totalCounts;

    //constructIndexing()
    std::vector<uint64_t> startIndex;
    std::vector<uint64_t> endIndex;
    uint64_t totalSize;         // not initialised here; the constructor/builders must set it

    //these functions build all auxiliary structures required for the FM-index lookups
    void constructTotalCounts();
    void constructIndexing();

public:
    //constructor and destructor
    BaseBWT();
    // Virtual because this is a polymorphic base: without it, deleting a
    // derived object through a BaseBWT pointer is undefined behaviour.
    virtual ~BaseBWT();

    //basic query functions
    // Returns a count for the k-mer given by `kmer` and `kmerSize`.
    // NOTE(review): `kmer` presumably points to `kmerSize` encoded symbols - confirm.
    uint64_t countKmer(uint8_t * kmer, uint64_t kmerSize);

    //multi-query functions
    // Returns a vector of counts for `seq` using k-mers of length `kmerSize`.
    // NOTE(review): presumably one count per k-mer position in `seq` - confirm.
    std::vector<uint64_t> countPileup_i(std::vector<uint8_t> seq, uint64_t kmerSize);

    //query sub-routines
    // Implemented by derived classes: maps `inRange` to a new range for symbol `sym`.
    virtual bwtRange constrainRange(uint8_t sym, bwtRange inRange) = 0;
};
#endif