4
4
#include " ../../Utils/BinaryIO.h"
5
5
#include " SuffixArray.h"
6
6
#include < algorithm>
7
- #include < list>
8
7
#include < cstdint>
8
+ #include < deque>
9
+ #include < bit_string.h>
9
10
10
11
/* *
11
12
* Burrows - Wheeler Transform
@@ -19,30 +20,24 @@ class BWT {
19
20
static void encode (const std::string& filename, const std::string& outputFileName) {
20
21
21
22
std::string toBeEncoded = BinaryIO::readString (filename);
22
-
23
- toBeEncoded += ' \0 ' ;
23
+ toBeEncoded += ' \0 ' ; // Last char must be the smallest of all
24
24
25
25
std::vector<uint32_t > suffixArray = SuffixArray::buildSuffixArray (toBeEncoded);
26
26
27
27
std::string bwt = generateBWT (toBeEncoded, suffixArray);
28
28
29
29
remove (outputFileName.c_str ()); // Remove Output File If Exists
30
+ BinaryIO::write (outputFileName, bit_string::from_uint_32 (originalIndex));
30
31
BinaryIO::write (outputFileName, bwt);
31
- BinaryIO::write (outputFileName, originalIndex);
32
32
33
33
}
34
34
35
35
static void decode (const std::string& filename, const std::string& outputFileName) {
36
36
37
- std::string bwt = BinaryIO::readString (filename);
38
-
39
- std::string index;
40
- for (int i = 0 ; i < sizeof (originalIndex); ++i) {
41
- index.insert (0 , 1 , bwt.back ());
42
- bwt.pop_back ();
43
- }
37
+ // Original Index is the first 4 bytes of the file
38
+ originalIndex = BinaryIO::readBitString (filename, 0 , sizeof (originalIndex)).to_uint_32 ();
39
+ std::string bwt = BinaryIO::readString (filename, sizeof (originalIndex));
44
40
45
- originalIndex = (uint32_t ) Converter::string_ToInt64 (index);
46
41
std::string inverseBWT = BWT::invertBWT (bwt, originalIndex);
47
42
inverseBWT.pop_back (); // remove '\0' which was added at encoding
48
43
@@ -53,9 +48,28 @@ class BWT {
53
48
54
49
55
50
private:
56
/**
 * Builds the "left shift" table used to invert a Burrows - Wheeler Transform.
 *
 * Entry i holds the position in bwt of the byte that ends up at position i
 * when the bytes of bwt are sorted by unsigned value; equal bytes keep the
 * order in which they appear in bwt.
 *
 * The table is produced with a counting sort over the 256 possible byte
 * values: one linear pass, no sorted copy of the input and no per-symbol queues.
 * NOTE(review): this matches a sort with unsignedCharsCompare provided that
 * comparator orders by unsigned byte value - confirm.
 *
 * @param bwt transformed text; every byte is treated as an unsigned symbol
 * @return table with bwt.length() entries (empty for empty input); positions
 *         are stored as uint32_t, so inputs must be shorter than 2^32 bytes
 */
static std::vector<uint32_t> computeLeftShift(const std::string& bwt) {
    const std::size_t length = bwt.length();

    // Count how often each byte value occurs
    uint32_t bucketStart[256] = {0};
    for (std::size_t i = 0; i < length; ++i) {
        ++bucketStart[static_cast<uint8_t>(bwt[i])];
    }

    // Turn counts into start offsets: bucketStart[c] = number of bytes smaller than c,
    // which is the first position of c in the sorted text
    uint32_t runningTotal = 0;
    for (int symbol = 0; symbol < 256; ++symbol) {
        const uint32_t occurrences = bucketStart[symbol];
        bucketStart[symbol] = runningTotal;
        runningTotal += occurrences;
    }

    // Walking bwt left to right hands out slots of each bucket in increasing
    // position order, so equal bytes keep their relative order
    std::vector<uint32_t> leftShift(length);
    for (std::size_t i = 0; i < length; ++i) {
        const uint8_t symbol = static_cast<uint8_t>(bwt[i]);
        leftShift[bucketStart[symbol]++] = static_cast<uint32_t>(i);
    }

    return leftShift;
}
60
74
61
75
// Generate Burrows - Wheeler Transform of given text
@@ -87,41 +101,18 @@ class BWT {
87
101
}
88
102
89
103
static std::string invertBWT (const std::string& BWT, long long index) {
90
- int length = BWT.length ();
91
- std::string sortedBWT = BWT;
92
- int * leftShift = new int [length];
93
104
94
- // Sorts the characters of BWT[] alphabetically
95
- std::sort (sortedBWT.begin (), sortedBWT.end (), unsignedCharsCompare);
96
-
97
- // Array of pointers that act as head nodes
98
- // to linked lists created to compute leftShift[]
99
- std::list<int > arr[256 ]; // 256 is the length all symbols as we us byte it is 256 maximum
100
-
101
- // Takes each distinct character of BWT[] as head
102
- // of a linked list and appends to it the new node
103
- // whose data part contains index at which
104
- // character occurs in BWT[]
105
- for (int i = 0 ; i < length; i++) {
106
- arr[(uint8_t ) BWT[i]].push_back (i);
107
- }
108
-
109
- // Takes each distinct character of sorted_arr[] as head
110
- // of a linked list and finds leftShift[]
111
- for (int i = 0 ; i < length; i++) {
112
- computeLeftShift (arr[(uint8_t ) sortedBWT[i]], i, leftShift);
113
- }
105
+ std::vector<uint32_t > leftShift = computeLeftShift (BWT);
114
106
115
107
std::string inverseBWT;
108
+ inverseBWT.reserve (BWT.length ());
116
109
117
110
// Decodes the bwt
118
- for (int i = 0 ; i < length; i++) {
111
+ for (int i = 0 ; i < BWT. length () ; i++) {
119
112
index = leftShift[index];
120
113
inverseBWT += BWT[index];
121
114
}
122
115
123
- delete[] leftShift;
124
-
125
116
return inverseBWT;
126
117
}
127
118
0 commit comments