7
7
#include < cstdint>
8
8
9
9
#include " Node.h"
10
- #include " ../../Utils/Converter.h"
11
10
#include " ../../Utils/BinaryIO.h"
12
-
13
-
11
+ #include < bit_string.h>
12
+
13
+
14
+ /* *
15
+ * David Huffman Encoding and Decoding
16
+ *
17
+ * @File_Format
18
+ * ___________________________________________________ _
19
+ * | File_Header_Size_In_Bits (2 Bytes) | \
20
+ * |_________________________________________________| \
21
+ * | File Header (Dictionary): | \
22
+ * | Each Item consists of: | Length of This Part in bits is File_Header_Size_In_Bits
23
+ * | - Original Value (1 Byte) [ex. 'A' -> 0x41] | There may be some extra bits if size does not fit in bytes
24
+ * | - Huffman_Code_Length (1 Byte) | /
25
+ * | - Huffman Code (Huffman_Code_Length Bits) | /
26
+ * | | /
27
+ * |_________________________________________________| -
28
+ * | |
29
+ * | Huffman Coded Data (Rest of the file) |
30
+ * |_________________________________________________|
31
+ * | Extra Bits in Last Byte (1 Byte) |
32
+ * | There may be some extra bits, if size of Data |
33
+ * | does not fit in bytes |
34
+ * ---------------------------------------------------
35
+ */
14
36
class Huffman {
15
37
16
38
static const uint32_t BYTE = 8 ;
@@ -28,32 +50,41 @@ class Huffman {
28
50
deallocateHuffmanTree (huffmanTree); // release memory
29
51
frequencies.clear (); // release memory
30
52
31
- std::string binaryFileHeader = Converter::bitString_ToRealBinary ( generateFileHeader (huffmanCodes) );
53
+ bit_string binaryFileHeader = generateFileHeader (huffmanCodes);
32
54
33
55
remove (outputFileName.c_str ()); // Remove Output File If Exists
34
56
BinaryIO::write (outputFileName, binaryFileHeader);
35
57
36
58
binaryFileHeader.clear ();
37
59
binaryFileHeader.shrink_to_fit (); // release memory
38
60
39
- std::string encodedData = encode (toBeEncoded, huffmanCodes);
61
+ bit_string encodedData = encode (toBeEncoded, huffmanCodes);
40
62
41
63
BinaryIO::write (outputFileName, encodedData);
42
64
}
43
65
44
66
45
67
static void decode (const std::string& filename, const std::string& outputFileName) {
46
68
47
- uint32_t fileHeaderSizeInBytes = byteSize (decodeFileHeaderSizeInBits (BinaryIO::readString (filename, 0 , 2 )));
69
+ // File Header Size In Bits is The First 2 Bytes of Data
70
+ uint32_t fileHeaderSizeInBits = BinaryIO::readBitString (filename, 0 , 2 ).to_uint_32 ();
71
+
72
+ // The remainder of the file header is the Huffman dictionary
73
+ bit_string dictionary = BinaryIO::readBitString (filename, 2 , byteSize (fileHeaderSizeInBits));
74
+
75
+ uint32_t dictionarySizeInBits = fileHeaderSizeInBits - 2 * BYTE;
48
76
49
- std::string fileHeader = BinaryIO::readString (filename, 0 , fileHeaderSizeInBytes);
77
+ // Remove Extra bits in last byte
78
+ dictionary.pop_back (dictionary.size () - dictionarySizeInBits);
50
79
51
- auto huffmanCodes = reconstructHuffmanCodes (fileHeader);
80
+ auto huffmanCodes = reconstructHuffmanCodes (dictionary);
81
+ dictionary.clear ();
82
+ dictionary.shrink_to_fit (); // release memory
52
83
Node* huffmanTree = reconstructHuffmanTree (huffmanCodes);
53
84
54
- std::string toBeDecoded = BinaryIO::readString (filename, fileHeaderSizeInBytes );
85
+ bit_string toBeDecoded = BinaryIO::readBitString (filename, byteSize (fileHeaderSizeInBits) );
55
86
56
- std::string decodedData = decode (toBeDecoded, huffmanCodes, huffmanTree);
87
+ bit_string decodedData = decode (toBeDecoded, huffmanCodes, huffmanTree);
57
88
58
89
remove (outputFileName.c_str ()); // Remove Output File If Exists
59
90
BinaryIO::write (outputFileName, decodedData);
@@ -69,37 +100,39 @@ class Huffman {
69
100
}
70
101
71
102
72
- static std::string encode (const std::string& toBeEncoded, std::unordered_map<char , std::string>& huffmanCodes) {
73
- std::string encodedString;
103
+ static bit_string encode (const std::string& toBeEncoded, std::unordered_map<uint8_t , bit_string>& huffmanCodes) {
104
+ bit_string encodedData;
105
+ encodedData.reserve (toBeEncoded.size () / (4 * BYTE));
74
106
for (char c : toBeEncoded) {
75
- encodedString += huffmanCodes[c];
107
+ encodedData += huffmanCodes[c];
76
108
}
77
109
78
- std::string encodedData = Converter::bitString_ToRealBinary (encodedString);
79
- uint8_t extraBitsInLastByte = getNumberOfExtraBitsInLastByte (encodedString);
110
+ uint8_t extraBitsInLastByte = encodedData.extra_bits_size ();
111
+ for (int i = 0 ; i < extraBitsInLastByte; ++i) {
112
+ encodedData.push_back (0 );
113
+ }
80
114
81
115
encodedData += extraBitsInLastByte;
82
-
83
116
return encodedData;
84
117
}
85
118
86
119
87
- static std::string decode (std::string & toBeDecoded, std::unordered_map<std::string, char >& huffmanCodes, Node* huffmanTree) {
120
+ static bit_string decode (bit_string & toBeDecoded, std::unordered_map<bit_string, uint8_t >& huffmanCodes, Node* huffmanTree) {
88
121
89
- int extraBitsInLastByte = toBeDecoded.back ();
90
- toBeDecoded.pop_back ();
122
+ uint8_t extraBitsInLastByte = toBeDecoded.back_byte ();
123
+ toBeDecoded.pop_back (BYTE);
124
+ toBeDecoded.pop_back (extraBitsInLastByte);
91
125
92
- std::string toBeDecodedBitString = Converter::string_ToBitString (toBeDecoded);
126
+ bit_string decodedData;
127
+ decodedData.reserve (toBeDecoded.size ());
93
128
94
- toBeDecodedBitString.erase (toBeDecodedBitString.length () - extraBitsInLastByte);
95
-
96
- std::string decodedData, currentCode;
129
+ bit_string currentCode;
97
130
Node* currentNode = huffmanTree;
98
- for (char toBeDecodedBit : toBeDecodedBitString ) {
131
+ for (bool toBeDecodedBit : toBeDecoded ) {
99
132
100
- if (toBeDecodedBit == ' 1 ' )
133
+ if (toBeDecodedBit == 1 )
101
134
currentNode = currentNode->left ;
102
- else if (toBeDecodedBit == ' 0 ' )
135
+ else if (toBeDecodedBit == 0 )
103
136
currentNode = currentNode->right ;
104
137
105
138
currentCode += toBeDecodedBit;
@@ -113,17 +146,10 @@ class Huffman {
113
146
return decodedData;
114
147
}
115
148
116
- static uint32_t decodeFileHeaderSizeInBits (const std::string& fileHeader) {
117
- uint32_t fileHeaderSizeInBits = (uint8_t ) fileHeader[0 ];
118
- fileHeaderSizeInBits <<= BYTE;
119
- fileHeaderSizeInBits += (uint8_t ) fileHeader[1 ];
120
- return fileHeaderSizeInBits;
121
- }
122
149
123
-
124
- static std::unordered_map<char , std::string> generateHuffmanCodes (Node* huffmanTree) {
125
- std::unordered_map<char , std::string> huffmanCodes;
126
- std::string code;
150
+ static std::unordered_map<uint8_t , bit_string> generateHuffmanCodes (Node* huffmanTree) {
151
+ std::unordered_map<uint8_t , bit_string> huffmanCodes;
152
+ bit_string code;
127
153
128
154
std::function<void (Node*)> generateHuffmanCodesRecursive = [&](Node* node) {
129
155
if (node == nullptr )
@@ -134,9 +160,9 @@ class Huffman {
134
160
return ;
135
161
}
136
162
137
- code.push_back (' 1 ' );
163
+ code.push_back (1 );
138
164
generateHuffmanCodesRecursive (node->left );
139
- code.back () = ' 0 ' ;
165
+ code.back () = 0 ;
140
166
generateHuffmanCodesRecursive (node->right );
141
167
code.pop_back ();
142
168
};
@@ -147,35 +173,26 @@ class Huffman {
147
173
}
148
174
149
175
150
- // Read the file Header and construct the Huffman Codes Dictionary
151
- static std::unordered_map<std::string, char > reconstructHuffmanCodes (const std::string& fileHeader) {
152
-
153
- uint32_t dictionaryLengthInBits = decodeFileHeaderSizeInBits (fileHeader) - (2u * BYTE);
176
+ // Read the File Header and construct the Huffman Codes Dictionary
177
+ static std::unordered_map<bit_string, uint8_t > reconstructHuffmanCodes (const bit_string& dictionary) {
154
178
155
- std::string dictionaryBits = Converter::string_ToBitString (fileHeader, 2 , fileHeader. length () );
179
+ std::unordered_map<bit_string, uint8_t > huffmanCodes (dictionary. size ()/ 4 );
156
180
157
- // if the header does not fit in bytes, i.e. there are extra bits which does not belong to it
158
- while (dictionaryBits.length () > dictionaryLengthInBits)
159
- dictionaryBits.pop_back ();
160
-
161
- std::unordered_map<std::string, char > huffmanCodes;
162
- uint8_t value;
163
- std::string code;
164
- for (int i = 0 ; i < dictionaryLengthInBits;) {
165
- value = Converter::bitString_ToRealBinary (dictionaryBits, i, BYTE)[0 ];
181
+ for (int i = 0 ; i < dictionary.size ();) {
182
+ uint8_t value = dictionary.substr (i, BYTE).to_uint_8 ();
166
183
i += BYTE;
167
- uint32_t codeLength = Converter::bitString_ToInt (dictionaryBits .substr (i, BYTE));
184
+ uint32_t codeLength = dictionary .substr (i, BYTE). to_uint_32 ( );
168
185
i += BYTE;
169
- code = dictionaryBits .substr (i, codeLength);
186
+ bit_string code = dictionary .substr (i, codeLength);
170
187
i += codeLength;
171
- huffmanCodes[code] = value;
188
+ huffmanCodes[std::move ( code) ] = value;
172
189
}
173
190
174
191
return huffmanCodes;
175
192
}
176
193
177
194
178
- static Node* buildHuffmanTree (const std::unordered_map<char , int >& frequencies) {
195
+ static Node* buildHuffmanTree (const std::unordered_map<uint8_t , uint32_t >& frequencies) {
179
196
std::priority_queue<Node*, std::vector<Node*>, Node::Compare> pq;
180
197
181
198
// for the readability purposes
@@ -202,7 +219,7 @@ class Huffman {
202
219
}
203
220
204
221
205
- static Node* reconstructHuffmanTree (const std::unordered_map<std::string, char > &huffmanCodes) {
222
+ static Node* reconstructHuffmanTree (const std::unordered_map<bit_string, uint8_t > &huffmanCodes) {
206
223
207
224
Node* root = new Node;
208
225
Node* currentNode;
@@ -213,13 +230,13 @@ class Huffman {
213
230
214
231
for (const auto & huffmanCode : huffmanCodes) {
215
232
currentNode = root;
216
- for (char currentCodeBit : huffmanCode.code ) {
217
- if (currentCodeBit == ' 1 ' ) { // goes left
233
+ for (bool currentCodeBit : huffmanCode.code ) {
234
+ if (currentCodeBit == 1 ) { // goes left
218
235
if (currentNode->left == nullptr ) {
219
236
currentNode->left = new Node;
220
237
}
221
238
currentNode = currentNode->left ;
222
- } else if (currentCodeBit == ' 0 ' ) { // goes right
239
+ } else if (currentCodeBit == 0 ) { // goes right
223
240
if (currentNode->right == nullptr ) {
224
241
currentNode->right = new Node;
225
242
}
@@ -256,25 +273,26 @@ class Huffman {
256
273
}
257
274
258
275
259
// Counts how many times each byte value occurs in `input`.
//
// Bytes are keyed by their unsigned value (0..255), so characters above 0x7F
// map to the same key regardless of whether `char` is signed on the platform.
// Returns an empty map for empty input; byte values that never occur have no
// entry.
static std::unordered_map<uint8_t, uint32_t> generateFrequencies (const std::string& input) {
    // 256 initial buckets: there can never be more distinct byte values than that.
    std::unordered_map<uint8_t, uint32_t> frequencies(256);
    for (const char currentChar : input) {
        ++frequencies[static_cast<uint8_t>(currentChar)];
    }
    return frequencies;
}
266
283
267
284
268
- static std::string generateFileHeaderDictionary (const std::unordered_map<char , std::string >& huffmanCodes) {
285
+ static bit_string generateFileHeaderDictionary (const std::unordered_map<uint8_t , bit_string >& huffmanCodes) {
269
286
#define encodedValue first
270
287
#define code second
271
288
272
- std::string dictionary;
289
+ bit_string dictionary;
290
+ dictionary.reserve (huffmanCodes.size ());
273
291
274
292
for (auto & huffmanCode : huffmanCodes) {
275
- dictionary += Converter::int8_ToBitString ( huffmanCode.encodedValue ) ;
293
+ dictionary += huffmanCode.encodedValue ;
276
294
uint8_t currentCodeLength = huffmanCode.code .length ();
277
- dictionary += Converter::int8_ToBitString ( currentCodeLength) ;
295
+ dictionary += currentCodeLength;
278
296
dictionary += huffmanCode.code ;
279
297
}
280
298
@@ -285,27 +303,21 @@ class Huffman {
285
303
}
286
304
287
305
288
- static std::string generateFileHeader (const std::unordered_map<char , std::string >& huffmanCodes) {
289
-
290
- std::string dictionary = generateFileHeaderDictionary (huffmanCodes);
306
+ static bit_string generateFileHeader (const std::unordered_map<uint8_t , bit_string >& huffmanCodes) {
307
+ // TODO: More Optimization
308
+ bit_string dictionary = generateFileHeaderDictionary (huffmanCodes);
291
309
292
- // 2u * BYTE is the size of "fileHeaderSize" itself as it is added to the header
293
- uint16_t fileHeaderSize = 2u * BYTE + dictionary.length ();
310
+ // The size of "fileHeaderSizeInBits" itself as it is added to the header
311
+ uint16_t sizeOfFileHeaderSize = 2u * BYTE;
312
+ uint16_t fileHeaderSizeInBits = sizeOfFileHeaderSize + dictionary.length ();
294
313
295
- // File Header Size in bits (it is 2 Bytes -> max of 8KB header)
296
- std::string fileHeaderSizeInBits = Converter::int16_ToBitString (fileHeaderSize);
297
-
298
- std::string fileHeader = fileHeaderSizeInBits + dictionary;
314
+ bit_string fileHeader = bit_string::from_uint_16 (fileHeaderSizeInBits);
315
+ fileHeader += dictionary;
299
316
return fileHeader;
300
317
}
301
318
302
- static uint8_t getNumberOfExtraBitsInLastByte (const std::string& encodedString) {
303
- int maxLengthFitInBytes = encodedString.length () & (~(BYTE - 1u ));
304
- return (BYTE - (encodedString.length () - maxLengthFitInBytes));
305
- }
306
-
307
319
308
320
};
309
321
310
322
311
- #endif // HUFFMAN_H
323
+ #endif // HUFFMAN_H
0 commit comments