Skip to content

Commit 2cce6a8

Browse files
committed
Adding lz4 decompressor, tuned for mips.
1 parent 45675f1 commit 2cce6a8

File tree

2 files changed

+164
-0
lines changed

2 files changed

+164
-0
lines changed

src/mips/lz4/lz4.c

Lines changed: 107 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,107 @@
1+
/*
2+
3+
MIT License
4+
5+
Copyright (c) 2025 PCSX-Redux authors
6+
7+
Permission is hereby granted, free of charge, to any person obtaining a copy
8+
of this software and associated documentation files (the "Software"), to deal
9+
in the Software without restriction, including without limitation the rights
10+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
11+
copies of the Software, and to permit persons to whom the Software is
12+
furnished to do so, subject to the following conditions:
13+
14+
The above copyright notice and this permission notice shall be included in all
15+
copies or substantial portions of the Software.
16+
17+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
20+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
22+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
23+
SOFTWARE.
24+
25+
*/
26+
27+
#include <stddef.h>
28+
#include <stdint.h>
29+
30+
// See https://github.com/lz4/lz4/blob/dev/doc/lz4_Block_format.md
31+
// We are not taking too much care about the fault tolerance of the
32+
// decompression code. While the LZ4 format is not very complex, it
33+
// has a few failure modes that we are not handling. This means that
34+
// if the data is corrupted, this will likely result in a crash or an
35+
// infinite loop.
36+
/*
 * Decompress one raw LZ4 block.
 *
 * source_    - first byte of the compressed block.
 * sourceEnd_ - one past the last byte of the compressed block (exclusive).
 * dest_      - output buffer; it is never bounds-checked, so the caller must
 *              make it large enough for the whole decompressed payload.
 *
 * An empty input range (source_ >= sourceEnd_) produces no output and reads
 * nothing. Apart from that, the stream is trusted: corrupted data is not
 * detected.
 */
void lz4_decompress_block(const void* source_, const void* sourceEnd_, void* dest_) {
    const uint8_t* source = (const uint8_t*)source_;
    const uint8_t* sourceEnd = (const uint8_t*)sourceEnd_;
    uint8_t* dest = (uint8_t*)dest_;
    uint8_t token = 0;
    size_t offset = 0;
    int state = 0;

    // The loop below always consumes a token before testing for the end of
    // the input, so an empty range has to be rejected up front.
    if (source >= sourceEnd) {
        return;
    }

    do {
        size_t len;
        // Decompression flip-flops between copying literal bytes and
        // copying back references. Each sequence starts with a token
        // byte holding both lengths. We always start with the literal
        // bytes, which is indicated by state == 0.
        if (state == 0) {
            // The top 4 bits of the token are the literal length.
            token = *source++;
            len = token >> 4;
        } else {
            // The low 4 bits of the token are the back reference length.
            // The offset is a 16-bit little-endian value stored in the
            // next two bytes of the stream.
            offset = (size_t)source[0] | ((size_t)source[1] << 8);
            source += 2;
            len = token & 0x0f;
        }
        // A nibble of 15 means the length continues in the following
        // bytes: keep adding bytes until one of them is not 255.
        if (len == 0x0f) {
            uint8_t b;
            do {
                b = *source++;
                len += b;
            } while (b == 255);
        }
        const uint8_t* ptr;
        if (state == 0) {
            // Literal bytes come straight from the compressed stream.
            ptr = source;
        } else {
            // Back references come from data that was already written to
            // the output. The stored length is biased by the minimum
            // match length of 4.
            ptr = dest - offset;
            len += 4;
        }
        // The copy is the same for literals and back references. It must
        // stay a forward, byte-by-byte copy: a back reference may overlap
        // the bytes being written (offset < len), which is how runs are
        // encoded, so memcpy/memmove would give different results.
        const uint8_t* ptrEnd = ptr + len;
        while (ptr != ptrEnd) {
            *dest++ = *ptr++;
        }
        if (state == 0) {
            // Resume reading the stream right after the literal bytes.
            source = ptr;
        }
        // Flip the state before the next iteration.
        state ^= 1;
        // The block format has no end marker, so stop once the whole
        // input range has been consumed.
    } while (source < sourceEnd);
}

src/mips/lz4/lz4.h

Lines changed: 57 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,57 @@
1+
/*
2+
3+
MIT License
4+
5+
Copyright (c) 2025 PCSX-Redux authors
6+
7+
Permission is hereby granted, free of charge, to any person obtaining a copy
8+
of this software and associated documentation files (the "Software"), to deal
9+
in the Software without restriction, including without limitation the rights
10+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
11+
copies of the Software, and to permit persons to whom the Software is
12+
furnished to do so, subject to the following conditions:
13+
14+
The above copyright notice and this permission notice shall be included in all
15+
copies or substantial portions of the Software.
16+
17+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
20+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
22+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
23+
SOFTWARE.
24+
25+
*/
26+
27+
#pragma once
28+
29+
#include <stddef.h>
30+
#include <stdint.h>
31+
32+
/**
33+
* @brief Decompress a block of lz4 compressed data.
34+
*
35+
* @details This function decompresses a block of lz4 compressed data. The
36+
* source pointer should point to the start of the compressed data, and
37+
* sourceEnd should point one past the last byte of the compressed data. The dest
38+
* pointer should point to a buffer that is large enough to hold the
39+
* decompressed data, as the function does not perform any bounds checking.
40+
* The function can decompress data in place, and the way the compression
41+
* works allows for all of the compressed data to be at the end of the
42+
* same buffer used to store the decompressed data. In this case, the
43+
* whole of the compressed data will be completely overwritten by the
44+
* decompressed data.
45+
*
46+
* @note The function does not check for the validity of the
47+
* compressed data. If the data is corrupted, this will likely result
48+
* in a crash or an infinite loop. The caller is responsible for
49+
* ensuring that the data is valid and that the destination buffer
50+
* is large enough to hold the decompressed data.
51+
*
52+
* @param source The pointer to the start of the compressed data.
53+
* @param sourceEnd The pointer one past the last byte of the compressed data (exclusive end).
54+
* @param dest The pointer to the destination buffer where the
55+
* decompressed data will be stored.
56+
*/
57+
void lz4_decompress_block(const void* source, const void* sourceEnd, void* dest);

0 commit comments

Comments
 (0)