Skip to content

Commit 806ed43

Browse files
committed
Adding archive manager.
1 parent 6eef468 commit 806ed43

File tree

4 files changed

+941
-0
lines changed

4 files changed

+941
-0
lines changed

src/mips/psyqo-paths/Makefile

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,8 @@ TYPE = library
55

66
SRCS = \
77
src/cdrom-loader.cpp \
8+
src/archive-manager.cpp \
9+
../ucl-demo/n2e-d.S \
810

911
EXTRA_DEPS += $(PSYQOPATHSDIR)Makefile
1012

Lines changed: 390 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,390 @@
1+
/*
2+
3+
MIT License
4+
5+
Copyright (c) 2025 PCSX-Redux authors
6+
7+
Permission is hereby granted, free of charge, to any person obtaining a copy
8+
of this software and associated documentation files (the "Software"), to deal
9+
in the Software without restriction, including without limitation the rights
10+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
11+
copies of the Software, and to permit persons to whom the Software is
12+
furnished to do so, subject to the following conditions:
13+
14+
The above copyright notice and this permission notice shall be included in all
15+
copies or substantial portions of the Software.
16+
17+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
20+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
22+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
23+
SOFTWARE.
24+
25+
*/
26+
27+
#pragma once
28+
29+
#include <EASTL/array.h>
30+
#include <EASTL/functional.h>
31+
#include <EASTL/string_view.h>
32+
#include <stdint.h>
33+
34+
#include <coroutine>
35+
36+
#include "common/util/bitfield.hh"
37+
#include "common/util/djbhash.h"
38+
#include "psyqo/buffer.hh"
39+
#include "psyqo/iso9660-parser.hh"
40+
#include "psyqo/task.hh"
41+
#include "psyqo/utility-polyfill.h"
42+
43+
namespace psyqo::paths {
44+
45+
/**
46+
* @brief This class manages the reading and decompression of files from an archive.
47+
*
48+
* @details The ArchiveManager class is a helper class that manages the reading and
49+
* decompression of files from an archive. The archive format is specified in the
50+
* mkarchive.lua tool available in the tools directory, and this is where the reader
51+
* can find rationales and details on the format itself. The archive is a collection
52+
* of files that are compressed using different compression methods, and is designed
53+
* to be used specifically with the PlayStation 1. Parsing the iso9660 filesystem is
54+
* an expensive operation, so using a single archive for all files will speed up
55+
* loading times, as the archive index is kept in memory in a compact and efficient
56+
* format.
57+
*
58+
* If multiple archives are used, it is reasonable to create and destroy the
59+
* ArchiveManager object multiple times, or to have multiple ArchiveManager objects.
60+
* The latter is recommended, as it allows for caching of the index in memory, and
61+
* allows for faster loading times.
62+
*/
63+
class ArchiveManager {
64+
struct InitAwaiterWithFilename {
65+
InitAwaiterWithFilename(eastl::string_view name, ISO9660Parser &parser, ArchiveManager &manager)
66+
: m_name(name), m_parser(parser), m_manager(manager) {}
67+
bool await_ready() const { return false; }
68+
template <typename U>
69+
void await_suspend(std::coroutine_handle<U> handle) {
70+
m_manager.initialize(m_name, m_parser, [handle, this](bool success) {
71+
m_success = success;
72+
handle.resume();
73+
});
74+
}
75+
bool await_resume() { return m_success; }
76+
77+
private:
78+
eastl::string_view m_name;
79+
ISO9660Parser &m_parser;
80+
ArchiveManager &m_manager;
81+
bool m_success;
82+
};
83+
struct InitAwaiter {
84+
InitAwaiter(uint32_t LBA, CDRom &device, ArchiveManager &manager)
85+
: m_LBA(LBA), m_device(device), m_manager(manager) {}
86+
bool await_ready() const { return false; }
87+
template <typename U>
88+
void await_suspend(std::coroutine_handle<U> handle) {
89+
m_manager.initialize(m_LBA, m_device, [handle, this](bool success) {
90+
m_success = success;
91+
handle.resume();
92+
});
93+
}
94+
bool await_resume() { return m_success; }
95+
96+
private:
97+
uint32_t m_LBA;
98+
CDRom &m_device;
99+
ArchiveManager &m_manager;
100+
bool m_success;
101+
};
102+
103+
public:
104+
union IndexEntry;
105+
106+
private:
107+
struct ReadFileAwaiter {
108+
ReadFileAwaiter(const IndexEntry *entry, CDRom &device, ArchiveManager &manager)
109+
: m_entry(entry), m_device(device), m_manager(manager) {}
110+
constexpr bool await_ready() const { return false; }
111+
template <typename U>
112+
void await_suspend(std::coroutine_handle<U> handle) {
113+
m_manager.readFile(m_entry, m_device, [handle, this](Buffer<uint8_t> &&data) {
114+
m_data = eastl::move(data);
115+
handle.resume();
116+
});
117+
}
118+
Buffer<uint8_t> await_resume() { return eastl::move(m_data); }
119+
120+
private:
121+
const IndexEntry *m_entry;
122+
CDRom &m_device;
123+
ArchiveManager &m_manager;
124+
Buffer<uint8_t> m_data;
125+
};
126+
127+
public:
128+
/**
129+
* @brief The IndexEntry struct represents an entry in the archive index.
130+
*
131+
* @details The IndexEntry struct contains information about a file in the
132+
* archive, including its hash, decompressed size, padding size, sector offset,
133+
* compressed size, and compression method. While technically used by the
134+
* archive manager itself, the user can also use this struct to access the
135+
* information about the file in the archive and make decisions based on it.
136+
*/
137+
union IndexEntry {
138+
enum class Method : uint32_t {
139+
NONE = 0,
140+
UCL_NRV2E = 1,
141+
LZ4 = 2,
142+
COUNT = 3,
143+
};
144+
typedef Utilities::BitSpan<uint32_t, 21> DecompSizeField;
145+
typedef Utilities::BitSpan<uint32_t, 11> PaddingField;
146+
typedef Utilities::BitSpan<uint32_t, 19> SectorOffsetField;
147+
typedef Utilities::BitSpan<uint32_t, 10> CompressedSizeField;
148+
typedef Utilities::BitSpan<Method, 3> MethodField;
149+
typedef Utilities::BitField<DecompSizeField, PaddingField, SectorOffsetField, CompressedSizeField, MethodField>
150+
CompressedEntry;
151+
// Return the decompressed size of the file in bytes.
152+
uint32_t getDecompSize() const { return entry.get<DecompSizeField>(); }
153+
// Return the padding size in bytes. This is only relevant for
154+
// compressed files. The padding is used at the beginning of the
155+
// compressed data to align it to a 2048 byte boundary, and
156+
// allows in-place decompression of the data.
157+
uint32_t getPadding() const { return entry.get<PaddingField>(); }
158+
// Return the offset from the beginning of the archive to the compressed data in sectors.
159+
// This includes the index sectors at the beginning of the archive.
160+
uint32_t getSectorOffset() const { return entry.get<SectorOffsetField>(); }
161+
// Return the size of the compressed data in sectors. For uncompressed
162+
// data, this is the same as the size of the data in bytes, just rounded
163+
// up to the next 2048 byte boundary.
164+
uint32_t getCompressedSize() const { return entry.get<CompressedSizeField>(); }
165+
// Return the compression method used to compress the data.
166+
Method getCompressionMethod() const { return entry.get<MethodField>(); }
167+
uint32_t asArray[4];
168+
struct {
169+
uint64_t hash;
170+
CompressedEntry entry;
171+
};
172+
};
173+
174+
/**
175+
* @brief Asynchronous initialization of the archive manager.
176+
*
177+
* @details This function initializes the archive manager asynchronously.
178+
* There are two overloads of this function. The first one takes a filename
179+
* and an ISO9660Parser object, and the second one takes a LBA and a CDRom
180+
* object. The first overload is when the user wants the system to find the
181+
* archive in the ISO9660 filesystem, while the second one is when the user
182+
* already knows the LBA of the archive. Note that using exclusively the
183+
* second overload means the iso9660 filesystem parsing code will not be
184+
* used, which is a further reduction in the final binary's code footprint.
185+
*/
186+
void initialize(eastl::string_view archiveName, ISO9660Parser &parser, eastl::function<void(bool)> &&callback) {
187+
setupInitQueue(archiveName, parser, eastl::move(callback));
188+
m_queueInitFilename.run();
189+
}
190+
psyqo::TaskQueue::Task scheduleInitialize(eastl::string_view archiveName, ISO9660Parser &parser) {
191+
setupInitQueue(archiveName, parser, {});
192+
return m_queueInitFilename.schedule();
193+
}
194+
InitAwaiterWithFilename initialize(eastl::string_view archiveName, ISO9660Parser &parser) {
195+
return {archiveName, parser, *this};
196+
}
197+
void initialize(uint32_t LBA, CDRom &device, eastl::function<void(bool)> &&callback) {
198+
setupInitQueue(LBA, device, eastl::move(callback));
199+
m_queue.run();
200+
}
201+
psyqo::TaskQueue::Task scheduleInitialize(uint32_t LBA, CDRom &device) {
202+
setupInitQueue(LBA, device, {});
203+
return m_queue.schedule();
204+
}
205+
InitAwaiter initialize(uint32_t LBA, CDRom &device) { return {LBA, device, *this}; }
206+
207+
/**
208+
* @brief Get the First IndexEntry object.
209+
*
210+
* @details This function returns a pointer to the first IndexEntry object
211+
* in the index. In case the user has used a custom hashing mechanism for
212+
* locating the files, this function becomes relevant in order to do any
213+
* sort of custom search over the index. If the archive manager was not
214+
* initialized, or failed to initialize, this function will return nullptr.
215+
*
216+
* @return IndexEntry* Pointer to the first IndexEntry object in the index.
217+
*/
218+
const IndexEntry *getFirstIndexEntry() const {
219+
if (m_index.size() == 0) {
220+
return nullptr;
221+
}
222+
return &m_index[1];
223+
}
224+
225+
/**
226+
* @brief Get the number of entries in the index.
227+
*
228+
* @details This function returns the number of entries in the index.
229+
* Calling this function before the archive manager is initialized
230+
* successfully is undefined behavior.
231+
*
232+
* @return uint32_t The number of entries in the index.
233+
*/
234+
uint32_t getIndexCount() const { return m_index[0].asArray[2]; }
235+
236+
/**
237+
* @brief Get the IndexEntry object for a given path.
238+
*
239+
* @details This function returns a pointer to the IndexEntry object
240+
* corresponding to the given path. The path is hashed using the djb2
241+
* hash function, and the resulting hash is used to look up the entry
242+
* in the index using a binary search. If the archive manager was not
243+
* initialized, failed to initialize, or the path was not found in the
244+
* index, this function will return nullptr.
245+
*
246+
* @param path The path to look up.
247+
* @return IndexEntry* Pointer to the IndexEntry object corresponding to the path.
248+
*/
249+
const IndexEntry *getIndexEntry(eastl::string_view path) const;
250+
template <unsigned S>
251+
const IndexEntry *getIndexEntry(const char (&path)[S]) const {
252+
return getIndexEntry(djb::hash<uint64_t>(path));
253+
}
254+
const IndexEntry *getIndexEntry(uint64_t hash) const;
255+
256+
/**
257+
* @brief Get the LBA of the first sector of the file in the archive.
258+
*
259+
* @param entry The IndexEntry object for the file.
260+
* @return uint32_t The LBA of the first sector of the file in the archive.
261+
*/
262+
uint32_t getIndexEntrySectorStart(const IndexEntry *entry) const {
263+
return m_archiveDirentry.LBA + entry->getSectorOffset();
264+
}
265+
266+
/**
267+
* @brief Set the Buffer object for the next read operation.
268+
*
269+
* @details This function sets the buffer to be used for the next read
270+
* operation. By default, the archive manager will allocate a buffer of the
271+
* appropriate size for the file being read. However, if the user wants to
272+
* use an already allocated buffer, they can use this function to set the buffer
273+
* to be used.
274+
*
275+
* @param buffer The buffer to be used for the next read operation.
276+
*/
277+
void setBuffer(Buffer<uint8_t> &&buffer) { m_data = eastl::move(buffer); }
278+
279+
/**
280+
* @brief Read a file from the archive.
281+
*
282+
* @details This function reads a file from the archive. The file may
283+
* be specified by its path, hash, or IndexEntry object. The callback
284+
* will be called with the data of the file, or an empty buffer if the
285+
* file could not be read. Note that the template variants of this
286+
* function should be guaranteed to be computing the hash of the
287+
* string at compile time.
288+
*/
289+
template <unsigned S>
290+
void readFile(const char (&path)[S], CDRom &device, eastl::function<void(Buffer<uint8_t> &&)> &&callback) {
291+
setupQueue(getIndexEntry(path), device, eastl::move(callback));
292+
m_queue.run();
293+
}
294+
void readFile(eastl::string_view path, CDRom &device, eastl::function<void(Buffer<uint8_t> &&)> &&callback) {
295+
setupQueue(getIndexEntry(path), device, eastl::move(callback));
296+
m_queue.run();
297+
}
298+
void readFile(uint64_t hash, CDRom &device, eastl::function<void(Buffer<uint8_t> &&)> &&callback) {
299+
setupQueue(getIndexEntry(hash), device, eastl::move(callback));
300+
m_queue.run();
301+
}
302+
void readFile(const IndexEntry *entry, CDRom &device, eastl::function<void(Buffer<uint8_t> &&)> &&callback) {
303+
setupQueue(entry, device, eastl::move(callback));
304+
m_queue.run();
305+
}
306+
template <unsigned S>
307+
psyqo::TaskQueue::Task scheduleReadFile(const char (&path)[S], CDRom &device) {
308+
setupQueue(getIndexEntry(path), device, {});
309+
return m_queue.schedule();
310+
}
311+
psyqo::TaskQueue::Task scheduleReadFile(eastl::string_view path, CDRom &device) {
312+
setupQueue(getIndexEntry(path), device, {});
313+
return m_queue.schedule();
314+
}
315+
psyqo::TaskQueue::Task scheduleReadFile(uint64_t hash, CDRom &device) {
316+
setupQueue(getIndexEntry(hash), device, {});
317+
return m_queue.schedule();
318+
}
319+
psyqo::TaskQueue::Task scheduleReadFile(const IndexEntry *entry, CDRom &device) {
320+
setupQueue(entry, device, {});
321+
return m_queue.schedule();
322+
}
323+
template <unsigned S>
324+
ReadFileAwaiter readFile(const char (&path)[S], CDRom &device) {
325+
return {getIndexEntry(path), device, *this};
326+
}
327+
ReadFileAwaiter readFile(eastl::string_view path, CDRom &device) { return {getIndexEntry(path), device, *this}; }
328+
ReadFileAwaiter readFile(uint64_t hash, CDRom &device) { return {getIndexEntry(hash), device, *this}; }
329+
ReadFileAwaiter readFile(const IndexEntry *entry, CDRom &device) { return {entry, device, *this}; }
330+
331+
/**
332+
* @brief Register a decompressor for a specific compression method.
333+
*
334+
* @details In the spirit of paying for what you use, these functions allow
335+
* the user to register a decompressor for a specific compression method.
336+
* For instance, if the user knows that the archive will only contain
337+
* LZ4 compressed files, they can register the LZ4 decompressor and the
338+
* UCL_NRV2E decompressor will not be registered. This will reduce the
339+
* final binary size of the program, as the decompressor code will not
340+
* be included in the final binary. The UCL_NRV2E decompressor takes
341+
* 340 bytes of code, while the LZ4 decompressor takes 200 bytes of code.
342+
* It is also reasonable to not register any decompressors at all, if the
343+
* user is sure that the archive will not contain any compressed files.
344+
*/
345+
static void registerUCL_NRV2EDecompressor() {
346+
s_decompressors[toUnderlying(IndexEntry::Method::UCL_NRV2E)] = &ArchiveManager::decompressUCL_NRV2E;
347+
}
348+
static void registerLZ4Decompressor() {
349+
s_decompressors[toUnderlying(IndexEntry::Method::LZ4)] = &ArchiveManager::decompressLZ4;
350+
}
351+
352+
/**
353+
* @brief Register all decompressors.
354+
*
355+
* @details This function registers all decompressors. This is in the case
356+
* the user doesn't know which decompressors will be used, or if the user wants
357+
* to use all decompressors. This will increase the final binary size of the
358+
* program, as all decompressor code will be included in the final binary.
359+
*/
360+
static void registerAllDecompressors() {
361+
registerUCL_NRV2EDecompressor();
362+
registerLZ4Decompressor();
363+
}
364+
365+
private:
366+
eastl::function<void(bool)> m_initCallback;
367+
eastl::function<void(Buffer<uint8_t> &&)> m_callback;
368+
psyqo::TaskQueue m_queueInitFilename;
369+
psyqo::TaskQueue m_queue;
370+
Buffer<uint8_t> m_data;
371+
Buffer<IndexEntry> m_index;
372+
ISO9660Parser::DirEntry m_archiveDirentry;
373+
CDRom::ReadRequest m_request;
374+
bool m_pending = false;
375+
bool m_success = false;
376+
377+
void setupInitQueue(eastl::string_view archiveName, ISO9660Parser &parser, eastl::function<void(bool)> &&callback);
378+
void setupInitQueue(uint32_t LBA, CDRom &device, eastl::function<void(bool)> &&callback);
379+
void setupQueue(const IndexEntry *entry, CDRom &device, eastl::function<void(Buffer<uint8_t> &&)> &&callback);
380+
uint32_t getIndexSectorCount() const {
381+
static_assert(sizeof(IndexEntry) == 16, "IndexEntry size is not 16 bytes");
382+
uint32_t indexSize = (getIndexCount() + 1) * sizeof(IndexEntry);
383+
return (indexSize + 2047) / 2048;
384+
}
385+
static eastl::array<void (ArchiveManager::*)(const IndexEntry *), toUnderlying(IndexEntry::Method::COUNT)> s_decompressors;
386+
void decompressUCL_NRV2E(const IndexEntry *entry);
387+
void decompressLZ4(const IndexEntry *entry);
388+
};
389+
390+
} // namespace psyqo::paths

0 commit comments

Comments
 (0)