Skip to content

Commit f57f8cb

Browse files
Cong Liu, flynnjiang
authored and committed
ggml/kompute: Reimplement kompute_manager
Signed-off-by: Cong Liu <liucong@kylinos.cn>
1 parent 3676778 commit f57f8cb

File tree

2 files changed

+167
-60
lines changed

2 files changed

+167
-60
lines changed

ggml/include/ggml-kompute.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,8 @@
1111
extern "C" {
1212
#endif
1313

14+
#define GGML_KOMPUTE_MAX_DEVICES 16
15+
1416
struct ggml_vk_device {
1517
int index;
1618
int type; // same as VkPhysicalDeviceType

ggml/src/ggml-kompute.cpp

Lines changed: 165 additions & 60 deletions
Original file line numberDiff line numberDiff line change
@@ -85,28 +85,18 @@ struct ggml_backend_kompute_context {
8585
// is only created when a device is set and vulkan is explicitly turned on.
8686
static ggml_backend_kompute_context *s_kompute_context = nullptr;
8787

88-
class kompute_manager {
89-
kp::Manager *s_mgr = nullptr;
9088

91-
public:
92-
kp::Manager *operator()() {
93-
if (s_mgr && !s_mgr->hasInstance()) {
94-
destroy();
95-
}
96-
if (!s_mgr) {
97-
s_mgr = new kp::Manager;
98-
}
99-
return s_mgr;
100-
}
89+
struct ggml_backend_kompute_buffer_type_context {
90+
int device;
91+
int device_ref = 0;
92+
uint64_t buffer_alignment;
93+
uint64_t max_alloc;
94+
std::string name;
10195

102-
void destroy() {
103-
delete s_mgr;
104-
s_mgr = nullptr;
105-
}
96+
ggml_backend_kompute_buffer_type_context(int device, uint64_t buffer_alignment, uint64_t max_alloc)
97+
: device(device), buffer_alignment(buffer_alignment), max_alloc(max_alloc), name(ggml_kompute_format_name(device)) {}
10698
};
10799

108-
static kompute_manager komputeManager;
109-
110100
struct ggml_vk_memory {
111101
void *data = nullptr;
112102
size_t size = 0;
@@ -120,6 +110,61 @@ struct ggml_backend_kompute_buffer_context {
120110
struct ggml_vk_memory memory;
121111
};
122112

113+
class kompute_manager {
114+
public:
115+
kompute_manager();
116+
~kompute_manager();
117+
118+
kp::Manager *get_kp_manager(void);
119+
ggml_backend_t create_backend(int device);
120+
void destroy_backend(ggml_backend_t backend);
121+
ggml_backend_t get_backend(int device);
122+
123+
private:
124+
// Only for global queries, not for creating devices
125+
kp::Manager *m_kp_manager;
126+
127+
std::vector<ggml_backend_t> m_backends;
128+
};
129+
130+
131+
static kompute_manager komputeManager;
132+
133+
134+
// Looks up the backend registered for `device`; the result is whatever
// kompute_manager::get_backend() reports for that index.
static ggml_backend_t kompute_backend(int device)
{
    ggml_backend_t found = komputeManager.get_backend(device);
    return found;
}
138+
139+
// Resolves the backend for the device index stored in the buffer type's
// context.
static ggml_backend_t kompute_backend(ggml_backend_buffer_type_t buffer_type)
{
    const auto *type_ctx = static_cast<ggml_backend_kompute_buffer_type_context *>(buffer_type->context);
    const int device_index = type_ctx->device;
    return kompute_backend(device_index);
}
144+
145+
// Resolves the backend of a buffer by going through the buffer's type.
static ggml_backend_t kompute_backend(ggml_backend_buffer_t buffer)
{
    ggml_backend_buffer_type_t owning_type = buffer->buft;
    return kompute_backend(owning_type);
}
149+
150+
// Returns the Kompute context of the backend registered for `device`, or
// nullptr when no backend is found for it.
static ggml_backend_kompute_context *kompute_backend_context(int device)
{
    ggml_backend_t owner = kompute_backend(device);
    if (!owner) {
        return nullptr;
    }
    return static_cast<ggml_backend_kompute_context *>(owner->context);
}
155+
156+
// Returns the Kompute context of the backend that `buffer` resolves to, or
// nullptr when no backend is found for it.
static ggml_backend_kompute_context *kompute_backend_context(ggml_backend_buffer_t buffer)
{
    ggml_backend_t owner = kompute_backend(buffer);
    if (!owner) {
        return nullptr;
    }
    return static_cast<ggml_backend_kompute_context *>(owner->context);
}
161+
162+
// Returns the Kompute context of the backend that `buffer_type` resolves to,
// or nullptr when no backend is found for it.
static ggml_backend_kompute_context *kompute_backend_context(ggml_backend_buffer_type_t buffer_type)
{
    ggml_backend_t owner = kompute_backend(buffer_type);
    if (!owner) {
        return nullptr;
    }
    return static_cast<ggml_backend_kompute_context *>(owner->context);
}
167+
123168
#ifdef __linux__
124169
__attribute__((constructor))
125170
static void enable_sam() {
@@ -175,12 +220,12 @@ static const char * ggml_vk_getVendorName(uint32_t vendorID) {
175220

176221
static std::vector<ggml_vk_device> ggml_vk_available_devices_internal(size_t memoryRequired) {
177222
std::vector<ggml_vk_device> results;
178-
if (!komputeManager()->hasVulkan() || !komputeManager()->hasInstance())
223+
if (!komputeManager.get_kp_manager()->hasVulkan() || !komputeManager.get_kp_manager()->hasInstance())
179224
return results;
180225

181226
std::vector<vk::PhysicalDevice> physical_devices;
182227
try {
183-
physical_devices = komputeManager()->listDevices();
228+
physical_devices = komputeManager.get_kp_manager()->listDevices();
184229
} catch (vk::SystemError & err) {
185230
std::cerr << __func__ << ": ignoring Vulkan exception: " << err.what() << "\n";
186231
return results;
@@ -338,7 +383,7 @@ bool ggml_vk_get_device(ggml_vk_device * device, size_t memoryRequired, const ch
338383
}
339384

340385
// Reports the result of hasVulkan() on the kp::Manager obtained from
// komputeManager. In this diff view the line marked "-" is the previous call
// through komputeManager(), the line marked "+" its replacement through
// get_kp_manager().
bool ggml_vk_has_vulkan() {
341-
return komputeManager()->hasVulkan();
386+
return komputeManager.get_kp_manager()->hasVulkan();
342387
}
343388

344389
static bool ggml_vk_has_device(struct ggml_backend_kompute_context *ctx) {
@@ -1808,16 +1853,6 @@ kp::TensorT<uint8_t>::dataType()
18081853

18091854
// backend interface
18101855

1811-
struct ggml_backend_kompute_buffer_type_context {
1812-
int device;
1813-
int device_ref = 0;
1814-
uint64_t buffer_alignment;
1815-
uint64_t max_alloc;
1816-
std::string name;
1817-
1818-
ggml_backend_kompute_buffer_type_context(int device, uint64_t buffer_alignment, uint64_t max_alloc)
1819-
: device(device), buffer_alignment(buffer_alignment), max_alloc(max_alloc), name(ggml_kompute_format_name(device)) {}
1820-
};
18211856

18221857
static void ggml_backend_kompute_device_ref(ggml_backend_buffer_type_t buft) {
18231858
auto * ctx = static_cast<ggml_backend_kompute_buffer_type_context *>(buft->context);
@@ -1854,8 +1889,9 @@ static const char * ggml_backend_kompute_buffer_get_name(ggml_backend_buffer_t b
18541889

18551890
// Releases a Kompute buffer. In the "+" version the memory is passed to
// ggml_vk_free_memory() only when kompute_backend_context() yields a context
// for the buffer and ggml_vk_has_device() accepts it; the buffer context
// itself is deleted in every case. The lines marked "-" are the previous
// code, which used the global s_kompute_context instead.
static void ggml_backend_kompute_buffer_free_buffer(ggml_backend_buffer_t buffer) {
18561891
auto * ctx = static_cast<ggml_backend_kompute_buffer_context *>(buffer->context);
1857-
if (ggml_vk_has_device(s_kompute_context)) {
1858-
ggml_vk_free_memory(s_kompute_context, ctx->memory);
1892+
auto * backend_ctx = kompute_backend_context(buffer);
1893+
if (backend_ctx && ggml_vk_has_device(backend_ctx)) {
1894+
ggml_vk_free_memory(backend_ctx, ctx->memory);
18591895
}
18601896
delete ctx;
18611897
}
@@ -1866,33 +1902,34 @@ static void * ggml_backend_kompute_buffer_get_base(ggml_backend_buffer_t buffer)
18661902
}
18671903

18681904
// Copies `size` bytes from `data` into tensor->data at `offset`, then
// evaluates kp::OpTensorSyncDevice on the Kompute tensor that
// ggml_vk_get_tensor() returns for `tensor`. The lines marked "-" are the
// previous code based on the global s_kompute_context; the "+" lines take the
// context from the buffer instead.
// NOTE(review): kompute_backend_context() returns nullptr when no backend is
// found for the buffer, and backend_ctx is used below without a check
// (ggml_backend_kompute_buffer_free_buffer does check it) - confirm that a
// buffer cannot outlive its backend, or add a guard.
static void ggml_backend_kompute_buffer_set_tensor(ggml_backend_buffer_t buffer, ggml_tensor * tensor, const void * data, size_t offset, size_t size) {
1869-
GGML_UNUSED(buffer);
1905+
auto * backend_ctx = kompute_backend_context(buffer);
18701906

1871-
const auto res = ggml_vk_get_tensor(s_kompute_context, tensor);
1907+
const auto res = ggml_vk_get_tensor(backend_ctx, tensor);
18721908
GGML_ASSERT(res);
18731909

18741910
memcpy((char *)tensor->data + offset, data, size);
18751911

1876-
s_kompute_context->manager.sequence()->eval<kp::OpTensorSyncDevice>({res});
1912+
backend_ctx->manager.sequence()->eval<kp::OpTensorSyncDevice>({res});
18771913
}
18781914

18791915
// Evaluates kp::OpTensorSyncLocal on the Kompute tensor that
// ggml_vk_get_tensor() returns for `tensor`, then copies `size` bytes from
// tensor->data at `offset` into `data`. The lines marked "-" are the previous
// code based on the global s_kompute_context; the "+" lines take the context
// from the buffer instead.
// NOTE(review): kompute_backend_context() returns nullptr when no backend is
// found for the buffer, and backend_ctx is used below without a check
// (ggml_backend_kompute_buffer_free_buffer does check it) - confirm that a
// buffer cannot outlive its backend, or add a guard.
static void ggml_backend_kompute_buffer_get_tensor(ggml_backend_buffer_t buffer, const ggml_tensor * tensor, void * data, size_t offset, size_t size) {
1880-
GGML_UNUSED(buffer);
1916+
auto * backend_ctx = kompute_backend_context(buffer);
18811917

1882-
const auto res = ggml_vk_get_tensor(s_kompute_context, tensor);
1918+
const auto res = ggml_vk_get_tensor(backend_ctx, tensor);
18831919
GGML_ASSERT(res);
18841920

1885-
s_kompute_context->manager.sequence()->eval<kp::OpTensorSyncLocal>({res});
1921+
backend_ctx->manager.sequence()->eval<kp::OpTensorSyncLocal>({res});
18861922

18871923
memcpy(data, (const char *)tensor->data + offset, size);
18881924
}
18891925

18901926
// Fills the buffer's memory with `value` via memset and, when the memory has
// a staging buffer, evaluates kp::OpBufferSyncDevice over the whole size.
// The line marked "-" is the previous code based on the global
// s_kompute_context; the "+" lines take the context from the buffer instead.
// NOTE(review): kompute_backend_context() returns nullptr when no backend is
// found for the buffer, and backend_ctx is dereferenced in the staging-buffer
// branch without a check - confirm that a buffer cannot outlive its backend,
// or add a guard.
static void ggml_backend_kompute_buffer_clear(ggml_backend_buffer_t buffer, uint8_t value) {
18911927
auto * ctx = static_cast<ggml_backend_kompute_buffer_context *>(buffer->context);
1928+
auto * backend_ctx = kompute_backend_context(buffer);
18921929
memset(ctx->memory.data, value, ctx->memory.size);
18931930

18941931
if (ctx->memory.stagingBuffer)
1895-
s_kompute_context->manager.sequence()->eval<kp::OpBufferSyncDevice>(ctx->memory.primaryBuffer, ctx->memory.stagingBuffer, ctx->memory.size);
1932+
backend_ctx->manager.sequence()->eval<kp::OpBufferSyncDevice>(ctx->memory.primaryBuffer, ctx->memory.stagingBuffer, ctx->memory.size);
18961933
}
18971934

18981935
static ggml_backend_buffer_i ggml_backend_kompute_buffer_i = {
@@ -1915,9 +1952,9 @@ static const char * ggml_backend_kompute_buffer_type_get_name(ggml_backend_buffe
19151952
}
19161953

19171954
// Allocates `size` bytes through ggml_vk_allocate() and wraps the result in a
// new buffer of type `buft` via ggml_backend_buffer_init(). The lines marked
// "-" are the previous code (device ref plus global s_kompute_context); the
// "+" lines look the context up from the buffer type instead.
// NOTE(review): kompute_backend_context() can return nullptr and the result
// is passed to ggml_vk_allocate() unchecked - whether that helper tolerates a
// null context is not visible here; confirm.
static ggml_backend_buffer_t ggml_backend_kompute_buffer_type_alloc_buffer(ggml_backend_buffer_type_t buft, size_t size) {
1918-
ggml_backend_kompute_device_ref(buft);
1955+
auto * backend_ctx = kompute_backend_context(buft);
19191956
auto * ctx = new ggml_backend_kompute_buffer_context;
1920-
ctx->memory = ggml_vk_allocate(s_kompute_context, size);
1957+
ctx->memory = ggml_vk_allocate(backend_ctx, size);
19211958
return ggml_backend_buffer_init(buft, ggml_backend_kompute_buffer_i, ctx, size);
19221959
}
19231960

@@ -1941,10 +1978,9 @@ static ggml_backend_buffer_type_i ggml_backend_kompute_buffer_type_interface = {
19411978
};
19421979

19431980
ggml_backend_buffer_type_t ggml_backend_kompute_buffer_type(int device) {
1944-
if (!s_kompute_context)
1945-
s_kompute_context = new ggml_backend_kompute_context(device);
1981+
auto * backend = komputeManager.create_backend(device);
1982+
auto * buft = &(static_cast<ggml_backend_kompute_context *>(backend->context))->buft;
19461983

1947-
auto * buft = &s_kompute_context->buft;
19481984
if (!buft->context) {
19491985
auto devices = ggml_vk_available_devices_internal(0);
19501986
for (std::size_t i = 0; i < devices.size(); i++) {
@@ -1970,15 +2006,7 @@ static const char * ggml_backend_kompute_name(ggml_backend_t backend) {
19702006
}
19712007

19722008
// Frees a Kompute backend. The "+" version hands the backend to
// komputeManager.destroy_backend(); the lines marked "-" are the previous
// code, which deleted the global context and the backend object directly.
static void ggml_backend_kompute_free(ggml_backend_t backend) {
1973-
auto * ctx = static_cast<ggml_backend_kompute_context *>(backend->context);
1974-
1975-
assert(ctx == s_kompute_context);
1976-
s_kompute_context = nullptr;
1977-
if (ctx != nullptr) {
1978-
delete ctx;
1979-
}
1980-
1981-
delete backend;
2009+
komputeManager.destroy_backend(backend);
19822010
}
19832011

19842012
static ggml_backend_buffer_type_t ggml_backend_kompute_get_default_buffer_type(ggml_backend_t backend) {
@@ -2038,17 +2066,94 @@ static ggml_guid_t ggml_backend_kompute_guid() {
20382066
return &guid;
20392067
}
20402068

2041-
ggml_backend_t ggml_backend_kompute_init(int device) {
2042-
if (!s_kompute_context)
2043-
s_kompute_context = new ggml_backend_kompute_context(device);
20442069

2045-
ggml_backend_t kompute_backend = new ggml_backend {
2070+
2071+
// Starts with no query manager and GGML_KOMPUTE_MAX_DEVICES empty backend
// slots.
kompute_manager::kompute_manager()
    : m_kp_manager(nullptr),
      m_backends(GGML_KOMPUTE_MAX_DEVICES, nullptr)
{
}
2075+
2076+
// Releases the query manager first, then every backend still registered.
kompute_manager::~kompute_manager()
{
    // deleting a null pointer is a no-op, so no guard is required
    delete m_kp_manager;
    m_kp_manager = nullptr;

    // destroy_backend() only resets slots, it never resizes the vector, so
    // iterating over it while destroying is safe
    for (ggml_backend_t registered : m_backends) {
        destroy_backend(registered);
    }
}
2087+
2088+
// Returns the query manager, constructing it on the first call.
kp::Manager * kompute_manager::get_kp_manager(void)
{
    if (m_kp_manager) {
        return m_kp_manager;
    }

    m_kp_manager = new kp::Manager;
    return m_kp_manager;
}
2095+
2096+
// Returns the backend for `device`, creating and registering it on first use.
//
// Returns nullptr when `device` lies outside [0, GGML_KOMPUTE_MAX_DEVICES).
// If a backend is already registered for the device, that one is returned.
// Otherwise a new context is allocated, its manager is initialized for the
// device with the extensions listed below, and a backend wrapping the context
// is stored in the device's slot.
//
// Exceptions raised while initializing the device or allocating the backend
// are passed on to the caller; the context is released first so that it does
// not leak.
ggml_backend_t kompute_manager::create_backend(int device)
{
    if (device < 0 || device >= GGML_KOMPUTE_MAX_DEVICES)
        return nullptr;

    // already exist
    if (ggml_backend_t existing = get_backend(device))
        return existing;

    // create new one
    auto *context = new ggml_backend_kompute_context(device);
    ggml_backend_t backend = nullptr;
    try {
        context->manager.initializeDevice(device, {},
                                          {
                                              "VK_KHR_shader_float16_int8",
                                              "VK_KHR_8bit_storage",
                                              "VK_KHR_16bit_storage",
                                              "VK_KHR_shader_non_semantic_info"
                                          });

        backend = new ggml_backend {
            /* .guid      = */ ggml_backend_kompute_guid(),
            /* .interface = */ kompute_backend_i,
            /* .context   = */ context,
        };
    } catch (...) {
        // nothing owns the context yet, so free it before propagating
        delete context;
        throw;
    }

    m_backends[device] = backend;

    std::cerr << "Kompute: Init device " << device << std::endl;

    return backend;
}
2128+
2129+
// Deletes `backend` together with its context and clears its slot. A null
// pointer or a backend that is not registered here is left untouched.
void kompute_manager::destroy_backend(ggml_backend_t backend)
{
    if (backend == nullptr) {
        return;
    }

    for (auto & slot : m_backends) {
        if (slot != backend) {
            continue;
        }

        delete static_cast<ggml_backend_kompute_context *>(backend->context);
        delete backend;
        slot = nullptr;
        return;
    }
}
2144+
2145+
// Returns the backend stored for `device`, or nullptr when the index is
// negative, beyond the table, or the slot is empty.
ggml_backend_t kompute_manager::get_backend(int device)
{
    if (device < 0) {
        return nullptr;
    }

    const auto slot_index = static_cast<std::size_t>(device);
    return slot_index < m_backends.size() ? m_backends[slot_index] : nullptr;
}
2152+
2153+
2154+
2155+
// Public entry point: obtains the backend for `device` from komputeManager,
// which creates it on first use.
ggml_backend_t ggml_backend_kompute_init(int device) {
    ggml_backend_t backend = komputeManager.create_backend(device);
    return backend;
}
20532158

20542159
bool ggml_backend_is_kompute(ggml_backend_t backend) {

0 commit comments

Comments
 (0)