@@ -85,28 +85,18 @@ struct ggml_backend_kompute_context {
85
85
// The global Kompute context is only created when a device is set and Vulkan is explicitly turned on.
86
86
static ggml_backend_kompute_context *s_kompute_context = nullptr ;
87
87
88
- class kompute_manager {
89
- kp::Manager *s_mgr = nullptr ;
90
88
91
- public:
92
- kp::Manager *operator ()() {
93
- if (s_mgr && !s_mgr->hasInstance ()) {
94
- destroy ();
95
- }
96
- if (!s_mgr) {
97
- s_mgr = new kp::Manager;
98
- }
99
- return s_mgr;
100
- }
89
+ struct ggml_backend_kompute_buffer_type_context {
90
+ int device;
91
+ int device_ref = 0 ;
92
+ uint64_t buffer_alignment;
93
+ uint64_t max_alloc;
94
+ std::string name;
101
95
102
- void destroy () {
103
- delete s_mgr;
104
- s_mgr = nullptr ;
105
- }
96
+ ggml_backend_kompute_buffer_type_context (int device, uint64_t buffer_alignment, uint64_t max_alloc)
97
+ : device(device), buffer_alignment(buffer_alignment), max_alloc(max_alloc), name(ggml_kompute_format_name(device)) {}
106
98
};
107
99
108
- static kompute_manager komputeManager;
109
-
110
100
struct ggml_vk_memory {
111
101
void *data = nullptr ;
112
102
size_t size = 0 ;
@@ -120,6 +110,61 @@ struct ggml_backend_kompute_buffer_context {
120
110
struct ggml_vk_memory memory;
121
111
};
122
112
113
+ class kompute_manager {
114
+ public:
115
+ kompute_manager ();
116
+ ~kompute_manager ();
117
+
118
+ kp::Manager *get_kp_manager (void );
119
+ ggml_backend_t create_backend (int device);
120
+ void destroy_backend (ggml_backend_t backend);
121
+ ggml_backend_t get_backend (int device);
122
+
123
+ private:
124
+ // Only for global queries, not for creating devices
125
+ kp::Manager *m_kp_manager;
126
+
127
+ std::vector<ggml_backend_t > m_backends;
128
+ };
129
+
130
+
131
+ static kompute_manager komputeManager;
132
+
133
+
134
+ static ggml_backend_t kompute_backend (int device)
135
+ {
136
+ return komputeManager.get_backend (device);
137
+ }
138
+
139
+ static ggml_backend_t kompute_backend (ggml_backend_buffer_type_t buffer_type)
140
+ {
141
+ auto *buft_ctx = static_cast <ggml_backend_kompute_buffer_type_context *>(buffer_type->context );
142
+ return kompute_backend (buft_ctx->device );
143
+ }
144
+
145
+ static ggml_backend_t kompute_backend (ggml_backend_buffer_t buffer)
146
+ {
147
+ return kompute_backend (buffer->buft );
148
+ }
149
+
150
+ static ggml_backend_kompute_context *kompute_backend_context (int device)
151
+ {
152
+ auto * backend = kompute_backend (device);
153
+ return backend ? static_cast <ggml_backend_kompute_context *>(backend->context ) : nullptr ;
154
+ }
155
+
156
+ static ggml_backend_kompute_context *kompute_backend_context (ggml_backend_buffer_t buffer)
157
+ {
158
+ auto * backend = kompute_backend (buffer);
159
+ return backend ? static_cast <ggml_backend_kompute_context *>(backend->context ) : nullptr ;
160
+ }
161
+
162
+ static ggml_backend_kompute_context *kompute_backend_context (ggml_backend_buffer_type_t buffer_type)
163
+ {
164
+ auto * backend = kompute_backend (buffer_type);
165
+ return backend ? static_cast <ggml_backend_kompute_context *>(backend->context ) : nullptr ;
166
+ }
167
+
123
168
#ifdef __linux__
124
169
__attribute__ ((constructor))
125
170
static void enable_sam() {
@@ -175,12 +220,12 @@ static const char * ggml_vk_getVendorName(uint32_t vendorID) {
175
220
176
221
static std::vector<ggml_vk_device> ggml_vk_available_devices_internal (size_t memoryRequired) {
177
222
std::vector<ggml_vk_device> results;
178
- if (!komputeManager ()->hasVulkan () || !komputeManager ()->hasInstance ())
223
+ if (!komputeManager. get_kp_manager ()->hasVulkan () || !komputeManager. get_kp_manager ()->hasInstance ())
179
224
return results;
180
225
181
226
std::vector<vk::PhysicalDevice> physical_devices;
182
227
try {
183
- physical_devices = komputeManager ()->listDevices ();
228
+ physical_devices = komputeManager. get_kp_manager ()->listDevices ();
184
229
} catch (vk::SystemError & err) {
185
230
std::cerr << __func__ << " : ignoring Vulkan exception: " << err.what () << " \n " ;
186
231
return results;
@@ -338,7 +383,7 @@ bool ggml_vk_get_device(ggml_vk_device * device, size_t memoryRequired, const ch
338
383
}
339
384
340
385
bool ggml_vk_has_vulkan () {
341
- return komputeManager ()->hasVulkan ();
386
+ return komputeManager. get_kp_manager ()->hasVulkan ();
342
387
}
343
388
344
389
static bool ggml_vk_has_device (struct ggml_backend_kompute_context *ctx) {
@@ -1808,16 +1853,6 @@ kp::TensorT<uint8_t>::dataType()
1808
1853
1809
1854
// backend interface
1810
1855
1811
- struct ggml_backend_kompute_buffer_type_context {
1812
- int device;
1813
- int device_ref = 0 ;
1814
- uint64_t buffer_alignment;
1815
- uint64_t max_alloc;
1816
- std::string name;
1817
-
1818
- ggml_backend_kompute_buffer_type_context (int device, uint64_t buffer_alignment, uint64_t max_alloc)
1819
- : device(device), buffer_alignment(buffer_alignment), max_alloc(max_alloc), name(ggml_kompute_format_name(device)) {}
1820
- };
1821
1856
1822
1857
static void ggml_backend_kompute_device_ref (ggml_backend_buffer_type_t buft) {
1823
1858
auto * ctx = static_cast <ggml_backend_kompute_buffer_type_context *>(buft->context );
@@ -1854,8 +1889,9 @@ static const char * ggml_backend_kompute_buffer_get_name(ggml_backend_buffer_t b
1854
1889
1855
1890
// Release the Vulkan memory behind a buffer (if its backend still has a
// device) and then the buffer context itself.
static void ggml_backend_kompute_buffer_free_buffer(ggml_backend_buffer_t buffer) {
    auto *ctx = static_cast<ggml_backend_kompute_buffer_context *>(buffer->context);
    auto *backend_ctx = kompute_backend_context(buffer);
    if (backend_ctx != nullptr && ggml_vk_has_device(backend_ctx)) {
        ggml_vk_free_memory(backend_ctx, ctx->memory);
    }
    delete ctx;
}
@@ -1866,33 +1902,34 @@ static void * ggml_backend_kompute_buffer_get_base(ggml_backend_buffer_t buffer)
1866
1902
}
1867
1903
1868
1904
// Copy host data into a tensor and sync the backing buffer to the device.
// Fix: kompute_backend_context() can return nullptr (free_buffer guards for
// this), but here it was dereferenced unchecked — assert before use.
static void ggml_backend_kompute_buffer_set_tensor(ggml_backend_buffer_t buffer, ggml_tensor * tensor, const void * data, size_t offset, size_t size) {
    auto * backend_ctx = kompute_backend_context(buffer);
    GGML_ASSERT(backend_ctx);  // buffer must belong to a live backend

    const auto res = ggml_vk_get_tensor(backend_ctx, tensor);
    GGML_ASSERT(res);

    memcpy((char *)tensor->data + offset, data, size);

    // Push the updated host copy to device memory.
    backend_ctx->manager.sequence()->eval<kp::OpTensorSyncDevice>({res});
}
1878
1914
1879
1915
// Sync a tensor's device memory back to the host, then copy it out.
// Fix: assert the backend context is non-null before dereferencing it
// (kompute_backend_context() can return nullptr).
static void ggml_backend_kompute_buffer_get_tensor(ggml_backend_buffer_t buffer, const ggml_tensor * tensor, void * data, size_t offset, size_t size) {
    auto * backend_ctx = kompute_backend_context(buffer);
    GGML_ASSERT(backend_ctx);  // buffer must belong to a live backend

    const auto res = ggml_vk_get_tensor(backend_ctx, tensor);
    GGML_ASSERT(res);

    // Pull device memory into the host-visible copy before reading it.
    backend_ctx->manager.sequence()->eval<kp::OpTensorSyncLocal>({res});

    memcpy(data, (const char *)tensor->data + offset, size);
}
1889
1925
1890
1926
// Fill the buffer's host memory with `value` and, if a staging buffer
// exists, push the cleared contents to the device.
// Fix: the backend context was looked up unconditionally and dereferenced
// without a null check; only resolve it when the staging sync is actually
// needed, and assert it is non-null.
static void ggml_backend_kompute_buffer_clear(ggml_backend_buffer_t buffer, uint8_t value) {
    auto * ctx = static_cast<ggml_backend_kompute_buffer_context *>(buffer->context);

    memset(ctx->memory.data, value, ctx->memory.size);

    if (ctx->memory.stagingBuffer) {
        auto * backend_ctx = kompute_backend_context(buffer);
        GGML_ASSERT(backend_ctx);  // buffer must belong to a live backend
        backend_ctx->manager.sequence()->eval<kp::OpBufferSyncDevice>(ctx->memory.primaryBuffer, ctx->memory.stagingBuffer, ctx->memory.size);
    }
}
1934
1898
1935
static ggml_backend_buffer_i ggml_backend_kompute_buffer_i = {
@@ -1915,9 +1952,9 @@ static const char * ggml_backend_kompute_buffer_type_get_name(ggml_backend_buffe
1915
1952
}
1916
1953
1917
1954
// Allocate a new device buffer of `size` bytes for this buffer type.
// Fix: ggml_vk_allocate() was called with a possibly-null backend context
// (kompute_backend_context() returns nullptr when no backend exists for the
// buffer type's device); assert before use.
static ggml_backend_buffer_t ggml_backend_kompute_buffer_type_alloc_buffer(ggml_backend_buffer_type_t buft, size_t size) {
    auto * backend_ctx = kompute_backend_context(buft);
    GGML_ASSERT(backend_ctx);  // a backend must exist for this device

    auto * ctx = new ggml_backend_kompute_buffer_context;
    ctx->memory = ggml_vk_allocate(backend_ctx, size);
    return ggml_backend_buffer_init(buft, ggml_backend_kompute_buffer_i, ctx, size);
}
1923
1960
@@ -1941,10 +1978,9 @@ static ggml_backend_buffer_type_i ggml_backend_kompute_buffer_type_interface = {
1941
1978
};
1942
1979
1943
1980
ggml_backend_buffer_type_t ggml_backend_kompute_buffer_type (int device) {
1944
- if (!s_kompute_context)
1945
- s_kompute_context = new ggml_backend_kompute_context (device) ;
1981
+ auto * backend = komputeManager. create_backend (device);
1982
+ auto * buft = &( static_cast < ggml_backend_kompute_context *>(backend-> context ))-> buft ;
1946
1983
1947
- auto * buft = &s_kompute_context->buft ;
1948
1984
if (!buft->context ) {
1949
1985
auto devices = ggml_vk_available_devices_internal (0 );
1950
1986
for (std::size_t i = 0 ; i < devices.size (); i++) {
@@ -1970,15 +2006,7 @@ static const char * ggml_backend_kompute_name(ggml_backend_t backend) {
1970
2006
}
1971
2007
1972
2008
// Backend teardown hook: ownership lives in the manager, which deletes both
// the backend object and its context and clears the device slot.
static void ggml_backend_kompute_free(ggml_backend_t backend) {
    komputeManager.destroy_backend(backend);
}
1983
2011
1984
2012
static ggml_backend_buffer_type_t ggml_backend_kompute_get_default_buffer_type (ggml_backend_t backend) {
@@ -2038,17 +2066,94 @@ static ggml_guid_t ggml_backend_kompute_guid() {
2038
2066
return &guid;
2039
2067
}
2040
2068
2041
- ggml_backend_t ggml_backend_kompute_init (int device) {
2042
- if (!s_kompute_context)
2043
- s_kompute_context = new ggml_backend_kompute_context (device);
2044
2069
2045
- ggml_backend_t kompute_backend = new ggml_backend {
2070
+
2071
+ kompute_manager::kompute_manager () : m_backends(GGML_KOMPUTE_MAX_DEVICES, nullptr )
2072
+ {
2073
+ m_kp_manager = nullptr ;
2074
+ }
2075
+
2076
+ kompute_manager::~kompute_manager ()
2077
+ {
2078
+ if (m_kp_manager) {
2079
+ delete m_kp_manager;
2080
+ m_kp_manager = nullptr ;
2081
+ }
2082
+
2083
+ for (std::size_t i = 0 ; i < m_backends.size (); i++) {
2084
+ destroy_backend (m_backends[i]);
2085
+ }
2086
+ }
2087
+
2088
+ kp::Manager * kompute_manager::get_kp_manager (void )
2089
+ {
2090
+ if (!m_kp_manager)
2091
+ m_kp_manager = new kp::Manager;
2092
+
2093
+ return m_kp_manager;
2094
+ }
2095
+
2096
+ ggml_backend_t kompute_manager::create_backend (int device)
2097
+ {
2098
+ if (device < 0 || device >= GGML_KOMPUTE_MAX_DEVICES)
2099
+ return nullptr ;
2100
+
2101
+ // already exist
2102
+ ggml_backend_t backend = get_backend (device);
2103
+ if (backend)
2104
+ return backend;
2105
+
2106
+ // create new one
2107
+ auto *context = new ggml_backend_kompute_context (device);
2108
+ context->manager .initializeDevice (device, {},
2109
+ {
2110
+ " VK_KHR_shader_float16_int8" ,
2111
+ " VK_KHR_8bit_storage" ,
2112
+ " VK_KHR_16bit_storage" ,
2113
+ " VK_KHR_shader_non_semantic_info"
2114
+ });
2115
+
2116
+ backend = new ggml_backend {
2046
2117
/* .guid = */ ggml_backend_kompute_guid (),
2047
2118
/* .interface = */ kompute_backend_i,
2048
- /* .context = */ s_kompute_context ,
2119
+ /* .context = */ context ,
2049
2120
};
2050
2121
2051
- return kompute_backend;
2122
+ m_backends[device] = backend;
2123
+
2124
+ std::cerr << " Kompute: Init device " << device << std::endl;
2125
+
2126
+ return backend;
2127
+ }
2128
+
2129
+ void kompute_manager::destroy_backend (ggml_backend_t backend)
2130
+ {
2131
+ if (!backend)
2132
+ return ;
2133
+
2134
+ for (std::size_t i = 0 ; i < m_backends.size (); i++) {
2135
+ if (backend == m_backends[i]) {
2136
+ auto *context = static_cast <ggml_backend_kompute_context *>(backend->context );
2137
+ delete context;
2138
+ delete backend;
2139
+ m_backends[i] = nullptr ;
2140
+ break ;
2141
+ }
2142
+ }
2143
+ }
2144
+
2145
+ ggml_backend_t kompute_manager::get_backend (int device)
2146
+ {
2147
+ if (device >= 0 && static_cast <std::size_t >(device) < m_backends.size ())
2148
+ return m_backends[device];
2149
+
2150
+ return nullptr ;
2151
+ }
2152
+
2153
+
2154
+
2155
+ ggml_backend_t ggml_backend_kompute_init (int device) {
2156
+ return komputeManager.create_backend (device);
2052
2157
}
2053
2158
2054
2159
bool ggml_backend_is_kompute (ggml_backend_t backend) {
0 commit comments