Skip to content

Commit a3f2520

Browse files
committed
add rpc_msg_set_tensor_hash_req
1 parent 06711b0 commit a3f2520

File tree

1 file changed

+27
-33
lines changed

1 file changed

+27
-33
lines changed

ggml/src/ggml-rpc/ggml-rpc.cpp

Lines changed: 27 additions & 33 deletions
Original file line number | Diff line number | Diff line change
@@ -151,6 +151,12 @@ struct rpc_msg_buffer_clear_req {
151151
uint8_t value;
152152
};
153153

154+
struct rpc_msg_set_tensor_hash_req {
155+
rpc_tensor tensor;
156+
uint64_t offset;
157+
uint64_t hash;
158+
};
159+
154160
struct rpc_msg_set_tensor_hash_rsp {
155161
uint8_t result;
156162
};
@@ -534,15 +540,12 @@ static void ggml_backend_rpc_buffer_set_tensor(ggml_backend_buffer_t buffer, ggm
534540
ggml_backend_rpc_buffer_context * ctx = (ggml_backend_rpc_buffer_context *)buffer->context;
535541
rpc_tensor rpc_tensor = serialize_tensor(tensor);
536542
if (size > HASH_THRESHOLD) {
537-
// input serialization format: | rpc_tensor | offset (8 bytes) | hash (8 bytes)
538-
size_t input_size = sizeof(rpc_tensor) + sizeof(uint64_t) + sizeof(uint64_t);
539-
std::vector<uint8_t> input(input_size, 0);
540-
uint64_t hash = fnv_hash((const uint8_t*)data, size);
541-
memcpy(input.data(), &rpc_tensor, sizeof(rpc_tensor));
542-
memcpy(input.data() + sizeof(rpc_tensor), &offset, sizeof(offset));
543-
memcpy(input.data() + sizeof(rpc_tensor) + sizeof(offset), &hash, sizeof(hash));
543+
rpc_msg_set_tensor_hash_req request;
544+
request.tensor = serialize_tensor(tensor);
545+
request.offset = offset;
546+
request.hash = fnv_hash((const uint8_t*)data, size);
544547
rpc_msg_set_tensor_hash_rsp response;
545-
bool status = send_rpc_cmd(ctx->sock, RPC_CMD_SET_TENSOR_HASH, input.data(), input.size(), &response, sizeof(response));
548+
bool status = send_rpc_cmd(ctx->sock, RPC_CMD_SET_TENSOR_HASH, &request, sizeof(request), &response, sizeof(response));
546549
GGML_ASSERT(status);
547550
if (response.result) {
548551
// the server has the same data, no need to send it
@@ -590,15 +593,12 @@ static bool ggml_backend_rpc_buffer_cpy_tensor(ggml_backend_buffer_t buffer, con
590593

591594
bool ggml_backend_rpc_buffer_load_tensor(ggml_backend_buffer_t buffer, ggml_tensor * tensor, size_t offset, uint64_t hash) {
592595
ggml_backend_rpc_buffer_context * ctx = (ggml_backend_rpc_buffer_context *)buffer->context;
593-
rpc_tensor rpc_tensor = serialize_tensor(tensor);
594-
// input serialization format: | rpc_tensor | offset (8 bytes) | hash (8 bytes)
595-
size_t input_size = sizeof(rpc_tensor) + sizeof(uint64_t) + sizeof(uint64_t);
596-
std::vector<uint8_t> input(input_size, 0);
597-
memcpy(input.data(), &rpc_tensor, sizeof(rpc_tensor));
598-
memcpy(input.data() + sizeof(rpc_tensor), &offset, sizeof(offset));
599-
memcpy(input.data() + sizeof(rpc_tensor) + sizeof(offset), &hash, sizeof(hash));
596+
rpc_msg_set_tensor_hash_req request;
597+
request.tensor = serialize_tensor(tensor);
598+
request.offset = offset;
599+
request.hash = hash;
600600
rpc_msg_set_tensor_hash_rsp response;
601-
bool status = send_rpc_cmd(ctx->sock, RPC_CMD_SET_TENSOR_HASH, input.data(), input.size(), &response, sizeof(response));
601+
bool status = send_rpc_cmd(ctx->sock, RPC_CMD_SET_TENSOR_HASH, &request, sizeof(request), &response, sizeof(response));
602602
GGML_ASSERT(status);
603603
return response.result;
604604
}
@@ -865,7 +865,7 @@ class rpc_server {
865865
bool free_buffer(const rpc_msg_free_buffer_req & request);
866866
bool buffer_clear(const rpc_msg_buffer_clear_req & request);
867867
bool set_tensor(const std::vector<uint8_t> & input);
868-
bool set_tensor_hash(const std::vector<uint8_t> & input, rpc_msg_set_tensor_hash_rsp & response);
868+
bool set_tensor_hash(const rpc_msg_set_tensor_hash_req & request, rpc_msg_set_tensor_hash_rsp & response);
869869
bool get_tensor(const rpc_msg_get_tensor_req & request, std::vector<uint8_t> & response);
870870
bool copy_tensor(const rpc_msg_copy_tensor_req & request, rpc_msg_copy_tensor_rsp & response);
871871
bool graph_compute(const std::vector<uint8_t> & input, rpc_msg_graph_compute_rsp & response);
@@ -1087,18 +1087,10 @@ bool rpc_server::get_cached_file(uint64_t hash, std::vector<uint8_t> & data) {
10871087
return true;
10881088
}
10891089

1090-
bool rpc_server::set_tensor_hash(const std::vector<uint8_t> & input, rpc_msg_set_tensor_hash_rsp & response)
1090+
bool rpc_server::set_tensor_hash(const rpc_msg_set_tensor_hash_req & request, rpc_msg_set_tensor_hash_rsp & response)
10911091
{
1092-
// serialization format: | rpc_tensor | offset (8 bytes) | hash (8 bytes) |
1093-
if (input.size() != sizeof(rpc_tensor) + 16) {
1094-
return false;
1095-
}
1096-
const rpc_tensor * in_tensor = (const rpc_tensor *)input.data();
1097-
uint64_t offset;
1098-
memcpy(&offset, input.data() + sizeof(rpc_tensor), sizeof(offset));
1099-
const uint64_t * hash = (const uint64_t *)(input.data() + sizeof(rpc_tensor) + sizeof(offset));
11001092
std::vector<uint8_t> cached_file;
1101-
if (!get_cached_file(*hash, cached_file)) {
1093+
if (!get_cached_file(request.hash, cached_file)) {
11021094
response.result = 0;
11031095
return true;
11041096
}
@@ -1111,7 +1103,7 @@ bool rpc_server::set_tensor_hash(const std::vector<uint8_t> & input, rpc_msg_set
11111103
ggml_context_ptr ctx_ptr { ggml_init(params) };
11121104
GGML_ASSERT(ctx_ptr != nullptr);
11131105
ggml_context * ctx = ctx_ptr.get();
1114-
ggml_tensor * tensor = deserialize_tensor(ctx, in_tensor);
1106+
ggml_tensor * tensor = deserialize_tensor(ctx, &request.tensor);
11151107
if (tensor == nullptr) {
11161108
GGML_LOG_ERROR("[%s] error deserializing tensor\n", __func__);
11171109
return false;
@@ -1123,11 +1115,13 @@ bool rpc_server::set_tensor_hash(const std::vector<uint8_t> & input, rpc_msg_set
11231115
const size_t p0 = (size_t) ggml_backend_buffer_get_base(tensor->buffer);
11241116
const size_t p1 = p0 + ggml_backend_buffer_get_size(tensor->buffer);
11251117

1126-
if (in_tensor->data + offset < p0 || in_tensor->data + offset >= p1 || size > (p1 - in_tensor->data - offset)) {
1118+
if (request.tensor.data + request.offset < p0
1119+
|| request.tensor.data + request.offset >= p1
1120+
|| size > (p1 - request.tensor.data - request.offset)) {
11271121
GGML_ABORT("[%s] tensor->data out of bounds\n", __func__);
11281122
}
11291123
}
1130-
ggml_backend_tensor_set(tensor, cached_file.data(), offset, size);
1124+
ggml_backend_tensor_set(tensor, cached_file.data(), request.offset, size);
11311125
response.result = 1;
11321126
return true;
11331127
}
@@ -1449,12 +1443,12 @@ static void rpc_serve_client(ggml_backend_t backend, const char * cache_dir,
14491443
break;
14501444
}
14511445
case RPC_CMD_SET_TENSOR_HASH: {
1452-
std::vector<uint8_t> input;
1453-
if (!recv_msg(sockfd, input)) {
1446+
rpc_msg_set_tensor_hash_req request;
1447+
if (!recv_msg(sockfd, &request, sizeof(request))) {
14541448
return;
14551449
}
14561450
rpc_msg_set_tensor_hash_rsp response;
1457-
if (!server.set_tensor_hash(input, response)) {
1451+
if (!server.set_tensor_hash(request, response)) {
14581452
return;
14591453
}
14601454
if (!send_msg(sockfd, &response, sizeof(response))) {

0 commit comments

Comments (0)