@@ -151,6 +151,12 @@ struct rpc_msg_buffer_clear_req {
151
151
uint8_t value;
152
152
};
153
153
154
// Request payload for RPC_CMD_SET_TENSOR_HASH.
//   tensor - serialized description of the destination tensor (filled from
//            serialize_tensor() on the client, rebuilt with deserialize_tensor()
//            on the server).
//   offset - offset forwarded unchanged to ggml_backend_tensor_set() on the server.
//   hash   - 64-bit hash of the tensor data; the server uses it as the key for
//            get_cached_file().
// The struct is sent and received as raw bytes using sizeof(request).
// NOTE(review): this relies on client and server agreeing on the exact struct
// layout; presumably the RPC structs are declared under a packing pragma that is
// not visible in this hunk — confirm.
+ struct rpc_msg_set_tensor_hash_req {
155
+ rpc_tensor tensor;
156
+ uint64_t offset;
157
+ uint64_t hash;
158
+ };
159
+
154
160
// Response payload for RPC_CMD_SET_TENSOR_HASH.
//   result - set to 1 by rpc_server::set_tensor_hash() after it has written the
//            cached data into the tensor, and to 0 when no cached file exists for
//            the requested hash. Clients treat a nonzero value as "the server
//            already has this data".
struct rpc_msg_set_tensor_hash_rsp {
155
161
uint8_t result;
156
162
};
@@ -534,15 +540,12 @@ static void ggml_backend_rpc_buffer_set_tensor(ggml_backend_buffer_t buffer, ggm
534
540
ggml_backend_rpc_buffer_context * ctx = (ggml_backend_rpc_buffer_context *)buffer->context ;
535
541
rpc_tensor rpc_tensor = serialize_tensor (tensor);
536
542
if (size > HASH_THRESHOLD) {
537
- // input serialization format: | rpc_tensor | offset (8 bytes) | hash (8 bytes)
538
- size_t input_size = sizeof (rpc_tensor) + sizeof (uint64_t ) + sizeof (uint64_t );
539
- std::vector<uint8_t > input (input_size, 0 );
540
- uint64_t hash = fnv_hash ((const uint8_t *)data, size);
541
- memcpy (input.data (), &rpc_tensor, sizeof (rpc_tensor));
542
- memcpy (input.data () + sizeof (rpc_tensor), &offset, sizeof (offset));
543
- memcpy (input.data () + sizeof (rpc_tensor) + sizeof (offset), &hash, sizeof (hash));
543
+ rpc_msg_set_tensor_hash_req request;
544
+ request.tensor = serialize_tensor (tensor);
545
+ request.offset = offset;
546
+ request.hash = fnv_hash ((const uint8_t *)data, size);
544
547
rpc_msg_set_tensor_hash_rsp response;
545
- bool status = send_rpc_cmd (ctx->sock , RPC_CMD_SET_TENSOR_HASH, input. data (), input. size ( ), &response, sizeof (response));
548
+ bool status = send_rpc_cmd (ctx->sock , RPC_CMD_SET_TENSOR_HASH, &request, sizeof (request ), &response, sizeof (response));
546
549
GGML_ASSERT (status);
547
550
if (response.result ) {
548
551
// the server has the same data, no need to send it
@@ -590,15 +593,12 @@ static bool ggml_backend_rpc_buffer_cpy_tensor(ggml_backend_buffer_t buffer, con
590
593
591
594
// Sends an RPC_CMD_SET_TENSOR_HASH request for `tensor` over the socket stored in
// the buffer's RPC context, identifying the tensor data by `hash` rather than
// transmitting the bytes themselves.
//
// Parameters:
//   buffer - RPC backend buffer; its context supplies the socket used for the call.
//   tensor - tensor to load; serialized with serialize_tensor() into the request.
//   offset - offset copied verbatim into the request.
//   hash   - caller-supplied hash copied verbatim into the request.
//
// Returns response.result converted to bool (nonzero means the server reported
// that it applied data for this hash). A failed RPC round-trip is not returned to
// the caller; it trips GGML_ASSERT(status).
//
// In this diff the `-` lines are the previous hand-packed byte buffer
// (| rpc_tensor | offset | hash |) and the `+` lines replace it with the
// fixed-size rpc_msg_set_tensor_hash_req struct.
bool ggml_backend_rpc_buffer_load_tensor (ggml_backend_buffer_t buffer, ggml_tensor * tensor, size_t offset, uint64_t hash) {
592
595
ggml_backend_rpc_buffer_context * ctx = (ggml_backend_rpc_buffer_context *)buffer->context ;
593
- rpc_tensor rpc_tensor = serialize_tensor (tensor);
594
- // input serialization format: | rpc_tensor | offset (8 bytes) | hash (8 bytes)
595
- size_t input_size = sizeof (rpc_tensor) + sizeof (uint64_t ) + sizeof (uint64_t );
596
- std::vector<uint8_t > input (input_size, 0 );
597
- memcpy (input.data (), &rpc_tensor, sizeof (rpc_tensor));
598
- memcpy (input.data () + sizeof (rpc_tensor), &offset, sizeof (offset));
599
- memcpy (input.data () + sizeof (rpc_tensor) + sizeof (offset), &hash, sizeof (hash));
596
+ rpc_msg_set_tensor_hash_req request;
597
+ request.tensor = serialize_tensor (tensor);
598
+ request.offset = offset;
599
+ request.hash = hash;
600
600
rpc_msg_set_tensor_hash_rsp response;
601
- bool status = send_rpc_cmd (ctx->sock , RPC_CMD_SET_TENSOR_HASH, input. data (), input. size ( ), &response, sizeof (response));
601
// The request struct is passed as a raw byte range: pointer plus sizeof(request).
+ bool status = send_rpc_cmd (ctx->sock , RPC_CMD_SET_TENSOR_HASH, &request, sizeof (request ), &response, sizeof (response));
602
602
GGML_ASSERT (status);
603
603
// uint8_t result is implicitly converted to bool here.
return response.result ;
604
604
}
@@ -865,7 +865,7 @@ class rpc_server {
865
865
bool free_buffer (const rpc_msg_free_buffer_req & request);
866
866
bool buffer_clear (const rpc_msg_buffer_clear_req & request);
867
867
bool set_tensor (const std::vector<uint8_t > & input);
868
- bool set_tensor_hash (const std::vector< uint8_t > & input , rpc_msg_set_tensor_hash_rsp & response);
868
+ bool set_tensor_hash (const rpc_msg_set_tensor_hash_req & request , rpc_msg_set_tensor_hash_rsp & response);
869
869
bool get_tensor (const rpc_msg_get_tensor_req & request, std::vector<uint8_t > & response);
870
870
bool copy_tensor (const rpc_msg_copy_tensor_req & request, rpc_msg_copy_tensor_rsp & response);
871
871
bool graph_compute (const std::vector<uint8_t > & input, rpc_msg_graph_compute_rsp & response);
@@ -1087,18 +1087,10 @@ bool rpc_server::get_cached_file(uint64_t hash, std::vector<uint8_t> & data) {
1087
1087
return true ;
1088
1088
}
1089
1089
1090
- bool rpc_server::set_tensor_hash (const std::vector< uint8_t > & input , rpc_msg_set_tensor_hash_rsp & response)
1090
+ bool rpc_server::set_tensor_hash (const rpc_msg_set_tensor_hash_req & request , rpc_msg_set_tensor_hash_rsp & response)
1091
1091
{
1092
- // serialization format: | rpc_tensor | offset (8 bytes) | hash (8 bytes) |
1093
- if (input.size () != sizeof (rpc_tensor) + 16 ) {
1094
- return false ;
1095
- }
1096
- const rpc_tensor * in_tensor = (const rpc_tensor *)input.data ();
1097
- uint64_t offset;
1098
- memcpy (&offset, input.data () + sizeof (rpc_tensor), sizeof (offset));
1099
- const uint64_t * hash = (const uint64_t *)(input.data () + sizeof (rpc_tensor) + sizeof (offset));
1100
1092
std::vector<uint8_t > cached_file;
1101
- if (!get_cached_file (* hash, cached_file)) {
1093
+ if (!get_cached_file (request. hash , cached_file)) {
1102
1094
response.result = 0 ;
1103
1095
return true ;
1104
1096
}
@@ -1111,7 +1103,7 @@ bool rpc_server::set_tensor_hash(const std::vector<uint8_t> & input, rpc_msg_set
1111
1103
ggml_context_ptr ctx_ptr { ggml_init (params) };
1112
1104
GGML_ASSERT (ctx_ptr != nullptr );
1113
1105
ggml_context * ctx = ctx_ptr.get ();
1114
- ggml_tensor * tensor = deserialize_tensor (ctx, in_tensor );
1106
+ ggml_tensor * tensor = deserialize_tensor (ctx, &request. tensor );
1115
1107
if (tensor == nullptr ) {
1116
1108
GGML_LOG_ERROR (" [%s] error deserializing tensor\n " , __func__);
1117
1109
return false ;
@@ -1123,11 +1115,13 @@ bool rpc_server::set_tensor_hash(const std::vector<uint8_t> & input, rpc_msg_set
1123
1115
const size_t p0 = (size_t ) ggml_backend_buffer_get_base (tensor->buffer );
1124
1116
const size_t p1 = p0 + ggml_backend_buffer_get_size (tensor->buffer );
1125
1117
1126
- if (in_tensor->data + offset < p0 || in_tensor->data + offset >= p1 || size > (p1 - in_tensor->data - offset)) {
1118
+ if (request.tensor .data + request.offset < p0
1119
+ || request.tensor .data + request.offset >= p1
1120
+ || size > (p1 - request.tensor .data - request.offset )) {
1127
1121
GGML_ABORT (" [%s] tensor->data out of bounds\n " , __func__);
1128
1122
}
1129
1123
}
1130
- ggml_backend_tensor_set (tensor, cached_file.data (), offset, size);
1124
+ ggml_backend_tensor_set (tensor, cached_file.data (), request. offset , size);
1131
1125
response.result = 1 ;
1132
1126
return true ;
1133
1127
}
@@ -1449,12 +1443,12 @@ static void rpc_serve_client(ggml_backend_t backend, const char * cache_dir,
1449
1443
break ;
1450
1444
}
1451
1445
case RPC_CMD_SET_TENSOR_HASH: {
1452
- std::vector< uint8_t > input ;
1453
- if (!recv_msg (sockfd, input )) {
1446
+ rpc_msg_set_tensor_hash_req request ;
1447
+ if (!recv_msg (sockfd, &request, sizeof (request) )) {
1454
1448
return ;
1455
1449
}
1456
1450
rpc_msg_set_tensor_hash_rsp response;
1457
- if (!server.set_tensor_hash (input , response)) {
1451
+ if (!server.set_tensor_hash (request , response)) {
1458
1452
return ;
1459
1453
}
1460
1454
if (!send_msg (sockfd, &response, sizeof (response))) {
0 commit comments