@@ -151,6 +151,12 @@ struct rpc_msg_buffer_clear_req {
151
151
uint8_t value;
152
152
};
153
153
154
// Request payload for RPC_CMD_SET_TENSOR_HASH.
// Asks the server to fill `tensor` at byte `offset` from a locally cached
// file whose contents hash to `hash`, so the client can avoid re-sending
// the tensor data over the socket when the server already has it.
struct rpc_msg_set_tensor_hash_req {
    rpc_tensor tensor;   // serialized descriptor of the destination tensor
    uint64_t   offset;   // byte offset into the tensor's data region
    uint64_t   hash;     // FNV hash of the payload (cache lookup key; see fnv_hash in the set-tensor path)
};
154
160
// Response for RPC_CMD_SET_TENSOR_HASH.
// result != 0 means the server found the hash in its cache and wrote the
// data into the tensor, so the client can skip transferring the bytes.
struct rpc_msg_set_tensor_hash_rsp {
    uint8_t result;
};
@@ -543,15 +549,12 @@ static void ggml_backend_rpc_buffer_set_tensor(ggml_backend_buffer_t buffer, ggm
543
549
ggml_backend_rpc_buffer_context * ctx = (ggml_backend_rpc_buffer_context *)buffer->context ;
544
550
rpc_tensor rpc_tensor = serialize_tensor (tensor);
545
551
if (size > HASH_THRESHOLD) {
546
- // input serialization format: | rpc_tensor | offset (8 bytes) | hash (8 bytes)
547
- size_t input_size = sizeof (rpc_tensor) + sizeof (uint64_t ) + sizeof (uint64_t );
548
- std::vector<uint8_t > input (input_size, 0 );
549
- uint64_t hash = fnv_hash ((const uint8_t *)data, size);
550
- memcpy (input.data (), &rpc_tensor, sizeof (rpc_tensor));
551
- memcpy (input.data () + sizeof (rpc_tensor), &offset, sizeof (offset));
552
- memcpy (input.data () + sizeof (rpc_tensor) + sizeof (offset), &hash, sizeof (hash));
552
+ rpc_msg_set_tensor_hash_req request;
553
+ request.tensor = serialize_tensor (tensor);
554
+ request.offset = offset;
555
+ request.hash = fnv_hash ((const uint8_t *)data, size);
553
556
rpc_msg_set_tensor_hash_rsp response;
554
- bool status = send_rpc_cmd (ctx->sock , RPC_CMD_SET_TENSOR_HASH, input. data (), input. size ( ), &response, sizeof (response));
557
+ bool status = send_rpc_cmd (ctx->sock , RPC_CMD_SET_TENSOR_HASH, &request, sizeof (request ), &response, sizeof (response));
555
558
GGML_ASSERT (status);
556
559
if (response.result ) {
557
560
// the server has the same data, no need to send it
@@ -599,15 +602,12 @@ static bool ggml_backend_rpc_buffer_cpy_tensor(ggml_backend_buffer_t buffer, con
599
602
600
603
// Ask the RPC server to populate `tensor` at byte `offset` from its local
// file cache, identified by `hash`. Returns true iff the server had the
// cached data and applied it (response.result != 0), in which case the
// caller does not need to transfer the tensor bytes.
bool ggml_backend_rpc_buffer_load_tensor(ggml_backend_buffer_t buffer, ggml_tensor * tensor, size_t offset, uint64_t hash) {
    ggml_backend_rpc_buffer_context * buf_ctx = (ggml_backend_rpc_buffer_context *)buffer->context;

    // Build the fixed-size request message in one shot.
    rpc_msg_set_tensor_hash_req request = {
        /*.tensor =*/ serialize_tensor(tensor),
        /*.offset =*/ offset,
        /*.hash   =*/ hash,
    };

    rpc_msg_set_tensor_hash_rsp response;
    const bool ok = send_rpc_cmd(buf_ctx->sock, RPC_CMD_SET_TENSOR_HASH, &request, sizeof(request), &response, sizeof(response));
    GGML_ASSERT(ok);
    return response.result;
}
@@ -874,7 +874,7 @@ class rpc_server {
874
874
bool free_buffer (const rpc_msg_free_buffer_req & request);
875
875
bool buffer_clear (const rpc_msg_buffer_clear_req & request);
876
876
bool set_tensor (const std::vector<uint8_t > & input);
877
- bool set_tensor_hash (const std::vector< uint8_t > & input , rpc_msg_set_tensor_hash_rsp & response);
877
+ bool set_tensor_hash (const rpc_msg_set_tensor_hash_req & request , rpc_msg_set_tensor_hash_rsp & response);
878
878
bool get_tensor (const rpc_msg_get_tensor_req & request, std::vector<uint8_t > & response);
879
879
bool copy_tensor (const rpc_msg_copy_tensor_req & request, rpc_msg_copy_tensor_rsp & response);
880
880
bool graph_compute (const std::vector<uint8_t > & input, rpc_msg_graph_compute_rsp & response);
@@ -1111,18 +1111,10 @@ bool rpc_server::get_cached_file(uint64_t hash, std::vector<uint8_t> & data) {
1111
1111
return true ;
1112
1112
}
1113
1113
1114
- bool rpc_server::set_tensor_hash (const std::vector< uint8_t > & input , rpc_msg_set_tensor_hash_rsp & response)
1114
+ bool rpc_server::set_tensor_hash (const rpc_msg_set_tensor_hash_req & request , rpc_msg_set_tensor_hash_rsp & response)
1115
1115
{
1116
- // serialization format: | rpc_tensor | offset (8 bytes) | hash (8 bytes) |
1117
- if (input.size () != sizeof (rpc_tensor) + 16 ) {
1118
- return false ;
1119
- }
1120
- const rpc_tensor * in_tensor = (const rpc_tensor *)input.data ();
1121
- uint64_t offset;
1122
- memcpy (&offset, input.data () + sizeof (rpc_tensor), sizeof (offset));
1123
- const uint64_t * hash = (const uint64_t *)(input.data () + sizeof (rpc_tensor) + sizeof (offset));
1124
1116
std::vector<uint8_t > cached_file;
1125
- if (!get_cached_file (* hash, cached_file)) {
1117
+ if (!get_cached_file (request. hash , cached_file)) {
1126
1118
response.result = 0 ;
1127
1119
return true ;
1128
1120
}
@@ -1135,7 +1127,7 @@ bool rpc_server::set_tensor_hash(const std::vector<uint8_t> & input, rpc_msg_set
1135
1127
ggml_context_ptr ctx_ptr { ggml_init (params) };
1136
1128
GGML_ASSERT (ctx_ptr != nullptr );
1137
1129
ggml_context * ctx = ctx_ptr.get ();
1138
- ggml_tensor * tensor = deserialize_tensor (ctx, in_tensor );
1130
+ ggml_tensor * tensor = deserialize_tensor (ctx, &request. tensor );
1139
1131
if (tensor == nullptr ) {
1140
1132
GGML_LOG_ERROR (" [%s] error deserializing tensor\n " , __func__);
1141
1133
return false ;
@@ -1147,13 +1139,15 @@ bool rpc_server::set_tensor_hash(const std::vector<uint8_t> & input, rpc_msg_set
1147
1139
const size_t p0 = (size_t ) ggml_backend_buffer_get_base (tensor->buffer );
1148
1140
const size_t p1 = p0 + ggml_backend_buffer_get_size (tensor->buffer );
1149
1141
1150
- if (in_tensor->data + offset < p0 || in_tensor->data + offset >= p1 || size > (p1 - in_tensor->data - offset)) {
1142
+ if (request.tensor .data + request.offset < p0
1143
+ || request.tensor .data + request.offset >= p1
1144
+ || size > (p1 - request.tensor .data - request.offset )) {
1151
1145
GGML_LOG_ERROR (" [%s] tensor data region (data=0x%" PRIx64 " , offset=%" PRIu64 " , size=%zu, hash=0x%" PRIx64 " ) out of buffer bounds [0x%zx, 0x%zx)\n " ,
1152
1146
__func__, in_tensor->data , offset, size, *hash, p0, p1);
1153
1147
return false ;
1154
1148
}
1155
1149
}
1156
- ggml_backend_tensor_set (tensor, cached_file.data (), offset, size);
1150
+ ggml_backend_tensor_set (tensor, cached_file.data (), request. offset , size);
1157
1151
response.result = 1 ;
1158
1152
return true ;
1159
1153
}
@@ -1513,12 +1507,12 @@ static void rpc_serve_client(ggml_backend_t backend, const char * cache_dir,
1513
1507
break ;
1514
1508
}
1515
1509
case RPC_CMD_SET_TENSOR_HASH: {
1516
- std::vector< uint8_t > input ;
1517
- if (!recv_msg (sockfd, input )) {
1510
+ rpc_msg_set_tensor_hash_req request ;
1511
+ if (!recv_msg (sockfd, &request, sizeof (request) )) {
1518
1512
return ;
1519
1513
}
1520
1514
rpc_msg_set_tensor_hash_rsp response;
1521
- if (!server.set_tensor_hash (input , response)) {
1515
+ if (!server.set_tensor_hash (request , response)) {
1522
1516
return ;
1523
1517
}
1524
1518
if (!send_msg (sockfd, &response, sizeof (response))) {
0 commit comments