Skip to content

Commit d64f20a

Browse files
committed
Merge remote-tracking branch 'ggerganov/master' into fix_decoding
* ggerganov/master:
  - sync : ggml
  - ggml : support forward pass broadcasting in ggml_sub (ggml/914)
  - metal : fix uninitialized abort_callback (llama/8968)
  - rpc : sanitize tensor data + warnings (llama/0)
2 parents ae4f80c + 22fcd5f commit d64f20a

File tree

4 files changed

+85
-34
lines changed

4 files changed

+85
-34
lines changed

ggml/src/ggml-metal.m

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -310,7 +310,7 @@ static void ggml_metal_log(enum ggml_log_level level, const char * format, ...){
310310
GGML_METAL_LOG_INFO("%s: picking default device: %s\n", __func__, [[device name] UTF8String]);
311311

312312
// Configure context
313-
struct ggml_backend_metal_context * ctx = malloc(sizeof(struct ggml_backend_metal_context));
313+
struct ggml_backend_metal_context * ctx = calloc(1, sizeof(struct ggml_backend_metal_context));
314314
ctx->device = device;
315315
ctx->n_cb = MIN(n_cb, GGML_METAL_MAX_BUFFERS);
316316
ctx->queue = [ctx->device newCommandQueue];

ggml/src/ggml-rpc.cpp

Lines changed: 35 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -197,6 +197,10 @@ static std::shared_ptr<socket_t> create_server_socket(const char * host, int por
197197
fprintf(stderr, "Failed to set SO_REUSEADDR\n");
198198
return nullptr;
199199
}
200+
if (inet_addr(host) == INADDR_NONE) {
201+
fprintf(stderr, "Invalid host address: %s\n", host);
202+
return nullptr;
203+
}
200204
struct sockaddr_in serv_addr;
201205
serv_addr.sin_family = AF_INET;
202206
serv_addr.sin_addr.s_addr = inet_addr(host);
@@ -879,6 +883,14 @@ ggml_tensor * rpc_server::deserialize_tensor(struct ggml_context * ctx, const rp
879883
if (result->buffer && buffers.find(result->buffer) == buffers.end()) {
880884
return nullptr;
881885
}
886+
887+
// require that the tensor data does not go beyond the buffer end
888+
uint64_t tensor_size = (uint64_t) ggml_nbytes(result);
889+
uint64_t buffer_start = (uint64_t) ggml_backend_buffer_get_base(result->buffer);
890+
uint64_t buffer_size = (uint64_t) ggml_backend_buffer_get_size(result->buffer);
891+
GGML_ASSERT(tensor->data + tensor_size >= tensor->data); // check for overflow
892+
GGML_ASSERT(tensor->data >= buffer_start && tensor->data + tensor_size <= buffer_start + buffer_size);
893+
882894
result->op = (ggml_op) tensor->op;
883895
for (uint32_t i = 0; i < GGML_MAX_OP_PARAMS / sizeof(int32_t); i++) {
884896
result->op_params[i] = tensor->op_params[i];
@@ -898,7 +910,7 @@ bool rpc_server::set_tensor(const std::vector<uint8_t> & input) {
898910
const rpc_tensor * in_tensor = (const rpc_tensor *)input.data();
899911
uint64_t offset;
900912
memcpy(&offset, input.data() + sizeof(rpc_tensor), sizeof(offset));
901-
size_t size = input.size() - sizeof(rpc_tensor) - sizeof(offset);
913+
const size_t size = input.size() - sizeof(rpc_tensor) - sizeof(offset);
902914

903915
struct ggml_init_params params {
904916
/*.mem_size =*/ ggml_tensor_overhead(),
@@ -913,6 +925,17 @@ bool rpc_server::set_tensor(const std::vector<uint8_t> & input) {
913925
return false;
914926
}
915927
GGML_PRINT_DEBUG("[%s] buffer: %p, data: %p, offset: %" PRIu64 ", size: %zu\n", __func__, (void*)tensor->buffer, tensor->data, offset, size);
928+
929+
// sanitize tensor->data
930+
{
931+
const size_t p0 = (size_t) ggml_backend_buffer_get_base(tensor->buffer);
932+
const size_t p1 = p0 + ggml_backend_buffer_get_size(tensor->buffer);
933+
934+
if (in_tensor->data + offset < p0 || in_tensor->data + offset >= p1 || size > (p1 - in_tensor->data - offset)) {
935+
GGML_ABORT("[%s] tensor->data out of bounds\n", __func__);
936+
}
937+
}
938+
916939
const void * data = input.data() + sizeof(rpc_tensor) + sizeof(offset);
917940
ggml_backend_tensor_set(tensor, data, offset, size);
918941
ggml_free(ctx);
@@ -943,6 +966,17 @@ bool rpc_server::get_tensor(const std::vector<uint8_t> & input, std::vector<uint
943966
return false;
944967
}
945968
GGML_PRINT_DEBUG("[%s] buffer: %p, data: %p, offset: %" PRIu64 ", size: %" PRIu64 "\n", __func__, (void*)tensor->buffer, tensor->data, offset, size);
969+
970+
// sanitize tensor->data
971+
{
972+
const size_t p0 = (size_t) ggml_backend_buffer_get_base(tensor->buffer);
973+
const size_t p1 = p0 + ggml_backend_buffer_get_size(tensor->buffer);
974+
975+
if (in_tensor->data + offset < p0 || in_tensor->data + offset >= p1 || size > (p1 - in_tensor->data - offset)) {
976+
GGML_ABORT("[%s] tensor->data out of bounds\n", __func__);
977+
}
978+
}
979+
946980
// output serialization format: | data (size bytes) |
947981
output.resize(size, 0);
948982
ggml_backend_tensor_get(tensor, output.data(), offset, size);

ggml/src/ggml.c

Lines changed: 48 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -3724,7 +3724,8 @@ static struct ggml_tensor * ggml_new_tensor_impl(
37243724
struct ggml_tensor * view_src,
37253725
size_t view_offs) {
37263726

3727-
assert(n_dims >= 1 && n_dims <= GGML_MAX_DIMS);
3727+
GGML_ASSERT(type >= 0 && type < GGML_TYPE_COUNT);
3728+
GGML_ASSERT(n_dims >= 1 && n_dims <= GGML_MAX_DIMS);
37283729

37293730
// find the base tensor and absolute offset
37303731
if (view_src != NULL && view_src->view_src != NULL) {
@@ -4660,11 +4661,13 @@ static struct ggml_tensor * ggml_sub_impl(
46604661
struct ggml_tensor * a,
46614662
struct ggml_tensor * b,
46624663
bool inplace) {
4663-
GGML_ASSERT(ggml_are_same_shape(a, b));
4664+
GGML_ASSERT(ggml_can_repeat(b, a));
46644665

46654666
bool is_node = false;
46664667

46674668
if (!inplace && (a->grad || b->grad)) {
4669+
// TODO: support backward pass for broadcasting
4670+
GGML_ASSERT(ggml_are_same_shape(a, b));
46684671
is_node = true;
46694672
}
46704673

@@ -10103,11 +10106,10 @@ static void ggml_compute_forward_sub_f32(
1010310106
const struct ggml_tensor * src0 = dst->src[0];
1010410107
const struct ggml_tensor * src1 = dst->src[1];
1010510108

10106-
if (params->ith != 0) {
10107-
return;
10108-
}
10109+
assert(ggml_can_repeat(src1, src0) && ggml_are_same_shape(src0, dst));
1010910110

10110-
assert(ggml_are_same_shape(src0, src1) && ggml_are_same_shape(src0, dst));
10111+
const int ith = params->ith;
10112+
const int nth = params->nth;
1011110113

1011210114
const int nr = ggml_nrows(src0);
1011310115

@@ -10116,40 +10118,55 @@ static void ggml_compute_forward_sub_f32(
1011610118
GGML_ASSERT( nb0 == sizeof(float));
1011710119
GGML_ASSERT(nb00 == sizeof(float));
1011810120

10121+
// rows per thread
10122+
const int dr = (nr + nth - 1)/nth;
10123+
10124+
// row range for this thread
10125+
const int ir0 = dr*ith;
10126+
const int ir1 = MIN(ir0 + dr, nr);
10127+
1011910128
if (nb10 == sizeof(float)) {
10120-
for (int ir = 0; ir < nr; ++ir) {
10121-
// src0, src1 and dst are same shape => same indices
10122-
const int i3 = ir/(ne2*ne1);
10123-
const int i2 = (ir - i3*ne2*ne1)/ne1;
10124-
const int i1 = (ir - i3*ne2*ne1 - i2*ne1);
10129+
for (int ir = ir0; ir < ir1; ++ir) {
10130+
// src1 is broadcastable across src0 and dst in i1, i2, i3
10131+
const int64_t i03 = ir/(ne02*ne01);
10132+
const int64_t i02 = (ir - i03*ne02*ne01)/ne01;
10133+
const int64_t i01 = (ir - i03*ne02*ne01 - i02*ne01);
1012510134

10135+
const int64_t i13 = i03 % ne13;
10136+
const int64_t i12 = i02 % ne12;
10137+
const int64_t i11 = i01 % ne11;
10138+
const int64_t nr0 = ne00 / ne10;
10139+
10140+
float * dst_ptr = (float *) ((char *) dst->data + i03*nb3 + i02*nb2 + i01*nb1 );
10141+
float * src0_ptr = (float *) ((char *) src0->data + i03*nb03 + i02*nb02 + i01*nb01);
10142+
float * src1_ptr = (float *) ((char *) src1->data + i13*nb13 + i12*nb12 + i11*nb11);
10143+
10144+
for (int64_t r = 0; r < nr0; ++r) {
1012610145
#ifdef GGML_USE_ACCELERATE
10127-
vDSP_vsub(
10128-
(float *) ((char *) src1->data + i3*nb13 + i2*nb12 + i1*nb11), 1,
10129-
(float *) ((char *) src0->data + i3*nb03 + i2*nb02 + i1*nb01), 1,
10130-
(float *) ((char *) dst->data + i3*nb3 + i2*nb2 + i1*nb1 ), 1,
10131-
ne0);
10146+
vDSP_vsub(src1_ptr, 1, src0_ptr + r*ne10, 1, dst_ptr + r*ne10, 1, ne10);
1013210147
#else
10133-
ggml_vec_sub_f32(ne0,
10134-
(float *) ((char *) dst->data + i3*nb3 + i2*nb2 + i1*nb1 ),
10135-
(float *) ((char *) src0->data + i3*nb03 + i2*nb02 + i1*nb01),
10136-
(float *) ((char *) src1->data + i3*nb13 + i2*nb12 + i1*nb11));
10148+
ggml_vec_sub_f32(ne10, dst_ptr + r*ne10, src0_ptr + r*ne10, src1_ptr);
1013710149
#endif
10138-
// }
10139-
// }
10150+
}
1014010151
}
1014110152
} else {
1014210153
// src1 is not contiguous
10143-
for (int ir = 0; ir < nr; ++ir) {
10144-
// src0, src1 and dst are same shape => same indices
10145-
const int i3 = ir/(ne2*ne1);
10146-
const int i2 = (ir - i3*ne2*ne1)/ne1;
10147-
const int i1 = (ir - i3*ne2*ne1 - i2*ne1);
10154+
for (int ir = ir0; ir < ir1; ++ir) {
10155+
// src1 is broadcastable across src0 and dst in i1, i2, i3
10156+
const int64_t i03 = ir/(ne02*ne01);
10157+
const int64_t i02 = (ir - i03*ne02*ne01)/ne01;
10158+
const int64_t i01 = (ir - i03*ne02*ne01 - i02*ne01);
10159+
10160+
const int64_t i13 = i03 % ne13;
10161+
const int64_t i12 = i02 % ne12;
10162+
const int64_t i11 = i01 % ne11;
10163+
10164+
float * dst_ptr = (float *) ((char *) dst->data + i03*nb3 + i02*nb2 + i01*nb1 );
10165+
float * src0_ptr = (float *) ((char *) src0->data + i03*nb03 + i02*nb02 + i01*nb01);
1014810166

10149-
float * dst_ptr = (float *) ((char *) dst->data + i3*nb3 + i2*nb2 + i1*nb1 );
10150-
float * src0_ptr = (float *) ((char *) src0->data + i3*nb03 + i2*nb02 + i1*nb01);
10151-
for (int i0 = 0; i0 < ne0; i0++) {
10152-
float * src1_ptr = (float *) ((char *) src1->data + i3*nb13 + i2*nb12 + i1*nb11 + i0*nb10);
10167+
for (int64_t i0 = 0; i0 < ne0; ++i0) {
10168+
const int64_t i10 = i0 % ne10;
10169+
float * src1_ptr = (float *) ((char *) src1->data + i13*nb13 + i12*nb12 + i11*nb11 + i10*nb10);
1015310170

1015410171
dst_ptr[i0] = src0_ptr[i0] - *src1_ptr;
1015510172
}

scripts/sync-ggml.last

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
a06c68343e9976fdfc80917a958b903a0d7c8cc6
1+
a735a7b5fce27d23c2a6b0b3ccbb47b2c51e83e7

0 commit comments

Comments (0)