Skip to content

Commit 73b8583

Browse files
authored
Merge branch 'ggml-org:master' into master
2 parents 0d7053b + 62d4250 commit 73b8583

File tree

3 files changed: 5 additions and 2 deletions.

docs/multimodal.md

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -69,9 +69,9 @@ NOTE: some models may require large context window, for example: `-c 8192`
6969

7070
# InternVL 2.5 and 3
7171
(tool_name) -hf ggml-org/InternVL2_5-1B-GGUF
72-
(tool_name) -hf ggml-org/InternVL2_5-2B-GGUF
72+
(tool_name) -hf ggml-org/InternVL2_5-4B-GGUF
7373
(tool_name) -hf ggml-org/InternVL3-1B-Instruct-GGUF
7474
(tool_name) -hf ggml-org/InternVL3-2B-Instruct-GGUF
75-
(tool_name) -hf ggml-org/InternVL3-4B-Instruct-GGUF
75+
(tool_name) -hf ggml-org/InternVL3-8B-Instruct-GGUF
7676
(tool_name) -hf ggml-org/InternVL3-14B-Instruct-GGUF
7777
```

ggml/src/ggml-cuda/fattn-mma-f16.cuh

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -874,6 +874,8 @@ static __device__ __forceinline__ void flash_attn_ext_f16_process_tile(
874874
}
875875
}
876876

877+
__syncthreads();
878+
877879
// Write back combined meta data:
878880
#pragma unroll
879881
for (int imeta = 0; imeta < nmeta; ++imeta) {

ggml/src/ggml-cuda/fattn-vec-f16.cuh

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -168,6 +168,7 @@ static __global__ void flash_attn_vec_ext_f16(
168168
for (int j = 0; j < ncols; ++j) {
169169
KQ[j*D + tid] = -HALF_MAX_HALF;
170170
}
171+
__syncthreads();
171172

172173
half2 VKQ[ncols] = {{0.0f, 0.0f}};
173174

0 commit comments

Comments
 (0)