Merge branch 'ggml-org:master' into master

Silver267 · web-flow · commit 73b858323842 · 2025-05-10T21:27:21.000-04:00
diff --git a/docs/multimodal.md b/docs/multimodal.md
@@ -69,9 +69,9 @@ NOTE: some models may require large context window, for example: `-c 8192`
 
 # InternVL 2.5 and 3
 (tool_name) -hf ggml-org/InternVL2_5-1B-GGUF
-(tool_name) -hf ggml-org/InternVL2_5-2B-GGUF
+(tool_name) -hf ggml-org/InternVL2_5-4B-GGUF
 (tool_name) -hf ggml-org/InternVL3-1B-Instruct-GGUF
 (tool_name) -hf ggml-org/InternVL3-2B-Instruct-GGUF
-(tool_name) -hf ggml-org/InternVL3-4B-Instruct-GGUF
+(tool_name) -hf ggml-org/InternVL3-8B-Instruct-GGUF
 (tool_name) -hf ggml-org/InternVL3-14B-Instruct-GGUF
 ```
diff --git a/ggml/src/ggml-cuda/fattn-mma-f16.cuh b/ggml/src/ggml-cuda/fattn-mma-f16.cuh
@@ -874,6 +874,8 @@ static __device__ __forceinline__ void flash_attn_ext_f16_process_tile(
             }
         }
 
+        __syncthreads();
+
         // Write back combined meta data:
 #pragma unroll
         for (int imeta = 0; imeta < nmeta; ++imeta) {
diff --git a/ggml/src/ggml-cuda/fattn-vec-f16.cuh b/ggml/src/ggml-cuda/fattn-vec-f16.cuh
@@ -168,6 +168,7 @@ static __global__ void flash_attn_vec_ext_f16(
     for (int j = 0; j < ncols; ++j) {
         KQ[j*D + tid] = -HALF_MAX_HALF;
     }
+    __syncthreads();
 
     half2 VKQ[ncols] = {{0.0f, 0.0f}};
 

Original file line number	Diff line number	Diff line change
`@@ -874,6 +874,8 @@ static __device__ __forceinline__ void flash_attn_ext_f16_process_tile(`
`874`	`874`	`}`
`875`	`875`	`}`
`876`	`876`
	`877`	`+ __syncthreads();`
	`878`	`+`
`877`	`879`	`// Write back combined meta data:`
`878`	`880`	`#pragma unroll`
`879`	`881`	`for (int imeta = 0; imeta < nmeta; ++imeta) {`
Original file line number	Diff line number	Diff line change
`@@ -168,6 +168,7 @@ static __global__ void flash_attn_vec_ext_f16(`
`168`	`168`	`for (int j = 0; j < ncols; ++j) {`
`169`	`169`	`KQ[j*D + tid] = -HALF_MAX_HALF;`
`170`	`170`	`}`
	`171`	`+ __syncthreads();`
`171`	`172`
`172`	`173`	`half2 VKQ[ncols] = {{0.0f, 0.0f}};`
`173`	`174`