Skip to content

Commit 415a128

Browse files
committed
Revert "Reapply "CUDA: fix bad asserts for partial offload (ggml-org#13337)""
This reverts commit b25ed015c47f647d6b3d7404217eb7e133d5144a for CUDA. The revert applies only to fattn-common.cuh.
1 parent 7ab3b0a commit 415a128

File tree

1 file changed

+0
-2
lines changed

1 file changed

+0
-2
lines changed

ggml/src/ggml-cuda/fattn-common.cuh

Lines changed: 0 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -871,7 +871,6 @@ void launch_fattn(
871871
size_t nb23 = V ? V->nb[3] : nb13;
872872

873873
if (need_f16_K && K->type != GGML_TYPE_F16) {
874-
GGML_ASSERT(ggml_is_contiguously_allocated(K));
875874
K_f16.alloc(ggml_nelements(K));
876875
to_fp16_cuda_t to_fp16 = ggml_get_to_fp16_cuda(K->type);
877876
to_fp16(K_data, K_f16.ptr, 1, ggml_nelements(K), main_stream);
@@ -887,7 +886,6 @@ void launch_fattn(
887886

888887
if (V && need_f16_V && V->type != GGML_TYPE_F16) {
889888
// GGML_ASSERT(ggml_is_contiguous(V));
890-
GGML_ASSERT(ggml_is_contiguously_allocated(V));
891889
V_f16.alloc(ggml_nelements(V));
892890
to_fp16_cuda_t to_fp16 = ggml_get_to_fp16_cuda(V->type);
893891
to_fp16(V_data, V_f16.ptr, 1, ggml_nelements(V), main_stream);

0 commit comments

Comments
 (0)