Skip to content

Commit a247762

Browse files
committed
Latest commits and fixes to Makefile
1 parent b31548a commit a247762

38 files changed

+24382
-17237
lines changed

Makefile

Lines changed: 31 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -102,6 +102,8 @@ ARCH = -march=native -mtune=native
102102
TMP = o/
103103
endif
104104

105+
ARCH_F = x86
106+
105107
#the main folder for groups of sources
106108
base = base_sampling2
107109

@@ -120,6 +122,7 @@ ggmlsrc_f_h = $(ggmlsrc_f)/include
120122
ggmlsrc_f_s = $(ggmlsrc_f)/src
121123
# backends
122124
ggmlsrc_cpu_f = $(ggmlsrc_f_s)/ggml-cpu
125+
ggmlsrc_cpu_86_f = $(ggmlsrc_cpu_f)/arch/$(ARCH_F)
123126
ggmlsrc_blas_f = $(ggmlsrc_f_s)/ggml-blas
124127
ggmlsrc_vulkan_f = $(ggmlsrc_f_s)/ggml-vulkan
125128
# llama
@@ -178,7 +181,7 @@ OBJS += $(TMP)tinyfiledialogs/tinyfiledialogs.o
178181
# vpath=$(ggmlsrc_f):$(llamacpp_f):$(common_f)
179182

180183
FILE_D = -Itinyfiledialogs
181-
I_GGUF = -I$(common_f) -I$(ggmlsrc_f_h) -I$(ggmlsrc_f_s) -I$(ggmlsrc_cpu_f) -I$(ggmlsrc_blas_f) -I$(ggmlsrc_vulkan_f) -I$(llamacpp_f_s) -I$(llamacpp_f_h) -I$(uibackend_f) -I$(include_f)
184+
I_GGUF = -I$(common_f) -I$(ggmlsrc_f_h) -I$(ggmlsrc_f_s) -I$(ggmlsrc_cpu_f) -I$(ggmlsrc_cpu_86_f) -I$(ggmlsrc_blas_f) -I$(ggmlsrc_vulkan_f) -I$(llamacpp_f_s) -I$(llamacpp_f_h) -I$(uibackend_f) -I$(include_f)
182185
I_GGUF_PRE = -I. -Ipre_backend -Iinclude
183186
I_GGML = -Iggml -Iinclude
184187

@@ -419,8 +422,7 @@ HEADERS_GGUF_BASE = \
419422
$(ggmlsrc_f_s)/ggml-backend-impl.h \
420423
$(ggmlsrc_f_s)/ggml-quants.h \
421424
$(ggmlsrc_f_s)/ggml-threading.h \
422-
$(ggmlsrc_cpu_f)/ggml-cpu-aarch64.h \
423-
$(ggmlsrc_cpu_f)/ggml-cpu-hbm.h \
425+
$(ggmlsrc_cpu_f)/hbm.h \
424426
$(ggmlsrc_cpu_f)/ggml-cpu-impl.h \
425427
$(ggmlsrc_cpu_f)/ggml-cpu-quants.h \
426428
$(ggmlsrc_cpu_f)/ggml-cpu-traits.h \
@@ -446,10 +448,13 @@ OBJS_GGUF_CPU = \
446448
$(OBJS_GGUF_BASE) \
447449
$(TMP)$(PREFIX)_ggml-cpu.o \
448450
$(TMP)$(PREFIX)_ggml-cpu_cpp.o \
449-
$(TMP)$(PREFIX)_ggml-cpu-aarch64.o \
450-
$(TMP)$(PREFIX)_ggml-cpu-hbm.o \
451-
$(TMP)$(PREFIX)_ggml-cpu-quants.o \
452-
$(TMP)$(PREFIX)_ggml-cpu-traits.o \
451+
$(TMP)$(PREFIX)_repack.o \
452+
$(TMP)$(PREFIX)_hbm.o \
453+
$(TMP)$(PREFIX)_quants.o \
454+
$(TMP)$(PREFIX)_$(ARCH_F)_repack.o \
455+
$(TMP)$(PREFIX)_$(ARCH_F)_quants.o \
456+
$(TMP)$(PREFIX)_$(ARCH_F)_cpu-feats.o \
457+
$(TMP)$(PREFIX)_traits.o \
453458
$(TMP)$(PREFIX)_common.o \
454459
$(TMP)$(PREFIX)_binary-ops.o \
455460
$(TMP)$(PREFIX)_unary-ops.o \
@@ -461,7 +466,7 @@ ifdef DYNAMIC
461466
PREFIX = dyn_$(PREFIX_BASE)
462467
OBJS_GGUF = $(OBJS_GGUF_BASE)
463468
else
464-
CXXFLAGS += -DGGML_USE_CPU
469+
CXXFLAGS += -DGGML_USE_CPU -DGGML_USE_CPU_REPACK
465470
PREFIX = stt_$(PREFIX_BASE)
466471
OBJS_GGUF = $(OBJS_GGUF_CPU)
467472
endif
@@ -554,33 +559,51 @@ endif
554559
# inherited from llama.cpp, usually no custom changes
555560
$(TMP)$(PREFIX)_%.o: $(ggmlsrc_f_s)/%.c
556561
$(CC) $(CFLAGS) -MMD -c $< -o $@
562+
@echo
557563

558564
$(TMP)$(PREFIX)_%.o: $(ggmlsrc_f_s)/%.cpp
559565
$(CXX) $(CXXFLAGS) -MMD -c $< -o $@
566+
@echo
560567

561568
$(TMP)$(PREFIX)_%.o: $(ggmlsrc_f_s)/ggml-cpu/%.c
562569
$(CC) $(CFLAGS) -MMD -c $< -o $@
570+
@echo
571+
572+
$(TMP)$(PREFIX)_$(ARCH_F)_%.o: $(ggmlsrc_f_s)/ggml-cpu/arch/$(ARCH_F)/%.c
573+
$(CC) $(CFLAGS) -MMD -c $< -o $@
574+
@echo
563575

564576
$(TMP)$(PREFIX)_%.o: $(ggmlsrc_f_s)/ggml-cpu/%.cpp
565577
$(CXX) $(CXXFLAGS) -MMD -c $< -o $@
578+
@echo
579+
580+
$(TMP)$(PREFIX)_$(ARCH_F)_%.o: $(ggmlsrc_f_s)/ggml-cpu/arch/$(ARCH_F)/%.cpp
581+
$(CXX) $(CXXFLAGS) -MMD -c $< -o $@
582+
@echo
566583

567584
$(TMP)$(PREFIX)_%.o: $(ggmlsrc_f_s)/ggml-cpu/llamafile/%.cpp
568585
$(CXX) $(CXXFLAGS) -MMD -c $< -o $@
586+
@echo
569587

570588
$(TMP)$(PREFIX)_%.o: $(ggmlsrc_f_s)/ggml-blas/%.cpp
571589
$(CXX) $(CXXFLAGS) -MMD -c $< -o $@
590+
@echo
572591

573592
$(TMP)$(PREFIX)_%.o: $(ggmlsrc_f_s)/ggml-vulkan/%.cpp
574593
$(CXX) $(CXXFLAGS) -MMD -c $< -o $@
594+
@echo
575595

576596
$(TMP)$(PREFIX)_%_cpp.o: $(ggmlsrc_f_s)/%.cpp
577597
$(CC) $(CXXFLAGS) $(LDFLAGS) -MMD -c $< -o $@
598+
@echo
578599

579600
$(TMP)$(PREFIX)_%_cpp.o: $(ggmlsrc_f_s)/ggml-cpu/%.cpp
580601
$(CC) $(CXXFLAGS) $(LDFLAGS) -MMD -c $< -o $@
602+
@echo
581603

582604
$(TMP)$(PREFIX)_%.o: $(llamacpp_f_s)/%.cpp
583605
$(CXX) $(CXXFLAGS) -MMD -c $< -o $@
606+
@echo
584607

585608
# customized part
586609
COMMON_H_DEPS = $(common_f)/common.h $(common_f)/sampling.h $(common_f)/llama-addon.h $(llamacpp_f_h)/llama.h

base_sampling2/chat_layer.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -611,8 +611,9 @@ class chat
611611

612612
bool logit_bias_check_beginning(std::string_view token_str) {
613613
for (auto word : params.sparams.logit_bias_strings_beginning) {
614+
int len_min = (word.length() > 5 ? 3 : 2);
614615
if ((token_str.find(word) == 0 && (token_str.length() - word.length()) < 4) ||
615-
(token_str.length() > 2 && word.find(token_str) == 0)
616+
(token_str.length() > len_min && word.find(token_str) == 0)
616617
) return true;
617618
}
618619

base_sampling2/master/ggml/src/ggml-common.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1074,6 +1074,10 @@ GGML_TABLE_BEGIN(uint32_t, iq3s_grid, 512)
10741074
0x0f090307, 0x0f090501, 0x0f090b01, 0x0f0b0505, 0x0f0b0905, 0x0f0d0105, 0x0f0d0703, 0x0f0f0101,
10751075
GGML_TABLE_END()
10761076

1077+
GGML_TABLE_BEGIN(int8_t, kvalues_iq4nl, 16)
1078+
-127, -104, -83, -65, -49, -35, -22, -10, 1, 13, 25, 38, 53, 69, 89, 113,
1079+
GGML_TABLE_END()
1080+
10771081
#define NGRID_IQ1S 2048
10781082
#define IQ1S_DELTA 0.125f
10791083
#define IQ1M_DELTA 0.125f

base_sampling2/master/ggml/src/ggml-cpu/CMakeLists.txt

Lines changed: 34 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -10,14 +10,14 @@ function(ggml_add_cpu_backend_variant_impl tag_name)
1010
list (APPEND GGML_CPU_SOURCES
1111
ggml-cpu/ggml-cpu.c
1212
ggml-cpu/ggml-cpu.cpp
13-
ggml-cpu/ggml-cpu-aarch64.cpp
14-
ggml-cpu/ggml-cpu-aarch64.h
15-
ggml-cpu/ggml-cpu-hbm.cpp
16-
ggml-cpu/ggml-cpu-hbm.h
17-
ggml-cpu/ggml-cpu-quants.c
18-
ggml-cpu/ggml-cpu-quants.h
19-
ggml-cpu/ggml-cpu-traits.cpp
20-
ggml-cpu/ggml-cpu-traits.h
13+
ggml-cpu/repack.cpp
14+
ggml-cpu/repack.h
15+
ggml-cpu/hbm.cpp
16+
ggml-cpu/hbm.h
17+
ggml-cpu/quants.c
18+
ggml-cpu/quants.h
19+
ggml-cpu/traits.cpp
20+
ggml-cpu/traits.h
2121
ggml-cpu/amx/amx.cpp
2222
ggml-cpu/amx/amx.h
2323
ggml-cpu/amx/mmq.cpp
@@ -84,6 +84,11 @@ function(ggml_add_cpu_backend_variant_impl tag_name)
8484

8585
if (GGML_SYSTEM_ARCH STREQUAL "ARM")
8686
message(STATUS "ARM detected")
87+
list(APPEND GGML_CPU_SOURCES
88+
ggml-cpu/arch/arm/quants.c
89+
ggml-cpu/arch/arm/repack.cpp
90+
)
91+
8792
if (MSVC AND NOT CMAKE_C_COMPILER_ID STREQUAL "Clang")
8893
message(FATAL_ERROR "MSVC is not supported for ARM, use clang")
8994
else()
@@ -167,6 +172,11 @@ function(ggml_add_cpu_backend_variant_impl tag_name)
167172
endif()
168173
elseif (GGML_SYSTEM_ARCH STREQUAL "x86")
169174
message(STATUS "x86 detected")
175+
list(APPEND GGML_CPU_SOURCES
176+
ggml-cpu/arch/x86/quants.c
177+
ggml-cpu/arch/x86/repack.cpp
178+
)
179+
170180
if (MSVC)
171181
# instruction set detection for MSVC only
172182
if (GGML_NATIVE)
@@ -302,7 +312,7 @@ function(ggml_add_cpu_backend_variant_impl tag_name)
302312
# Since multiple variants of the CPU backend may be included in the same
303313
# build, using set_source_files_properties() to set the arch flags is not possible
304314
set(GGML_CPU_FEATS_NAME ${GGML_CPU_NAME}-feats)
305-
add_library(${GGML_CPU_FEATS_NAME} OBJECT ggml-cpu/cpu-feats-x86.cpp)
315+
add_library(${GGML_CPU_FEATS_NAME} OBJECT ggml-cpu/arch/x86/cpu-feats.cpp)
306316
target_include_directories(${GGML_CPU_FEATS_NAME} PRIVATE . .. ../include)
307317
target_compile_definitions(${GGML_CPU_FEATS_NAME} PRIVATE ${ARCH_DEFINITIONS})
308318
target_compile_definitions(${GGML_CPU_FEATS_NAME} PRIVATE GGML_BACKEND_DL GGML_BACKEND_BUILD GGML_BACKEND_SHARED)
@@ -311,6 +321,7 @@ function(ggml_add_cpu_backend_variant_impl tag_name)
311321
endif()
312322
elseif (GGML_SYSTEM_ARCH STREQUAL "PowerPC")
313323
message(STATUS "PowerPC detected")
324+
list(APPEND GGML_CPU_SOURCES ggml-cpu/arch/powerpc/quants.c)
314325
if (GGML_NATIVE)
315326
if (${CMAKE_SYSTEM_PROCESSOR} MATCHES "ppc64")
316327
file(READ "/proc/cpuinfo" POWER10_M)
@@ -338,6 +349,8 @@ function(ggml_add_cpu_backend_variant_impl tag_name)
338349
endif()
339350
elseif (GGML_SYSTEM_ARCH STREQUAL "loongarch64")
340351
message(STATUS "loongarch64 detected")
352+
list(APPEND GGML_CPU_SOURCES ggml-cpu/arch/loongarch/quants.c)
353+
341354
list(APPEND ARCH_FLAGS -march=loongarch64)
342355
if (GGML_LASX)
343356
list(APPEND ARCH_FLAGS -mlasx)
@@ -347,6 +360,10 @@ function(ggml_add_cpu_backend_variant_impl tag_name)
347360
endif()
348361
elseif (GGML_SYSTEM_ARCH STREQUAL "riscv64")
349362
message(STATUS "riscv64 detected")
363+
list(APPEND GGML_CPU_SOURCES
364+
ggml-cpu/arch/riscv/quants.c
365+
ggml-cpu/arch/riscv/repack.cpp
366+
)
350367
if (GGML_RVV)
351368
if (GGML_XTHEADVECTOR)
352369
list(APPEND ARCH_FLAGS -march=rv64gc_xtheadvector -mabi=lp64d)
@@ -358,6 +375,7 @@ function(ggml_add_cpu_backend_variant_impl tag_name)
358375
endif()
359376
elseif (GGML_SYSTEM_ARCH STREQUAL "s390x")
360377
message(STATUS "s390x detected")
378+
list(APPEND GGML_CPU_SOURCES ggml-cpu/arch/s390/quants.c)
361379
file(READ "/proc/cpuinfo" CPUINFO_CONTENTS)
362380
string(REGEX REPLACE "machine[ \t\r\n]*=[ \t\r\n]*([0-9]+)" "\\1" S390X_M ${CPUINFO_CONTENTS})
363381

@@ -381,12 +399,16 @@ function(ggml_add_cpu_backend_variant_impl tag_name)
381399
if (GGML_VXE)
382400
list(APPEND ARCH_FLAGS -mvx -mzvector)
383401
endif()
402+
elseif (CMAKE_SYSTEM_PROCESSOR MATCHES "wasm")
403+
message(STATUS "Wasm detected")
404+
list (APPEND GGML_CPU_SOURCES ggml-cpu/arch/wasm/quants.c)
384405
else()
385-
message(STATUS "Unknown architecture")
406+
message(WARNING "Unknown CPU architecture. Falling back to generic implementations.")
407+
list(APPEND ARCH_FLAGS -DGGML_CPU_GENERIC)
386408
endif()
387409

388-
if (GGML_CPU_AARCH64)
389-
target_compile_definitions(${GGML_CPU_NAME} PRIVATE GGML_USE_CPU_AARCH64)
410+
if (GGML_CPU_REPACK)
411+
target_compile_definitions(${GGML_CPU_NAME} PRIVATE GGML_USE_CPU_REPACK)
390412
endif()
391413

392414
if (GGML_CPU_KLEIDIAI)

base_sampling2/master/ggml/src/ggml-cpu/amx/amx.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55
#include "ggml-backend.h"
66
#include "ggml-impl.h"
77
#include "ggml-cpu.h"
8-
#include "ggml-cpu-traits.h"
8+
#include "traits.h"
99

1010
#if defined(__gnu_linux__)
1111
#include <sys/syscall.h>

base_sampling2/master/ggml/src/ggml-cpu/amx/mmq.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88
#include "mmq.h"
99
#include "ggml-impl.h"
1010
#include "ggml-cpu-impl.h"
11-
#include "ggml-cpu-quants.h"
11+
#include "quants.h"
1212
#include "ggml-quants.h"
1313
#include <algorithm>
1414
#include <type_traits>

0 commit comments

Comments
 (0)