diff --git a/CMakeLists.txt b/CMakeLists.txt
index 34ef7958f6d..4fec3332aa3 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -70,6 +70,9 @@ option(WHISPER_ALL_WARNINGS_3RD_PARTY "whisper: enable all compiler warnings in
 option(WHISPER_FATAL_WARNINGS "whisper: enable -Werror flag" OFF)
 option(WHISPER_USE_SYSTEM_GGML "whisper: use system-installed GGML library" OFF)
 
+# flat bindings (adds whisper-flat.cpp / ggml-flat.cpp to the library targets)
+option(BINDINGS_FLAT "Add extra flat definitions to Whisper + GGML" OFF)
+
 # sanitizers
 option(WHISPER_SANITIZE_THREAD "whisper: enable thread sanitizer" OFF)
 option(WHISPER_SANITIZE_ADDRESS "whisper: enable address sanitizer" OFF)
diff --git a/examples/bench/bench.cpp b/examples/bench/bench.cpp
index 54f73110d42..e48533ce597 100644
--- a/examples/bench/bench.cpp
+++ b/examples/bench/bench.cpp
@@ -1,4 +1,7 @@
 #include "whisper.h"
+#if defined(BINDINGS_FLAT) // whisper-flat.cpp is only built when BINDINGS_FLAT is ON
+#include "whisper-flat.h"
+#endif
 
 #include
 #include
@@ -61,6 +64,10 @@ void whisper_print_usage(int /*argc*/, char ** argv, const whisper_params & para
 static int whisper_bench_full(const whisper_params & params) {
     // whisper init
 
+#if defined(BINDINGS_FLAT)
+    whisper_flat_backend_load_all(); // forwards to ggml_backend_load_all()
+#endif
+
     struct whisper_context_params cparams = whisper_context_default_params();
 
     cparams.use_gpu = params.use_gpu;
diff --git a/examples/cli/cli.cpp b/examples/cli/cli.cpp
index 4b2b3521b80..46217672748 100644
--- a/examples/cli/cli.cpp
+++ b/examples/cli/cli.cpp
@@ -2,6 +2,9 @@
 #include "common-whisper.h"
 
 #include "whisper.h"
+#if defined(BINDINGS_FLAT) // whisper-flat.cpp is only built when BINDINGS_FLAT is ON
+#include "whisper-flat.h"
+#endif
 #include "grammar-parser.h"
 #include
 #include
@@ -1004,6 +1007,10 @@ int main(int argc, char ** argv) {
 
     // whisper init
 
+#if defined(BINDINGS_FLAT)
+    whisper_flat_backend_load_all(); // forwards to ggml_backend_load_all()
+#endif
+
     struct whisper_context_params cparams = whisper_context_default_params();
 
     cparams.use_gpu = params.use_gpu;
diff --git a/examples/command/command.cpp
b/examples/command/command.cpp
index 9dc8f629995..3cbc892a206 100644
--- a/examples/command/command.cpp
+++ b/examples/command/command.cpp
@@ -9,6 +9,9 @@
 #include "common-sdl.h"
 #include "common.h"
 #include "whisper.h"
+#if defined(BINDINGS_FLAT) // whisper-flat.cpp is only built when BINDINGS_FLAT is ON
+#include "whisper-flat.h"
+#endif
 #include "grammar-parser.h"
 #include
 #include
@@ -692,6 +695,10 @@ int main(int argc, char ** argv) {
 
     // whisper init
 
+#if defined(BINDINGS_FLAT)
+    whisper_flat_backend_load_all(); // forwards to ggml_backend_load_all()
+#endif
+
     struct whisper_context_params cparams = whisper_context_default_params();
 
     cparams.use_gpu = params.use_gpu;
diff --git a/examples/server/server.cpp b/examples/server/server.cpp
index 38da61673df..230f6980003 100644
--- a/examples/server/server.cpp
+++ b/examples/server/server.cpp
@@ -2,6 +2,9 @@
 #include "common-whisper.h"
 
 #include "whisper.h"
+#if defined(BINDINGS_FLAT) // whisper-flat.cpp is only built when BINDINGS_FLAT is ON
+#include "whisper-flat.h"
+#endif
 #include "httplib.h"
 #include "json.hpp"
 
@@ -544,6 +547,10 @@ int main(int argc, char ** argv) {
         check_ffmpeg_availibility();
     }
     // whisper init
+#if defined(BINDINGS_FLAT)
+    whisper_flat_backend_load_all(); // forwards to ggml_backend_load_all()
+#endif
+
     struct whisper_context_params cparams = whisper_context_default_params();
 
     cparams.use_gpu = params.use_gpu;
diff --git a/examples/stream/stream.cpp b/examples/stream/stream.cpp
index 65c6587db92..2505ffb79d9 100644
--- a/examples/stream/stream.cpp
+++ b/examples/stream/stream.cpp
@@ -6,6 +6,9 @@
 #include "common.h"
 #include "common-whisper.h"
 #include "whisper.h"
+#if defined(BINDINGS_FLAT) // whisper-flat.cpp is only built when BINDINGS_FLAT is ON
+#include "whisper-flat.h"
+#endif
 
 #include
 #include
@@ -155,6 +158,10 @@ int main(int argc, char ** argv) {
         exit(0);
     }
 
+#if defined(BINDINGS_FLAT)
+    whisper_flat_backend_load_all(); // forwards to ggml_backend_load_all()
+#endif
+
     struct whisper_context_params cparams = whisper_context_default_params();
 
     cparams.use_gpu = params.use_gpu;
diff --git
a/examples/talk-llama/talk-llama.cpp b/examples/talk-llama/talk-llama.cpp
index 9097c491b61..3b131590cd5 100644
--- a/examples/talk-llama/talk-llama.cpp
+++ b/examples/talk-llama/talk-llama.cpp
@@ -5,6 +5,9 @@
 #include "common.h"
 #include "common-whisper.h"
 #include "whisper.h"
+#if defined(BINDINGS_FLAT) // whisper-flat.cpp is only built when BINDINGS_FLAT is ON
+#include "whisper-flat.h"
+#endif
 #include "llama.h"
 #include
 #include
@@ -287,6 +290,10 @@ int main(int argc, char ** argv) {
 
     // whisper init
 
+#if defined(BINDINGS_FLAT)
+    whisper_flat_backend_load_all(); // forwards to ggml_backend_load_all()
+#endif
+
     struct whisper_context_params cparams = whisper_context_default_params();
 
     cparams.use_gpu = params.use_gpu;
diff --git a/examples/wchess/wchess.cmd/wchess.cmd.cpp b/examples/wchess/wchess.cmd/wchess.cmd.cpp
index 4d049976315..7f60f262c7f 100644
--- a/examples/wchess/wchess.cmd/wchess.cmd.cpp
+++ b/examples/wchess/wchess.cmd/wchess.cmd.cpp
@@ -7,6 +7,9 @@
 
 #include "WChess.h"
 #include "common-sdl.h"
+#if defined(BINDINGS_FLAT) // whisper-flat.cpp is only built when BINDINGS_FLAT is ON
+#include "whisper-flat.h"
+#endif
 
 #include
 #include
@@ -182,6 +185,10 @@ int main(int argc, char ** argv) {
 
     // whisper init
 
+#if defined(BINDINGS_FLAT)
+    whisper_flat_backend_load_all(); // forwards to ggml_backend_load_all()
+#endif
+
     struct whisper_context_params cparams = whisper_context_default_params();
 
     cparams.use_gpu = params.use_gpu;
diff --git a/ggml/include/ggml-backend.h b/ggml/include/ggml-backend.h
index 64671495b38..b96a1f8579d 100644
--- a/ggml/include/ggml-backend.h
+++ b/ggml/include/ggml-backend.h
@@ -348,6 +348,9 @@ extern "C" {
     // CPU buffer types are always available
     GGML_API ggml_backend_buffer_t ggml_backend_cpu_buffer_from_ptr(void * ptr, size_t size);
     GGML_API ggml_backend_buffer_type_t ggml_backend_cpu_buffer_type(void);
+
+    // Exported entry point that forwards to ggml_backend_load_best() in ggml-backend-reg.cpp
+    GGML_API ggml_backend_reg_t ggml_backend_try_load_best(const char * name, const char * dir_path);
 
 #ifdef __cplusplus
 }
diff --git a/ggml/src/CMakeLists.txt b/ggml/src/CMakeLists.txt
index 43d9fc4fe25..45af1d3e959
100644
--- a/ggml/src/CMakeLists.txt
+++ b/ggml/src/CMakeLists.txt
@@ -208,8 +208,22 @@ if (GGML_BACKEND_DL)
     target_compile_definitions(ggml-base PUBLIC GGML_BACKEND_DL)
 endif()
 
+set(GGML_LIBRARY_SOURCES
+    ggml-backend-reg.cpp)
+
+if(BINDINGS_FLAT)
+    message(STATUS "Adding FLAT GGML binding extras")
+
+    set(FLAT_GGML_SOURCES
+        ggml-flat.cpp
+    )
+
+    list(APPEND GGML_LIBRARY_SOURCES ${FLAT_GGML_SOURCES})
+endif()
+
 add_library(ggml
-    ggml-backend-reg.cpp)
+    ${GGML_LIBRARY_SOURCES}
+    )
 
 target_link_libraries(ggml PUBLIC ggml-base)
 
diff --git a/ggml/src/ggml-backend-reg.cpp b/ggml/src/ggml-backend-reg.cpp
index 405d8e31514..40ba454b695 100644
--- a/ggml/src/ggml-backend-reg.cpp
+++ b/ggml/src/ggml-backend-reg.cpp
@@ -584,3 +584,21 @@ void ggml_backend_load_all_from_path(const char * dir_path) {
         ggml_backend_load(backend_path);
     }
 }
+
+// ggml_backend_try_load_best
+// Exported entry point that forwards name and dir_path to ggml_backend_load_best().
+// The silent argument is true when NDEBUG is defined and false otherwise.
+// Returns nullptr when name is null instead of forwarding it.
+ggml_backend_reg_t ggml_backend_try_load_best(const char * name, const char * dir_path) {
+    if (name == nullptr) {
+        return nullptr;
+    }
+
+#ifdef NDEBUG
+    const bool silent = true;
+#else
+    const bool silent = false;
+#endif
+
+    return ggml_backend_load_best(name, silent, dir_path);
+}
diff --git a/ggml/src/ggml-flat.cpp b/ggml/src/ggml-flat.cpp
new file mode 100644
index 00000000000..ea727ecbec9
--- /dev/null
+++ b/ggml/src/ggml-flat.cpp
@@ -0,0 +1,35 @@
+
+#ifdef _WIN32
+#define WIN32_LEAN_AND_MEAN
+#ifndef NOMINMAX
+#    define NOMINMAX
+#endif
+#include
+#endif
+
+#include "ggml-backend.h"
+#include "ggml-backend-impl.h"
+#include "ggml-alloc.h"
+#include "ggml-impl.h"
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#ifdef __APPLE__
+#include
+#include
+#endif
+
+#include "ggml-flat.h"
+
+
+
+
+
diff --git a/ggml/src/ggml-flat.h b/ggml/src/ggml-flat.h
new file mode 100644
index 00000000000..7537d7249f2
--- /dev/null
+++ b/ggml/src/ggml-flat.h
@@ -0,0 +1,28 @@
+#pragma once
+
+#ifdef BINDINGS_FLAT
+#endif
+
+#ifdef GGML_SHARED
+#    if defined(_WIN32) && !defined(__MINGW32__)
+#        ifdef GGML_BUILD
+#            define GGML_FLAT_API __declspec(dllexport) extern
+#        else
+#            define GGML_FLAT_API __declspec(dllimport) extern
+#        endif
+#    else
+#        define GGML_FLAT_API __attribute__ ((visibility ("default"))) extern
+#    endif
+#else
+#    define GGML_FLAT_API extern
+#endif
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+
+
+#ifdef __cplusplus
+}
+#endif
diff --git a/ggml/src/ggml-opencl/ggml-opencl.cpp b/ggml/src/ggml-opencl/ggml-opencl.cpp
index 05a2f4e630a..b2c5c3a4979 100644
--- a/ggml/src/ggml-opencl/ggml-opencl.cpp
+++ b/ggml/src/ggml-opencl/ggml-opencl.cpp
@@ -2,9 +2,11 @@
 #define CL_USE_DEPRECATED_OPENCL_1_2_APIS
 
 // suppress warnings in CL headers for GCC and Clang
-#pragma GCC diagnostic ignored "-Woverlength-strings"
-#ifdef __clang__
-#pragma GCC diagnostic ignored "-Wgnu-anonymous-struct"
+#ifndef _MSC_VER
+    #pragma GCC diagnostic ignored "-Woverlength-strings"
+    #ifdef __clang__
+        #pragma GCC diagnostic ignored "-Wgnu-anonymous-struct"
+    #endif
 #endif
 
 #include "ggml-opencl.h"
diff --git a/include/whisper.h b/include/whisper.h
index 1e1375033ad..37b43b2c073 100644
--- a/include/whisper.h
+++ b/include/whisper.h
@@ -668,6 +668,16 @@ extern "C" {
     // Get the no_speech probability for the specified segment
     WHISPER_API float whisper_full_get_segment_no_speech_prob (struct whisper_context * ctx, int i_segment);
     WHISPER_API float whisper_full_get_segment_no_speech_prob_from_state(struct whisper_state * state, int i_segment);
+
+    // Accessors wrapped by whisper-flat.cpp for non-C++ language bindings.
+    // whisper_get_timings_with_state() returns an object allocated with new.
+    WHISPER_API const char * whisper_get_system_info_json(void);
+    WHISPER_API struct whisper_state * whisper_get_state_from_context(struct whisper_context * ctx);
+    WHISPER_API struct whisper_timings * whisper_get_timings_with_state(struct whisper_state * state);
+    WHISPER_API ggml_backend_t whisper_get_preferred_backend(struct whisper_state * state);
+    WHISPER_API ggml_backend_t whisper_get_indexed_backend(struct whisper_state* state, size_t i);
+    WHISPER_API size_t whisper_get_backend_count(struct whisper_state* state);
+
 #ifdef __cplusplus
 }
 #endif
diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
index a091e66a25f..5f90e81a3ac 100644
---
a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -100,10 +100,29 @@ endif()
 
 # whisper
 
+set(WHISPER_LIBRARY_SOURCES
+    ../include/whisper.h
+    whisper-arch.h
+    whisper.cpp
+    )
+
+if(BINDINGS_FLAT)
+    message(STATUS "Adding FLAT Whisper binding extras")
+
+    set(FLAT_WHISPER_SOURCES
+        whisper-flat.cpp
+    )
+
+    list(APPEND WHISPER_LIBRARY_SOURCES ${FLAT_WHISPER_SOURCES})
+endif()
+
 add_library(whisper
-    ../include/whisper.h
-    whisper-arch.h
-    whisper.cpp
+    ${WHISPER_LIBRARY_SOURCES}
     )
+
+if(BINDINGS_FLAT)
+    # a CMake option is not a macro: the C++ sources test defined(BINDINGS_FLAT)
+    target_compile_definitions(whisper PUBLIC BINDINGS_FLAT)
+endif()
 
 # Set the version numbers
diff --git a/src/whisper-flat.cpp b/src/whisper-flat.cpp
new file mode 100644
index 00000000000..33f51d67403
--- /dev/null
+++ b/src/whisper-flat.cpp
@@ -0,0 +1,60 @@
+#include "whisper.h"
+#include "whisper-arch.h"
+
+#include "ggml.h"
+#include "ggml-cpp.h"
+#include "ggml-alloc.h"
+#include "ggml-backend.h"
+
+#include
+#include
+#include
+#define _USE_MATH_DEFINES
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include "whisper-flat.h"
+
+// Flat C entry points for language bindings. Each one only forwards its
+// arguments to the whisper/ggml function named in its body.
+void whisper_flat_backend_load_all(void) {
+    ggml_backend_load_all();
+}
+
+const char * whisper_flat_get_system_info_json(void) {
+    return whisper_get_system_info_json();
+}
+
+struct whisper_state * whisper_flat_get_state_from_context(struct whisper_context * ctx) {
+    return whisper_get_state_from_context(ctx);
+}
+
+struct whisper_timings * whisper_flat_get_timings_with_state(struct whisper_state * state) {
+    return whisper_get_timings_with_state(state);
+}
+
+ggml_backend_t whisper_flat_get_preferred_backend(struct whisper_state * state) {
+    return whisper_get_preferred_backend(state);
+}
+
+ggml_backend_t whisper_flat_get_indexed_backend(struct whisper_state* state, size_t i) {
+    return whisper_get_indexed_backend(state, i);
+}
+
+size_t whisper_flat_get_backend_count(struct whisper_state* state) {
+    return whisper_get_backend_count(state);
+}
diff --git a/src/whisper-flat.h b/src/whisper-flat.h
new file mode
100644
index 00000000000..09909b8e91d
--- /dev/null
+++ b/src/whisper-flat.h
@@ -0,0 +1,34 @@
+#pragma once
+
+#ifdef WHISPER_SHARED
+#    ifdef _WIN32
+#        ifdef WHISPER_BUILD
+#            define WHISPER_FLAT_API __declspec(dllexport)
+#        else
+#            define WHISPER_FLAT_API __declspec(dllimport)
+#        endif
+#    else
+#        define WHISPER_FLAT_API __attribute__ ((visibility ("default")))
+#    endif
+#else
+#    define WHISPER_FLAT_API
+#endif
+
+#include "whisper.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+    WHISPER_FLAT_API void whisper_flat_backend_load_all(void);
+    WHISPER_FLAT_API struct whisper_timings * whisper_flat_get_timings_with_state(struct whisper_state * state);
+    WHISPER_FLAT_API struct whisper_state * whisper_flat_get_state_from_context(struct whisper_context * ctx);
+    WHISPER_FLAT_API const char * whisper_flat_get_system_info_json(void);
+    WHISPER_FLAT_API ggml_backend_t whisper_flat_get_preferred_backend(struct whisper_state * state);
+    WHISPER_FLAT_API ggml_backend_t whisper_flat_get_indexed_backend(struct whisper_state* state, size_t i);
+    WHISPER_FLAT_API size_t whisper_flat_get_backend_count(struct whisper_state* state);
+
+#ifdef __cplusplus
+}
+#endif
+
diff --git a/src/whisper.cpp b/src/whisper.cpp
index 2c83f7bab3b..8bec3e2258b 100644
--- a/src/whisper.cpp
+++ b/src/whisper.cpp
@@ -209,12 +209,12 @@ static bool ggml_graph_compute_helper(
 }
 
 static void whisper_load_backends() {
-#ifdef GGML_BACKEND_DL
+#if defined(GGML_BACKEND_DL) && !defined(BINDINGS_FLAT) // flat-binding hosts call whisper_flat_backend_load_all() themselves
     static std::once_flag flag;
     std::call_once(flag, []() {
         ggml_backend_load_all();
     });
 #endif
 }
 
 // TODO: move these functions to ggml-base with support for ggml-backend?
@@ -7550,3 +7550,128 @@ static void whisper_log_callback_default(ggml_log_level level, const char * text
     fputs(text, stderr);
     fflush(stderr);
 }
+
+// whisper_json_append_escaped
+// Appends text to out as the body of a JSON string: '"' and '\' get a
+// backslash prefix and control characters become \u00XX. A null text
+// appends nothing.
+static void whisper_json_append_escaped(std::string & out, const char * text) {
+    if (text == nullptr) {
+        return;
+    }
+    for (const char * p = text; *p != '\0'; ++p) {
+        const unsigned char c = (unsigned char) *p;
+        if (c == '"' || c == '\\') {
+            out += '\\';
+            out += *p;
+        } else if (c < 0x20) {
+            char buf[8];
+            snprintf(buf, sizeof(buf), "\\u%04x", (unsigned) c);
+            out += buf;
+        } else {
+            out += *p;
+        }
+    }
+}
+
+// whisper_get_system_info_json
+// Returns system info as json, useful for language bindings
+// NOTE : While testing features->value always returned an int.
+//        Even though ints are invariably returned there may be
+//        some values that return other types.
+//        This function returns everything quoted (i.e. as a string)
+//        and leaves type-casting to the caller.
+//        Names and values are JSON-escaped, so a backend reporting a
+//        quote or backslash cannot invalidate the output.
+// The result points into a function-local static string that is rebuilt on
+// every call: copy it before the next call and do not call concurrently.
+const char * whisper_get_system_info_json(void) {
+    static std::string s;
+
+    s = "{";
+    s += "\"WHISPER\":{";
+    s += "\"COREML\":\"" + std::to_string(whisper_has_coreml()) + "\",";
+    s += "\"OPENVINO\":\"" + std::to_string(whisper_has_openvino()) + "\"}";
+
+    for (size_t i = 0; i < ggml_backend_reg_count(); i++) {
+        auto * reg = ggml_backend_reg_get(i);
+        auto * get_features_fn = (ggml_backend_get_features_t) ggml_backend_reg_get_proc_address(reg, "ggml_backend_get_features");
+        if (!get_features_fn) {
+            continue;
+        }
+
+        s += ",\"";
+        whisper_json_append_escaped(s, ggml_backend_reg_name(reg));
+        s += "\":{";
+
+        // the list ends at the first entry whose name is null
+        bool first = true;
+        for (ggml_backend_feature * features = get_features_fn(reg); features && features->name; features++) {
+            if (!first) {
+                s += ",";
+            }
+            first = false;
+
+            s += "\"";
+            whisper_json_append_escaped(s, features->name);
+            s += "\":\"";
+            whisper_json_append_escaped(s, features->value);
+            s += "\"";
+        }
+        s += "}";
+    }
+    s += "}";
+
+    return s.c_str();
+}
+
+// whisper_get_state_from_context
+// Returns ctx->state, or nullptr when ctx is null.
+// This is mainly a helper for non-C++ language bindings as whisper_context
+// has embedded C++ specific types (e.g. maps and vectors)
+struct whisper_state * whisper_get_state_from_context(struct whisper_context * ctx) {
+    return ctx ? ctx->state : nullptr;
+}
+
+// whisper_get_timings_with_state
+// Just a version of whisper_get_timings that takes state as a parameter.
+// Returns nullptr for a null state. Otherwise the result is allocated with
+// new and is not freed here, so the caller has to release it.
+struct whisper_timings * whisper_get_timings_with_state(struct whisper_state * state) {
+    if (state == nullptr) {
+        return nullptr;
+    }
+    whisper_timings * timings = new whisper_timings;
+    timings->sample_ms = 1e-3f * state->t_sample_us / std::max(1, state->n_sample);
+    timings->encode_ms = 1e-3f * state->t_encode_us / std::max(1, state->n_encode);
+    timings->decode_ms = 1e-3f * state->t_decode_us / std::max(1, state->n_decode);
+    timings->batchd_ms = 1e-3f * state->t_batchd_us / std::max(1, state->n_batchd);
+    timings->prompt_ms = 1e-3f * state->t_prompt_us / std::max(1, state->n_prompt);
+    return timings;
+}
+
+// whisper_get_preferred_backend
+// Returns the first entry of state->backends, or nullptr when state is null
+// or holds no backends.
+ggml_backend_t whisper_get_preferred_backend(struct whisper_state * state) {
+    if (state == nullptr || state->backends.empty()) {
+        return nullptr;
+    }
+
+    return state->backends[0];
+}
+
+// whisper_get_indexed_backend
+// Returns state->backends[i], or nullptr when state is null or i is out of range.
+ggml_backend_t whisper_get_indexed_backend(struct whisper_state* state, size_t i) {
+    if (state == nullptr || i >= state->backends.size()) {
+        return nullptr;
+    }
+    return state->backends[i];
+}
+
+// whisper_get_backend_count
+// Returns the number of entries in state->backends (0 when state is null).
+size_t whisper_get_backend_count(struct whisper_state* state) {
+    return state ? state->backends.size() : 0;
+}