diff --git a/CMakeLists.txt b/CMakeLists.txt
index 34ef7958f6d..4fec3332aa3 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -70,6 +70,9 @@ option(WHISPER_ALL_WARNINGS_3RD_PARTY "whisper: enable all compiler warnings in
 option(WHISPER_FATAL_WARNINGS  "whisper: enable -Werror flag"               OFF)
 option(WHISPER_USE_SYSTEM_GGML "whisper: use system-installed GGML library" OFF)
 
+# flat bindings: when ON, ggml-flat.cpp and whisper-flat.cpp are added to the ggml and whisper libraries
+option(BINDINGS_FLAT "Add extra flat definitions to Whisper + GGML" OFF)
+
 # sanitizers
 option(WHISPER_SANITIZE_THREAD    "whisper: enable thread sanitizer"    OFF)
 option(WHISPER_SANITIZE_ADDRESS   "whisper: enable address sanitizer"   OFF)
diff --git a/examples/bench/bench.cpp b/examples/bench/bench.cpp
index 54f73110d42..e48533ce597 100644
--- a/examples/bench/bench.cpp
+++ b/examples/bench/bench.cpp
@@ -1,4 +1,7 @@
 #include "whisper.h"
+#ifdef BINDINGS_FLAT // whisper-flat.cpp is only compiled when BINDINGS_FLAT is enabled
+#include "whisper-flat.h"
+#endif
 
 #include <cstdio>
 #include <cstring>
@@ -61,6 +64,10 @@ void whisper_print_usage(int /*argc*/, char ** argv, const whisper_params & para
 static int whisper_bench_full(const whisper_params & params) {
     // whisper init
 
+    #ifdef BINDINGS_FLAT // flat builds load the ggml backends explicitly
+    whisper_flat_backend_load_all();
+    #endif
+
     struct whisper_context_params cparams = whisper_context_default_params();
 
     cparams.use_gpu    = params.use_gpu;
diff --git a/examples/cli/cli.cpp b/examples/cli/cli.cpp
index 4b2b3521b80..46217672748 100644
--- a/examples/cli/cli.cpp
+++ b/examples/cli/cli.cpp
@@ -2,6 +2,9 @@
 #include "common-whisper.h"
 
 #include "whisper.h"
+#ifdef BINDINGS_FLAT // whisper-flat.cpp is only compiled when BINDINGS_FLAT is enabled
+#include "whisper-flat.h"
+#endif
 #include "grammar-parser.h"
 
 #include <cmath>
@@ -1004,6 +1007,10 @@ int main(int argc, char ** argv) {
 
     // whisper init
 
+    #ifdef BINDINGS_FLAT // flat builds load the ggml backends explicitly
+    whisper_flat_backend_load_all();
+    #endif
+
     struct whisper_context_params cparams = whisper_context_default_params();
 
     cparams.use_gpu    = params.use_gpu;
diff --git a/examples/command/command.cpp b/examples/command/command.cpp
index 9dc8f629995..3cbc892a206 100644
--- a/examples/command/command.cpp
+++ b/examples/command/command.cpp
@@ -9,6 +9,9 @@
 #include "common-sdl.h"
 #include "common.h"
 #include "whisper.h"
+#ifdef BINDINGS_FLAT // whisper-flat.cpp is only compiled when BINDINGS_FLAT is enabled
+#include "whisper-flat.h"
+#endif
 #include "grammar-parser.h"
 
 #include <algorithm>
@@ -692,6 +695,10 @@ int main(int argc, char ** argv) {
 
     // whisper init
 
+    #ifdef BINDINGS_FLAT // flat builds load the ggml backends explicitly
+    whisper_flat_backend_load_all();
+    #endif
+
     struct whisper_context_params cparams = whisper_context_default_params();
 
     cparams.use_gpu    = params.use_gpu;
diff --git a/examples/server/server.cpp b/examples/server/server.cpp
index 38da61673df..230f6980003 100644
--- a/examples/server/server.cpp
+++ b/examples/server/server.cpp
@@ -2,6 +2,9 @@
 #include "common-whisper.h"
 
 #include "whisper.h"
+#ifdef BINDINGS_FLAT // whisper-flat.cpp is only compiled when BINDINGS_FLAT is enabled
+#include "whisper-flat.h"
+#endif
 #include "httplib.h"
 #include "json.hpp"
 
@@ -544,6 +547,10 @@ int main(int argc, char ** argv) {
         check_ffmpeg_availibility();
     }
     // whisper init
+    #ifdef BINDINGS_FLAT // flat builds load the ggml backends explicitly
+    whisper_flat_backend_load_all();
+    #endif
+
     struct whisper_context_params cparams = whisper_context_default_params();
 
     cparams.use_gpu    = params.use_gpu;
diff --git a/examples/stream/stream.cpp b/examples/stream/stream.cpp
index 65c6587db92..2505ffb79d9 100644
--- a/examples/stream/stream.cpp
+++ b/examples/stream/stream.cpp
@@ -6,6 +6,9 @@
 #include "common.h"
 #include "common-whisper.h"
 #include "whisper.h"
+#ifdef BINDINGS_FLAT // whisper-flat.cpp is only compiled when BINDINGS_FLAT is enabled
+#include "whisper-flat.h"
+#endif
 
 #include <chrono>
 #include <cstdio>
@@ -155,6 +158,10 @@ int main(int argc, char ** argv) {
         exit(0);
     }
 
+    #ifdef BINDINGS_FLAT // flat builds load the ggml backends explicitly
+    whisper_flat_backend_load_all();
+    #endif
+
     struct whisper_context_params cparams = whisper_context_default_params();
 
     cparams.use_gpu    = params.use_gpu;
diff --git a/examples/talk-llama/talk-llama.cpp b/examples/talk-llama/talk-llama.cpp
index 9097c491b61..3b131590cd5 100644
--- a/examples/talk-llama/talk-llama.cpp
+++ b/examples/talk-llama/talk-llama.cpp
@@ -5,6 +5,9 @@
 #include "common.h"
 #include "common-whisper.h"
 #include "whisper.h"
+#ifdef BINDINGS_FLAT // whisper-flat.cpp is only compiled when BINDINGS_FLAT is enabled
+#include "whisper-flat.h"
+#endif
 #include "llama.h"
 
 #include <chrono>
@@ -287,6 +290,10 @@ int main(int argc, char ** argv) {
 
     // whisper init
 
+    #ifdef BINDINGS_FLAT // flat builds load the ggml backends explicitly
+    whisper_flat_backend_load_all();
+    #endif
+
     struct whisper_context_params cparams = whisper_context_default_params();
 
     cparams.use_gpu    = params.use_gpu;
diff --git a/examples/wchess/wchess.cmd/wchess.cmd.cpp b/examples/wchess/wchess.cmd/wchess.cmd.cpp
index 4d049976315..7f60f262c7f 100644
--- a/examples/wchess/wchess.cmd/wchess.cmd.cpp
+++ b/examples/wchess/wchess.cmd/wchess.cmd.cpp
@@ -7,6 +7,9 @@
 
 #include "WChess.h"
 #include "common-sdl.h"
+#ifdef BINDINGS_FLAT // whisper-flat.cpp is only compiled when BINDINGS_FLAT is enabled
+#include "whisper-flat.h"
+#endif
 #include <iostream>
 
 #include <memory>
@@ -182,6 +185,10 @@ int main(int argc, char ** argv) {
 
     // whisper init
 
+    #ifdef BINDINGS_FLAT // flat builds load the ggml backends explicitly
+    whisper_flat_backend_load_all();
+    #endif
+
     struct whisper_context_params cparams = whisper_context_default_params();
 
     cparams.use_gpu    = params.use_gpu;
diff --git a/ggml/include/ggml-backend.h b/ggml/include/ggml-backend.h
index 64671495b38..b96a1f8579d 100644
--- a/ggml/include/ggml-backend.h
+++ b/ggml/include/ggml-backend.h
@@ -348,6 +348,10 @@ extern "C" {
     // CPU buffer types are always available
     GGML_API ggml_backend_buffer_t      ggml_backend_cpu_buffer_from_ptr(void * ptr, size_t size);
     GGML_API ggml_backend_buffer_type_t ggml_backend_cpu_buffer_type(void);
+
+    // Public wrapper around the registry's ggml_backend_load_best(name, silent, dir_path);
+    // `silent` is true when the library is compiled with NDEBUG and false otherwise
+    GGML_API ggml_backend_reg_t ggml_backend_try_load_best(const char * name, const char * dir_path);
 
 #ifdef  __cplusplus
 }
diff --git a/ggml/src/CMakeLists.txt b/ggml/src/CMakeLists.txt
index 43d9fc4fe25..45af1d3e959 100644
--- a/ggml/src/CMakeLists.txt
+++ b/ggml/src/CMakeLists.txt
@@ -208,8 +208,22 @@ if (GGML_BACKEND_DL)
     target_compile_definitions(ggml-base PUBLIC GGML_BACKEND_DL)
 endif()
 
+# Sources compiled into the ggml library; the flat binding extras are appended
+# below when BINDINGS_FLAT is enabled.
+set(GGML_LIBRARY_SOURCES ggml-backend-reg.cpp)
+
+if (BINDINGS_FLAT)
+    message(STATUS "Adding FLAT GGML binding extras")
+
+    # sources of the flat binding layer
+    set(FLAT_GGML_SOURCES ggml-flat.cpp)
+
+    list(APPEND GGML_LIBRARY_SOURCES ${FLAT_GGML_SOURCES})
+endif()
+
 add_library(ggml
-            ggml-backend-reg.cpp)
+            ${GGML_LIBRARY_SOURCES}
+            )
 
 target_link_libraries(ggml PUBLIC ggml-base)
 
diff --git a/ggml/src/ggml-backend-reg.cpp b/ggml/src/ggml-backend-reg.cpp
index 405d8e31514..40ba454b695 100644
--- a/ggml/src/ggml-backend-reg.cpp
+++ b/ggml/src/ggml-backend-reg.cpp
@@ -584,3 +584,13 @@ void ggml_backend_load_all_from_path(const char * dir_path) {
         ggml_backend_load(backend_path);
     }
 }
+
+// Public wrapper around ggml_backend_load_best: forwards with silent = true in NDEBUG builds, silent = false otherwise
+ggml_backend_reg_t ggml_backend_try_load_best(const char * name, const char * dir_path) {
+#if defined(NDEBUG)
+    const bool quiet = true;
+#else
+    const bool quiet = false;
+#endif
+    return ggml_backend_load_best(name, quiet, dir_path);
+}
diff --git a/ggml/src/ggml-flat.cpp b/ggml/src/ggml-flat.cpp
new file mode 100644
index 00000000000..ea727ecbec9
--- /dev/null
+++ b/ggml/src/ggml-flat.cpp
@@ -0,0 +1,35 @@
+
+#ifdef _WIN32
+#define WIN32_LEAN_AND_MEAN
+#ifndef NOMINMAX
+#   define NOMINMAX
+#endif
+#include <windows.h>
+#endif
+
+#include "ggml-backend.h"
+#include "ggml-backend-impl.h"
+#include "ggml-alloc.h"
+#include "ggml-impl.h"
+
+#include <assert.h>
+#include <limits.h>
+#include <stdarg.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <string>
+#include <vector>
+#include <algorithm>
+
+#ifdef __APPLE__
+#include <sys/types.h>
+#include <sys/sysctl.h>
+#endif
+
+#include "ggml-flat.h"
+
+
+
+
+
diff --git a/ggml/src/ggml-flat.h b/ggml/src/ggml-flat.h
new file mode 100644
index 00000000000..7537d7249f2
--- /dev/null
+++ b/ggml/src/ggml-flat.h
@@ -0,0 +1,28 @@
+#pragma once
+
+// GGML_FLAT_API decorates the flat binding declarations.
+// Static builds (GGML_SHARED undefined) get a plain `extern`. Shared builds on
+// Windows (non-MinGW) export the symbols when GGML_BUILD is defined and import
+// them otherwise; all other shared builds use default symbol visibility.
+
+#if !defined(GGML_SHARED)
+#    define GGML_FLAT_API extern
+#elif defined(_WIN32) && !defined(__MINGW32__)
+#    ifdef GGML_BUILD
+#        define GGML_FLAT_API __declspec(dllexport) extern
+#    else
+#        define GGML_FLAT_API __declspec(dllimport) extern
+#    endif
+#else
+#    define GGML_FLAT_API __attribute__ ((visibility ("default"))) extern
+#endif
+
+#ifdef  __cplusplus
+extern "C" {
+#endif
+
+
+
+#ifdef  __cplusplus
+}
+#endif
diff --git a/ggml/src/ggml-opencl/ggml-opencl.cpp b/ggml/src/ggml-opencl/ggml-opencl.cpp
index 05a2f4e630a..b2c5c3a4979 100644
--- a/ggml/src/ggml-opencl/ggml-opencl.cpp
+++ b/ggml/src/ggml-opencl/ggml-opencl.cpp
@@ -2,9 +2,11 @@
 #define CL_USE_DEPRECATED_OPENCL_1_2_APIS
 
 // suppress warnings in CL headers for GCC and Clang
+#if defined(__GNUC__) || defined(__clang__) // MSVC does not understand these pragmas; clang-cl still defines __clang__
 #pragma GCC diagnostic ignored "-Woverlength-strings"
 #ifdef __clang__
 #pragma GCC diagnostic ignored "-Wgnu-anonymous-struct"
 #endif
+#endif // __GNUC__ || __clang__
 
 #include "ggml-opencl.h"
diff --git a/include/whisper.h b/include/whisper.h
index 1e1375033ad..37b43b2c073 100644
--- a/include/whisper.h
+++ b/include/whisper.h
@@ -668,6 +668,15 @@ extern "C" {
     // Get the no_speech probability for the specified segment
     WHISPER_API float whisper_full_get_segment_no_speech_prob           (struct whisper_context * ctx, int i_segment);
     WHISPER_API float whisper_full_get_segment_no_speech_prob_from_state(struct whisper_state * state, int i_segment);
+
+    // Helpers wrapped by the whisper_flat_* functions in whisper-flat.cpp (declared without WHISPER_API)
+    const char * whisper_get_system_info_json(void);
+    struct whisper_state * whisper_get_state_from_context(struct whisper_context * ctx);
+    struct whisper_timings * whisper_get_timings_with_state(struct whisper_state * state);
+    ggml_backend_t whisper_get_preferred_backend(struct whisper_state * state);
+    ggml_backend_t whisper_get_indexed_backend(struct whisper_state* state, size_t i);
+    size_t whisper_get_backend_count(struct whisper_state* state);
+
 #ifdef __cplusplus
 }
 #endif
diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
index a091e66a25f..5f90e81a3ac 100644
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -100,10 +100,30 @@ endif()
 
 # whisper
 
+set(WHISPER_LIBRARY_SOURCES
+    ../include/whisper.h
+    whisper-arch.h
+    whisper.cpp
+    )
+
+if (BINDINGS_FLAT)
+    message(STATUS "Adding FLAT Whisper binding extras")
+
+    set(FLAT_WHISPER_SOURCES
+       whisper-flat.cpp
+       )
+
+    list(APPEND WHISPER_LIBRARY_SOURCES ${FLAT_WHISPER_SOURCES})
+endif()
+
 add_library(whisper
-            ../include/whisper.h
-            whisper-arch.h
-            whisper.cpp
+            ${WHISPER_LIBRARY_SOURCES}
             )
 
+if (BINDINGS_FLAT)
+    # A CMake option is invisible to the preprocessor: publish it as a compile
+    # definition so `defined(BINDINGS_FLAT)` works in the library and its consumers.
+    target_compile_definitions(whisper PUBLIC BINDINGS_FLAT)
+endif()
+
 # Set the version numbers
diff --git a/src/whisper-flat.cpp b/src/whisper-flat.cpp
new file mode 100644
index 00000000000..33f51d67403
--- /dev/null
+++ b/src/whisper-flat.cpp
@@ -0,0 +1,58 @@
+#include "whisper.h"
+#include "whisper-arch.h"
+
+#include "ggml.h"
+#include "ggml-cpp.h"
+#include "ggml-alloc.h"
+#include "ggml-backend.h"
+
+#include <atomic>
+#include <algorithm>
+#include <cassert>
+#define _USE_MATH_DEFINES
+#include <cmath>
+#include <climits>
+#include <codecvt>
+#include <cstdarg>
+#include <cstdio>
+#include <cstring>
+#include <fstream>
+#include <functional>
+#include <map>
+#include <mutex>
+#include <random>
+#include <regex>
+#include <set>
+#include <string>
+#include <thread>
+#include <vector>
+
+#include "whisper-flat.h"
+// Flat C wrappers: each function forwards unchanged to the ggml_* / whisper_* call in its body
+void whisper_flat_backend_load_all(void) {
+    ggml_backend_load_all();
+}
+
+const char * whisper_flat_get_system_info_json(void) {
+    return whisper_get_system_info_json();
+}
+
+struct whisper_state * whisper_flat_get_state_from_context(struct whisper_context * ctx) {
+    return whisper_get_state_from_context(ctx);
+}
+
+struct whisper_timings * whisper_flat_get_timings_with_state(struct whisper_state * state) {
+    return whisper_get_timings_with_state(state);
+}
+
+ggml_backend_t whisper_flat_get_preferred_backend(struct whisper_state * state) {
+    return whisper_get_preferred_backend(state);
+}
+
+ggml_backend_t whisper_flat_get_indexed_backend(struct whisper_state* state, size_t i) {
+    return whisper_get_indexed_backend(state, i);
+}
+
+size_t whisper_flat_get_backend_count(struct whisper_state* state) {
+    return whisper_get_backend_count(state);
+}
diff --git a/src/whisper-flat.h b/src/whisper-flat.h
new file mode 100644
index 00000000000..09909b8e91d
--- /dev/null
+++ b/src/whisper-flat.h
@@ -0,0 +1,53 @@
+#pragma once
+
+// Flat C entry points for language bindings.
+// Each function forwards to a whisper_* / ggml_* function (see whisper-flat.cpp).
+
+#ifdef WHISPER_SHARED
+#    ifdef _WIN32
+#        ifdef WHISPER_BUILD
+#            define WHISPER_FLAT_API __declspec(dllexport)
+#        else
+#            define WHISPER_FLAT_API __declspec(dllimport)
+#        endif
+#    else
+#        define WHISPER_FLAT_API __attribute__ ((visibility ("default")))
+#    endif
+#else
+#    define WHISPER_FLAT_API
+#endif
+
+#include "whisper.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+    // Loads all available ggml backends (forwards to ggml_backend_load_all).
+    WHISPER_FLAT_API void whisper_flat_backend_load_all(void);
+
+    // Average time in ms per counted sample/encode/decode/batchd/prompt step of `state`.
+    // Returns nullptr when `state` is null. The struct is allocated with C++ `new`
+    // on every call and handed to the caller.
+    // NOTE(review): no matching release function is declared here - confirm how bindings free it.
+    WHISPER_FLAT_API struct whisper_timings * whisper_flat_get_timings_with_state(struct whisper_state * state);
+
+    // Returns the state attached to `ctx`, or nullptr when the context has none.
+    WHISPER_FLAT_API struct whisper_state * whisper_flat_get_state_from_context(struct whisper_context * ctx);
+
+    // System and backend feature info as a JSON document in which every value is a string.
+    // The pointer refers to an internal static buffer that is rewritten by the next call.
+    WHISPER_FLAT_API const char * whisper_flat_get_system_info_json(void);
+
+    // First backend held by `state`, or nullptr when it holds none.
+    WHISPER_FLAT_API ggml_backend_t whisper_flat_get_preferred_backend(struct whisper_state * state);
+
+    // Backend number `i` held by `state`, or nullptr when `i` is out of range.
+    WHISPER_FLAT_API ggml_backend_t whisper_flat_get_indexed_backend(struct whisper_state* state, size_t i);
+
+    // Number of backends held by `state`.
+    WHISPER_FLAT_API size_t whisper_flat_get_backend_count(struct whisper_state* state);
+
+#ifdef __cplusplus
+}
+#endif
diff --git a/src/whisper.cpp b/src/whisper.cpp
index 2c83f7bab3b..8bec3e2258b 100644
--- a/src/whisper.cpp
+++ b/src/whisper.cpp
@@ -209,12 +209,12 @@ static bool ggml_graph_compute_helper(
 }
 
 static void whisper_load_backends() {
-#ifdef GGML_BACKEND_DL
+#if defined(GGML_BACKEND_DL) && !defined(BINDINGS_FLAT) // flat-binding builds load the backends explicitly (whisper_flat_backend_load_all)
     static std::once_flag flag;
     std::call_once(flag, []() {
         ggml_backend_load_all();
     });
 #endif
 }
 
 // TODO: move these functions to ggml-base with support for ggml-backend?
@@ -1313,8 +1313,10 @@ static size_t aheads_masks_nbytes(struct whisper_aheads_masks & aheads_masks) {
 static ggml_backend_t whisper_backend_init_gpu(const whisper_context_params & params) {
     ggml_log_set(g_state.log_callback, g_state.log_callback_user_data);
 
+    #if defined(GGML_BACKEND_DL) && !defined(BINDINGS_FLAT) // flat-binding builds load the backends explicitly
     whisper_load_backends();
+    #endif
 
     ggml_backend_dev_t dev = nullptr;
 
     int cnt = 0;
@@ -4321,7 +4323,9 @@ static int whisper_has_openvino(void) {
 const char * whisper_print_system_info(void) {
     static std::string s;
 
+    #if defined(GGML_BACKEND_DL) && !defined(BINDINGS_FLAT) // flat-binding builds load the backends explicitly
     whisper_load_backends();
+    #endif
 
     s  = "";
     s += "WHISPER : ";
@@ -6776,7 +6780,9 @@ WHISPER_API int whisper_bench_ggml_mul_mat(int n_threads) {
 }
 
 WHISPER_API const char * whisper_bench_ggml_mul_mat_str(int n_threads) {
+    #if defined(GGML_BACKEND_DL) && !defined(BINDINGS_FLAT) // flat-binding builds load the backends explicitly
     whisper_load_backends();
+    #endif
 
     static std::string s;
     s = "";
@@ -7550,3 +7556,122 @@ static void whisper_log_callback_default(ggml_log_level level, const char * text
     fputs(text, stderr);
     fflush(stderr);
 }
+
+// Appends `text` to `out` as a quoted JSON string. Quotes, backslashes and control
+// characters are escaped so that backend-provided text cannot break the document;
+// a null pointer is written as an empty string.
+static void whisper_json_append_quoted(std::string & out, const char * text) {
+    static const char hex_digits[] = "0123456789abcdef";
+
+    out += '"';
+    for (const char * p = text ? text : ""; *p != '\0'; ++p) {
+        const unsigned char c = (unsigned char) *p;
+        if (c == '"' || c == '\\') {
+            out += '\\';
+            out += (char) c;
+        } else if (c < 0x20) {
+            out += "\\u00";
+            out += hex_digits[c >> 4];
+            out += hex_digits[c & 0x0F];
+        } else {
+            out += (char) c;
+        }
+    }
+    out += '"';
+}
+
+// whisper_get_system_info_json
+// Returns system info as JSON, useful for language bindings.
+// NOTE : While testing, features->value always held an integer, but there may
+//        be backends that report other kinds of values.
+//        Every value is therefore emitted as a quoted, escaped JSON string and
+//        type-casting is left to the caller, so the output stays valid JSON
+//        whatever a backend reports.
+//        The returned pointer refers to a function-local static buffer that is
+//        rebuilt on every call.
+const char * whisper_get_system_info_json(void) {
+    static std::string s;
+
+    // like whisper_print_system_info: load the backends before enumerating the registry
+    whisper_load_backends();
+
+    s  = "{";
+    s += "\"WHISPER\":{";
+    s += "\"COREML\":\""    + std::to_string(whisper_has_coreml())     + "\",";
+    s += "\"OPENVINO\":\""  + std::to_string(whisper_has_openvino())   + "\"}";
+
+    for (size_t i = 0; i < ggml_backend_reg_count(); i++) {
+        auto * reg = ggml_backend_reg_get(i);
+        auto * get_features_fn = (ggml_backend_get_features_t) ggml_backend_reg_get_proc_address(reg, "ggml_backend_get_features");
+        if (get_features_fn == nullptr) {
+            continue;
+        }
+
+        s += ",";
+        whisper_json_append_quoted(s, ggml_backend_reg_name(reg));
+        s += ":{";
+
+        const char * separator = "";
+        for (ggml_backend_feature * feature = get_features_fn(reg); feature->name; feature++) {
+            s += separator;
+            separator = ",";
+
+            whisper_json_append_quoted(s, feature->name);
+            s += ":";
+            whisper_json_append_quoted(s, feature->value);
+        }
+        s += "}";
+    }
+    s += "}";
+
+    return s.c_str();
+}
+
+// whisper_get_state_from_context
+// Returns the state attached to the supplied context (nullptr when ctx is null or has no state)
+// This is mainly a helper for non-C++ language bindings as whisper_context
+// has embedded C++ specific types (e.g. maps and vectors)
+struct whisper_state * whisper_get_state_from_context(struct whisper_context * ctx) {
+    // ctx->state is already nullptr when no state is attached, so only ctx itself needs a guard
+    if (ctx == nullptr) {
+        return nullptr;
+    }
+    return ctx->state;
+}
+
+// whisper_get_timings_with_state
+// Variant of whisper_get_timings that reads a whisper_state directly; the result is allocated with `new` and returned to the caller
+struct whisper_timings * whisper_get_timings_with_state(struct whisper_state * state) {
+    if (!state) {
+        return nullptr;
+    }
+    whisper_timings * const result = new whisper_timings;
+    result->sample_ms = 1e-3f * state->t_sample_us / std::max(1, state->n_sample);
+    result->encode_ms = 1e-3f * state->t_encode_us / std::max(1, state->n_encode);
+    result->decode_ms = 1e-3f * state->t_decode_us / std::max(1, state->n_decode);
+    result->batchd_ms = 1e-3f * state->t_batchd_us / std::max(1, state->n_batchd);
+    result->prompt_ms = 1e-3f * state->t_prompt_us / std::max(1, state->n_prompt);
+    return result;
+}
+
+ggml_backend_t whisper_get_preferred_backend(struct whisper_state * state) {
+    // first entry of state->backends; nullptr when state is null or holds no backends
+    if (state == nullptr || state->backends.empty()) {
+        return nullptr;
+    }
+    return state->backends[0];
+}
+
+// whisper_get_indexed_backend
+// Returns entry i of state->backends, or nullptr when state is null or i is out of range
+ggml_backend_t whisper_get_indexed_backend(struct whisper_state * state, size_t i) {
+    // the range check also covers an empty backend list
+    if (state == nullptr || i >= state->backends.size()) {
+        return nullptr;
+    }
+    return state->backends[i];
+}
+
+size_t whisper_get_backend_count(struct whisper_state * state) {
+    return state ? state->backends.size() : 0; // a null state holds no backends
+}