Commit b83c781

Imatrix: first implementation attempt

Refactor imatrix implementation into main example; try to fix CI build.

1 parent f58bad8 commit b83c781

8 files changed: +453 -12 lines changed

clip.hpp

Lines changed: 2 additions & 0 deletions

@@ -661,6 +661,7 @@ class CLIPTextModel : public GGMLBlock {
         if (version == OPEN_CLIP_VIT_BIGG_14) {
             enum ggml_type wtype      = GGML_TYPE_F32;  // tensor_types.find(prefix + "text_projection") != tensor_types.end() ? tensor_types[prefix + "text_projection"] : GGML_TYPE_F32;
             params["text_projection"] = ggml_new_tensor_2d(ctx, wtype, projection_dim, hidden_size);
+            ggml_set_name(params["text_projection"], (prefix + "text_projection").c_str());
         }
     }

@@ -812,6 +813,7 @@ class CLIPProjection : public UnaryBlock {
         } else {
             params["weight"] = ggml_new_tensor_2d(ctx, wtype, in_features, out_features);
         }
+        ggml_set_name(params["weight"], (prefix + "weight").c_str());
     }

 public:
examples/cli/main.cpp

Lines changed: 44 additions & 0 deletions

@@ -22,6 +22,10 @@
 #define STB_IMAGE_RESIZE_STATIC
 #include "stb_image_resize.h"

+#define IMATRIX_IMPL
+#include "imatrix.hpp"
+static IMatrixCollector g_collector;
+
 const char* rng_type_to_str[] = {
     "std_default",
     "cuda",
@@ -147,6 +151,12 @@ struct SDParams {
     int preview_interval = 1;
     std::string preview_path = "preview.png";
     bool taesd_preview = false;
+
+    /* Imatrix params */
+
+    std::string imatrix_out = "";
+
+    std::vector<std::string> imatrix_in = {};
 };

 void print_params(SDParams params) {
@@ -225,6 +235,8 @@ void print_usage(int argc, const char* argv[]) {
     printf("  --upscale-repeats                  Run the ESRGAN upscaler this many times (default 1)\n");
     printf("  --type [TYPE]                      weight type (examples: f32, f16, q4_0, q4_1, q5_0, q5_1, q8_0, q2_K, q3_K, q4_K)\n");
     printf("                                     If not specified, the default is the type of the weight file\n");
+    printf("  --imat-out [PATH]                  If set, compute the imatrix for this run and save it to the provided path\n");
+    printf("  --imat-in [PATH]                   Use imatrix for quantization.\n");
     printf("  --lora-model-dir [DIR]             lora model directory\n");
     printf("  -i, --init-img [IMAGE]             path to the input image, required by img2img\n");
     printf("  --mask [MASK]                      path to the mask image, required by img2img with mask\n");
@@ -719,6 +731,18 @@ void parse_args(int argc, const char** argv, SDParams& params) {
                 break;
             }
             params.preview_path = argv[i];
+        } else if (arg == "--imat-out") {
+            if (++i >= argc) {
+                invalid_arg = true;
+                break;
+            }
+            params.imatrix_out = argv[i];
+        } else if (arg == "--imat-in") {
+            if (++i >= argc) {
+                invalid_arg = true;
+                break;
+            }
+            params.imatrix_in.push_back(std::string(argv[i]));
         } else {
             fprintf(stderr, "error: unknown argument: %s\n", arg.c_str());
             print_usage(argc, argv);
@@ -896,6 +920,10 @@ void step_callback(int step, sd_image_t image) {
         stbi_write_png(preview_path, image.width, image.height, image.channel, image.data, 0);
 }

+static bool collect_imatrix(struct ggml_tensor* t, bool ask, void* user_data) {
+    return g_collector.collect_imatrix(t, ask, user_data);
+}
+
 int main(int argc, const char* argv[]) {
     SDParams params;
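The static collect_imatrix wrapper exists because the backend-eval hook is a C-style function-pointer API, which cannot bind to a member function; the stateless trampoline forwards each call to the collector. This commit routes through the g_collector global; an alternative (sketched below, purely illustrative) would recover the object from the user_data pointer instead, letting several collectors coexist:

    static bool collect_imatrix(struct ggml_tensor* t, bool ask, void* user_data) {
        return static_cast<IMatrixCollector*>(user_data)->collect_imatrix(t, ask, nullptr);
    }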

@@ -910,6 +938,19 @@ int main(int argc, const char* argv[]) {
         printf("%s", sd_get_system_info());
     }

+    if (params.imatrix_out != "") {
+        sd_set_backend_eval_callback((sd_graph_eval_callback_t)collect_imatrix, &params);
+    }
+    if (params.imatrix_out != "" || params.mode == CONVERT || params.wtype != SD_TYPE_COUNT) {
+        setConvertImatrixCollector((void*)&g_collector);
+        for (const auto& in_file : params.imatrix_in) {
+            printf("loading imatrix from '%s'\n", in_file.c_str());
+            if (!g_collector.load_imatrix(in_file.c_str())) {
+                printf("Failed to load %s\n", in_file.c_str());
+            }
+        }
+    }
+
     if (params.mode == CONVERT) {
         bool success = convert(params.model_path.c_str(), params.vae_path.c_str(), params.output_path.c_str(), params.wtype);
         if (!success) {
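Loading existing imatrix files before generating or converting lets statistics accumulate across sessions: the collector seeds itself from earlier runs, adds what the current run observes, and the quantizer sees the merged totals. A plausible merge rule inside load_imatrix (assumed here, mirroring how llama.cpp's collector combines files) adds both the running sums and the sample counts:

    // Assumed accumulation when an entry for a tensor already exists:
    for (size_t i = 0; i < e.values.size(); i++) {
        e.values[i] += loaded_values[i];  // sums of squared activations
    }
    e.ncall += loaded_ncall;              // number of contributing evaluations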
@@ -1233,6 +1274,9 @@ int main(int argc, const char* argv[]) {
         free(results[i].data);
         results[i].data = NULL;
     }
+    if (params.imatrix_out != "") {
+        g_collector.save_imatrix(params.imatrix_out);
+    }
     free(results);
     free_sd_ctx(sd_ctx);
     free(control_image_buffer);

ggml_extend.hpp

Lines changed: 38 additions & 8 deletions
@@ -23,9 +23,11 @@
 #include "ggml-alloc.h"
 #include "ggml-backend.h"
 #include "ggml-cpu.h"
+#include "ggml/src/ggml-impl.h"
 #include "ggml.h"

 #include "model.h"
+#include "util.h"

 #ifdef SD_USE_CUDA
 #include "ggml-cuda.h"
@@ -117,13 +119,6 @@ __STATIC_INLINE__ struct ggml_tensor* ggml_kronecker(ggml_context* ctx, struct g
         b);
 }

-__STATIC_INLINE__ void ggml_log_callback_default(ggml_log_level level, const char* text, void* user_data) {
-    (void)level;
-    (void)user_data;
-    fputs(text, stderr);
-    fflush(stderr);
-}
-
 __STATIC_INLINE__ void ggml_tensor_set_f32_randn(struct ggml_tensor* tensor, std::shared_ptr<RNG> rng) {
     uint32_t n = (uint32_t)ggml_nelements(tensor);
     std::vector<float> random_numbers = rng->randn(n);
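The local copy of ggml_log_callback_default is dropped here because ggml itself exports a function with this name and signature (declared in ggml.h), making the duplicate definition in this header redundant.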
@@ -1312,7 +1307,39 @@ struct GGMLRunner {
             ggml_backend_cpu_set_n_threads(backend, n_threads);
         }

-        ggml_backend_graph_compute(backend, gf);
+        auto callback_eval = get_callback_eval();
+
+        if (!callback_eval) {
+            ggml_backend_graph_compute(backend, gf);
+        } else {
+            void* callback_eval_user_data = get_callback_eval_user_data();
+            for (int j0 = 0; j0 < gf->n_nodes; j0++) {
+                struct ggml_tensor* t = gf->nodes[j0];
+
+                // check if the user needs data from this node
+                bool need = callback_eval(t, true, callback_eval_user_data);
+
+                int j1 = j0;
+
+                // determine the range [j0, j1] of nodes that can be computed together
+                while (!need && j1 < gf->n_nodes - 1) {
+                    t    = gf->nodes[++j1];
+                    need = callback_eval(t, true, callback_eval_user_data);
+                }
+
+                struct ggml_cgraph gv = ggml_graph_view(gf, j0, j1 + 1);
+
+                ggml_backend_graph_compute_async(backend, &gv);
+
+                if (need && !callback_eval(t, false, callback_eval_user_data)) {
+                    break;
+                }
+
+                j0 = j1;
+            }
+            ggml_backend_synchronize(backend);
+        }
+
 #ifdef GGML_PERF
         ggml_graph_print(gf);
 #endif
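This loop implements the same two-phase "ask" protocol as llama.cpp's scheduler eval callback: each node is first offered to the callback with ask == true ("do you want to observe this node?"); consecutive unclaimed nodes are batched into one graph view and computed asynchronously; once a claimed node has been computed, the callback runs again with ask == false to read the data, and returning false aborts the remaining nodes. A minimal callback fitting this contract (hypothetical example):

    static bool my_eval_cb(struct ggml_tensor* t, bool ask, void* user_data) {
        if (ask) {
            // ask-phase: claim only matrix multiplications
            return t->op == GGML_OP_MUL_MAT;
        }
        // collect-phase: t is computed; t->src[1] holds the activations fed
        // into the weight t->src[0]. Accumulate statistics here.
        return true;  // false would abort the rest of the graph
    }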
@@ -1416,6 +1443,7 @@ class Linear : public UnaryBlock {
             wtype = GGML_TYPE_F32;
         }
         params["weight"] = ggml_new_tensor_2d(ctx, wtype, in_features, out_features);
+        ggml_set_name(params["weight"], (prefix + "weight").c_str());
         if (bias) {
             enum ggml_type wtype = GGML_TYPE_F32;  // tensor_types.find(prefix + "bias") != tensor_types.end() ? tensor_types[prefix + "bias"] : GGML_TYPE_F32;
             params["bias"] = ggml_new_tensor_1d(ctx, wtype, out_features);
@@ -1579,6 +1607,8 @@ class LayerNorm : public UnaryBlock {
         if (elementwise_affine) {
             enum ggml_type wtype = GGML_TYPE_F32;  // tensor_types.find(prefix + "weight") != tensor_types.end() ? tensor_types[prefix + "weight"] : GGML_TYPE_F32;
             params["weight"] = ggml_new_tensor_1d(ctx, wtype, normalized_shape);
+            ggml_set_name(params["weight"], (prefix + "weight").c_str());
+
             if (bias) {
                 enum ggml_type wtype = GGML_TYPE_F32;  // tensor_types.find(prefix + "bias") != tensor_types.end() ? tensor_types[prefix + "bias"] : GGML_TYPE_F32;
                 params["bias"] = ggml_new_tensor_1d(ctx, wtype, normalized_shape);
