
Commit 19153e1

Add total params to metadata + cleanup (#207)
* add total params to metadata + cleanup
* comments

1 parent 1db99d4

File tree

11 files changed: +56 −285 lines changed

mlx_lm/MERGE.md

Lines changed: 0 additions & 50 deletions
This file was deleted.

mlx_lm/__main__.py

Lines changed: 0 additions & 1 deletion
@@ -14,7 +14,6 @@
     "fuse",
     "generate",
     "lora",
-    "merge",
     "server",
     "manage",
     "upload",

mlx_lm/awq.py

Lines changed: 1 addition & 2 deletions
@@ -594,11 +594,10 @@ def main():
     )

     config = update_config(model, config)
-    weights = dict(tree_flatten(model.parameters()))
     save(
         args.mlx_path,
         model_path,
-        weights,
+        model,
         tokenizer,
         config,
         hf_repo=args.model,
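
This is the pattern repeated across awq.py, convert.py, dwq.py, and fuse.py: save() now receives the model itself rather than a pre-flattened weights dict, presumably so it can compute metadata such as the total parameter count from the module tree. A minimal before/after sketch, with the argument order taken from the diff above (save()'s full signature is not shown in this commit):

    # Before: every caller flattened the parameters itself.
    from mlx.utils import tree_flatten
    weights = dict(tree_flatten(model.parameters()))
    save(args.mlx_path, model_path, weights, tokenizer, config, hf_repo=args.model)

    # After: pass the model; flattening happens inside save().
    save(args.mlx_path, model_path, model, tokenizer, config, hf_repo=args.model)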

mlx_lm/convert.py

Lines changed: 11 additions & 19 deletions
@@ -6,7 +6,7 @@

 import mlx.core as mx
 import mlx.nn as nn
-from mlx.utils import tree_flatten
+from mlx.utils import tree_map_with_path

 from .utils import (
     dequantize_model,
@@ -120,44 +120,36 @@ def convert(

     if dtype is None:
         dtype = config.get("torch_dtype", None)
-    weights = dict(tree_flatten(model.parameters()))
     if dtype in MODEL_CONVERSION_DTYPES:
         print("[INFO] Using dtype:", dtype)
         dtype = getattr(mx, dtype)
+        cast_predicate = getattr(model, "cast_predicate", lambda _: True)

-        if hasattr(model, "cast_predicate"):
-            cast_predicate = model.cast_predicate()
-        else:
-            cast_predicate = lambda _: True
-        weights = {
-            k: (
-                v.astype(dtype)
-                if cast_predicate(k) and mx.issubdtype(v.dtype, mx.floating)
-                else v
-            )
-            for k, v in weights.items()
-        }
+        def set_dtype(k, v):
+            if cast_predicate(k) and mx.issubdtype(v.dtype, mx.floating):
+                return v.astype(dtype)
+            else:
+                return v
+
+        model.update(tree_map_with_path(set_dtype, model.parameters()))

     if quantize and dequantize:
         raise ValueError("Choose either quantize or dequantize, not both.")

     if quantize:
         print("[INFO] Quantizing")
-        model.load_weights(list(weights.items()))
-        weights, config = quantize_model(
+        model, config = quantize_model(
             model, config, q_group_size, q_bits, quant_predicate=quant_predicate
         )

     if dequantize:
         print("[INFO] Dequantizing")
         model = dequantize_model(model)
-        weights = dict(tree_flatten(model.parameters()))

-    del model
     save(
         mlx_path,
         model_path,
-        weights,
+        model,
         tokenizer,
         config,
         hf_repo=hf_path,
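
The dtype cast now rewrites the parameter tree in place instead of materializing a flat weights dict first. Below is a standalone sketch of the same pattern on a toy model; tree_map_with_path is the real mlx.utils helper, which passes each leaf's dot-separated path (e.g. "layers.0.weight") and its array to the callback, while the model and printout here are purely illustrative:

    import mlx.core as mx
    import mlx.nn as nn
    from mlx.utils import tree_map_with_path

    model = nn.Sequential(nn.Linear(8, 8), nn.Linear(8, 2))  # toy model
    dtype = mx.bfloat16

    def set_dtype(path, param):
        # Cast floating-point leaves only; integer buffers pass through.
        if mx.issubdtype(param.dtype, mx.floating):
            return param.astype(dtype)
        return param

    # Visit every parameter with its path, then write the cast arrays
    # back into the module tree.
    model.update(tree_map_with_path(set_dtype, model.parameters()))
    print(model.parameters()["layers"][0]["weight"].dtype)  # bfloat16

The per-model cast_predicate hook, now read with getattr and a permissive default, slots into the callback exactly as in the diff.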

mlx_lm/dwq.py

Lines changed: 1 addition & 1 deletion
@@ -236,7 +236,7 @@ def main():
     save(
         args.mlx_path,
         model_path,
-        dict(tree_flatten(q_model.parameters())),
+        q_model,
         tokenizer,
         config,
         hf_repo=args.model,

mlx_lm/fuse.py

Lines changed: 3 additions & 4 deletions
@@ -86,18 +86,16 @@ def main() -> None:
         model = dequantize(model)
         config.pop("quantization", None)

-    weights = dict(tree_flatten(model.parameters()))
-
     save_path = Path(args.save_path)
     hf_path = args.hf_path or (args.model if not Path(args.model).exists() else None)
     save(
         save_path,
         model_path,
-        weights,
+        model,
         tokenizer,
         config,
         hf_repo=hf_path,
-        donate_weights=False,
+        donate_model=False,
     )

     if args.export_gguf:
@@ -106,6 +104,7 @@ def main() -> None:
             raise ValueError(
                 f"Model type {model_type} not supported for GGUF conversion."
             )
+        weights = dict(tree_flatten(model.parameters()))
         convert_to_gguf(model_path, weights, config, str(save_path / args.gguf_path))

     if args.upload_repo is not None:
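
Here donate_weights is renamed donate_model to match the new argument, and fuse.py still passes False. Assuming donation means save() may consume the model's buffers to cut peak memory (inferred from the flag's name, not stated in this commit), the call order shows why it must stay off in this file:

    save(save_path, model_path, model, tokenizer, config,
         hf_repo=hf_path, donate_model=False)  # keep the parameters alive...

    # ...because GGUF export flattens and reads them after save() returns.
    weights = dict(tree_flatten(model.parameters()))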

mlx_lm/merge.py

Lines changed: 0 additions & 176 deletions
This file was deleted.

mlx_lm/tuner/utils.py

Lines changed: 14 additions & 10 deletions
@@ -263,20 +263,24 @@ def remove_lora_layers(model: nn.Module) -> nn.Module:
     return model


-def nparams(module):
-    if hasattr(module, "bits"):
-        n = 0 if not hasattr(module, "bias") else module.bias.size
-        return n + module.weight.size * 32 // module.bits
-    return sum(v.size for _, v in tree_flatten(module.parameters()))
-
-
-def print_trainable_parameters(model):
+def get_total_parameters(model):
     leaf_modules = tree_flatten(
         model.leaf_modules(), is_leaf=lambda m: isinstance(m, nn.Module)
     )
-    total_p = sum(nparams(m) for _, m in leaf_modules) / 10**6
+
+    def nparams(m):
+        if hasattr(m, "bits"):
+            n = 0 if not hasattr(m, "bias") else m.bias.size
+            return n + m.weight.size * 32 // m.bits
+        return sum(v.size for _, v in tree_flatten(m.parameters()))
+
+    return sum(nparams(m) for _, m in leaf_modules)
+
+
+def print_trainable_parameters(model):
+    total_p = get_total_parameters(model) / 1e6
     trainable_p = (
-        sum(v.size for _, v in tree_flatten(model.trainable_parameters())) / 10**6
+        sum(v.size for _, v in tree_flatten(model.trainable_parameters())) / 1e6
     )
     print(
         f"Trainable parameters: {(trainable_p * 100 / total_p):.3f}% "