Use 4-bit quantized Gemma 3n models instead of bf16

DePasqualeOrg · DePasqualeOrg · commit 0d6d026997b9 · 2025-06-28T09:39:59.000+02:00
diff --git a/Libraries/MLXVLM/VLMModelFactory.swift b/Libraries/MLXVLM/VLMModelFactory.swift
@@ -169,14 +169,14 @@ public class VLMRegistry: AbstractModelRegistry, @unchecked Sendable {
         extraEOSTokens: ["<end_of_turn>"]
     )
 
-    static public let gemma3n_E2B_instruct = ModelConfiguration(
-        id: "mlx-community/gemma-3n-E2B-it-bf16",
+    static public let gemma3n_E2B = ModelConfiguration(
+        id: "mlx-community/gemma-3n-E2B-it-4bit",
         defaultPrompt: "Describe this image.",
         extraEOSTokens: ["<end_of_turn>"]
     )
 
-    static public let gemma3n_E4B_instruct = ModelConfiguration(
-        id: "mlx-community/gemma-3n-E4B-it-bf16",
+    static public let gemma3n_E4B = ModelConfiguration(
+        id: "mlx-community/gemma-3n-E4B-it-4bit",
         defaultPrompt: "Describe this image.",
         extraEOSTokens: ["<end_of_turn>"]
     )
@@ -196,8 +196,8 @@ public class VLMRegistry: AbstractModelRegistry, @unchecked Sendable {
             gemma3_4B_qat_4bit,
             gemma3_12B_qat_4bit,
             gemma3_27B_qat_4bit,
-            gemma3n_E2B_instruct,
-            gemma3n_E4B_instruct,
+            gemma3n_E2B,
+            gemma3n_E4B,
             smolvlm,
         ]
     }
diff --git a/Tools/llm-tool/LLMTool.swift b/Tools/llm-tool/LLMTool.swift
@@ -302,9 +302,9 @@ struct EvaluateCommand: AsyncParsableCommand {
         let modelFactory: ModelFactory
         let defaultModel: ModelConfiguration
 
-        // Always use VLM factory and gemma3n_E2B_instruct for testing
+        // Always use VLM factory and gemma3n_E2B for testing
         modelFactory = VLMModelFactory.shared
-        defaultModel = MLXVLM.VLMRegistry.gemma3n_E2B_instruct
+        defaultModel = MLXVLM.VLMRegistry.gemma3n_E2B
 
         // Load the model
         let modelContainer = try await memory.start { [args] in

Original file line number	Diff line number	Diff line change
`@@ -169,14 +169,14 @@ public class VLMRegistry: AbstractModelRegistry, @unchecked Sendable {`
`169`	`169`	`extraEOSTokens: ["<end_of_turn>"]`
`170`	`170`	`)`
`171`	`171`
`172`		`- static public let gemma3n_E2B_instruct = ModelConfiguration(`
`173`		`- id: "mlx-community/gemma-3n-E2B-it-bf16",`
	`172`	`+ static public let gemma3n_E2B = ModelConfiguration(`
	`173`	`+ id: "mlx-community/gemma-3n-E2B-it-4bit",`
`174`	`174`	`defaultPrompt: "Describe this image.",`
`175`	`175`	`extraEOSTokens: ["<end_of_turn>"]`
`176`	`176`	`)`
`177`	`177`
`178`		`- static public let gemma3n_E4B_instruct = ModelConfiguration(`
`179`		`- id: "mlx-community/gemma-3n-E4B-it-bf16",`
	`178`	`+ static public let gemma3n_E4B = ModelConfiguration(`
	`179`	`+ id: "mlx-community/gemma-3n-E4B-it-4bit",`
`180`	`180`	`defaultPrompt: "Describe this image.",`
`181`	`181`	`extraEOSTokens: ["<end_of_turn>"]`
`182`	`182`	`)`
`@@ -196,8 +196,8 @@ public class VLMRegistry: AbstractModelRegistry, @unchecked Sendable {`
`196`	`196`	`gemma3_4B_qat_4bit,`
`197`	`197`	`gemma3_12B_qat_4bit,`
`198`	`198`	`gemma3_27B_qat_4bit,`
`199`		`- gemma3n_E2B_instruct,`
`200`		`- gemma3n_E4B_instruct,`
	`199`	`+ gemma3n_E2B,`
	`200`	`+ gemma3n_E4B,`
`201`	`201`	`smolvlm,`
`202`	`202`	`]`
`203`	`203`	`}`