Fix thread safety issues in MLX concurrent inference (Samplers + TokenIterator) #351

Open · wants to merge 4 commits into base: main
29 changes: 28 additions & 1 deletion Libraries/MLXLMCommon/Evaluate.swift
@@ -133,6 +133,8 @@ public struct ArgMaxSampler: LogitSampler {

/// Sampler that uses `topP` and `temperature` to sample the logits.
public struct TopPSampler: LogitSampler {
private static let randomStateLock = NSLock()
Collaborator:
This won't protect it -- randomState is global, and this lock only protects callers of TopPSampler. For example, it will not guard against concurrent use in CategoricalSampler.

The better way to fix this would be to have random state scoped to the sampler itself, see:

To use locks, all callers of Random would have to use the same lock. Actually it is more complicated than that -- the calls to globalState are themselves thread safe:

but the calls to evaluate the resulting MLXArrays are not -- you need to guard the eval sites.
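The "random state scoped to the sampler itself" suggestion could look like the following sketch, assuming mlx-swift's explicit-key APIs in MLXRandom (`key(_:)`, `split(key:)`, and `categorical(_:key:)`). `KeyedCategoricalSampler` is a hypothetical name, and note it does not conform to `LogitSampler` as written, since `sample` must be mutating to advance the key:

```swift
import MLX
import MLXRandom

// Hypothetical sketch: each sampler owns its own PRNG key chain, so no
// global random state is shared between concurrent generations.
public struct KeyedCategoricalSampler {
    let temp: MLXArray
    private var key: MLXArray

    public init(temperature: Float, seed: UInt64 = 0) {
        self.temp = MLXArray(temperature)
        self.key = MLXRandom.key(seed)
    }

    // Mutating: advances the instance-local key on every draw.
    public mutating func sample(logits: MLXArray) -> MLXArray {
        let (next, subkey) = MLXRandom.split(key: key)
        key = next
        // Passing an explicit `key:` bypasses MLXRandom's global state.
        return MLXRandom.categorical(logits / temp, key: subkey)
    }
}
```

Because each instance derives its draws from its own key, two samplers running on different threads never touch shared random state, so no lock is needed at the sampling call site.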


let temp: MLXArray
let topP: MLXArray

@@ -166,6 +168,10 @@ public struct TopPSampler: LogitSampler {
logits = logits.asType(.float32)
}

// Thread-safe sampling to prevent concurrent access to global random state
TopPSampler.randomStateLock.lock()
defer { TopPSampler.randomStateLock.unlock() }
Collaborator:
FWIW the typical way to use a lock like this is:

lock.withLock {
    compiledTopPSampling(...)
}

but see my other comment on the use of locks to guard this


return compiledTopPSampling(logits, topP, temp)
}
}
@@ -174,6 +180,9 @@ public struct TopPSampler: LogitSampler {
public struct CategoricalSampler: LogitSampler {
let temp: MLXArray

// Thread-safe sampling using a lock to protect global state access
private static let randomStateLock = NSLock()

public init(temperature: Float) {
self.temp = MLXArray(temperature)
}
@@ -185,7 +194,10 @@ public struct CategoricalSampler: LogitSampler {
}()

public func sample(logits: MLXArray) -> MLXArray {
compiledCategorical(logits, temp)
// Synchronize access to global random state to prevent concurrency issues
CategoricalSampler.randomStateLock.lock()
defer { CategoricalSampler.randomStateLock.unlock() }
return compiledCategorical(logits, temp)
}
}

@@ -267,6 +279,9 @@ public struct RepetitionContext: LogitProcessor {
///
/// Note: this uses `asyncEval()` and there may be an async evaluation running after a call to `next()`.
public struct TokenIterator: Sequence, IteratorProtocol {
// Global lock to protect MLX evaluation operations
private static let mlxEvalLock = NSLock()
Collaborator:
See:

This guards only concurrent calls in TokenIterator. In theory calls to eval() and asyncEval() should be thread safe as long as callers are using entirely distinct MLXArrays / compute graphs. In practice, that was never really a guarantee from mlx::core and in mlx-swift 0.25.1 we found new issues around this (changes on the core side).

The evalLock in mlx-swift is wider than just eval -- it has to guard a number of calls. It may be removed sometime in the future if we can restore the thread safe behavior in mlx::core.

Anyway, that said, this does not guard against concurrent use of the same model (which would still be a problem), nor does it add anything over the evalLock already in mlx-swift (that I can see, anyway).
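One way to address the remaining problem (concurrent use of the same model) without adding another lock is to give each model a single serial owner. A minimal sketch using a Swift actor, with `InferenceWorker` as a hypothetical name:

```swift
// Hypothetical sketch: an actor that serializes all inference on one model,
// giving thread safety by construction rather than per-call-site locks.
actor InferenceWorker<Model> {
    private let model: Model

    init(model: Model) {
        self.model = model
    }

    // Runs `body` with exclusive access to the model; concurrent callers
    // are queued by the actor, so their compute graphs never interleave.
    func withModel<R: Sendable>(_ body: @Sendable (Model) throws -> R) rethrows -> R {
        try body(model)
    }
}
```

With this shape, a TokenIterator (and its eval sites) would run entirely inside one actor turn, so no two threads ever share the model's state or KV cache.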


let model: any LanguageModel
var state: LMOutput.State?

@@ -383,11 +398,19 @@ public struct TokenIterator: Sequence, IteratorProtocol {
// evaluate the remainder of the prompt -- this primes the pump
let token = step(previous: y)
y = .init(tokens: token)

// Protect asyncEval with the global lock
TokenIterator.mlxEvalLock.lock()
asyncEval(y.tokens)
TokenIterator.mlxEvalLock.unlock()

case .logits(let result):
y = .init(tokens: convertToToken(logits: result.logits))

// Protect asyncEval with the global lock
TokenIterator.mlxEvalLock.lock()
asyncEval(y.tokens)
TokenIterator.mlxEvalLock.unlock()

break
}
@@ -434,7 +457,11 @@ public struct TokenIterator: Sequence, IteratorProtocol {
// compute the next state and async eval the next token
let token = step(previous: previousY)
y = .init(tokens: token)

// Protect asyncEval with the global lock to prevent concurrent access
TokenIterator.mlxEvalLock.lock()
asyncEval(token)
TokenIterator.mlxEvalLock.unlock()

tokenCount += 1
