diff --git a/Package.swift b/Package.swift
index fc28be1..0e49fdd 100644
--- a/Package.swift
+++ b/Package.swift
@@ -7,31 +7,17 @@ let package = Package(
     name: "swift-transformers",
     platforms: [.iOS(.v16), .macOS(.v13)],
     products: [
-        .library(name: "Transformers", targets: ["Tokenizers", "Generation", "Models"]),
-        .executable(name: "transformers", targets: ["TransformersCLI"]),
-        .executable(name: "hub-cli", targets: ["HubCLI"]),
+        .library(name: "Transformers", targets: ["Tokenizers"]),
     ],
     dependencies: [
         .package(url: "https://github.com/apple/swift-argument-parser.git", .upToNextMinor(from: "1.4.0")),
         .package(url: "https://github.com/johnmai-dev/Jinja", .upToNextMinor(from: "1.1.0"))
     ],
     targets: [
-        .executableTarget(
-            name: "TransformersCLI",
-            dependencies: [
-                "Models", "Generation", "Tokenizers",
-                .product(name: "ArgumentParser", package: "swift-argument-parser")]),
-        .executableTarget(name: "HubCLI", dependencies: ["Hub", .product(name: "ArgumentParser", package: "swift-argument-parser")]),
         .target(name: "Hub", resources: [.process("FallbackConfigs")]),
         .target(name: "Tokenizers", dependencies: ["Hub", .product(name: "Jinja", package: "Jinja")]),
-        .target(name: "TensorUtils"),
-        .target(name: "Generation", dependencies: ["Tokenizers", "TensorUtils"]),
-        .target(name: "Models", dependencies: ["Tokenizers", "Generation", "TensorUtils"]),
-        .testTarget(name: "TokenizersTests", dependencies: ["Tokenizers", "Models", "Hub"], resources: [.process("Resources"), .process("Vocabs")]),
+        .testTarget(name: "TokenizersTests", dependencies: ["Tokenizers", "Hub"], resources: [.process("Resources"), .process("Vocabs")]),
         .testTarget(name: "HubTests", dependencies: ["Hub"]),
         .testTarget(name: "PreTokenizerTests", dependencies: ["Tokenizers", "Hub"]),
-        .testTarget(name: "TensorUtilsTests", dependencies: ["TensorUtils", "Models", "Hub"], resources: [.process("Resources")]),
-        .testTarget(name: "NormalizerTests", dependencies: ["Tokenizers", "Hub"]),
-        .testTarget(name: "PostProcessorTests", dependencies: ["Tokenizers", "Hub"]),
     ]
 )
diff --git a/Sources/Generation/Generation.swift b/Sources/Generation/Generation.swift
deleted file mode 100644
index 6cfd8ab..0000000
--- a/Sources/Generation/Generation.swift
+++ /dev/null
@@ -1,109 +0,0 @@
-//
-//  Generation.swift
-//
-//
-//  Created by Pedro Cuenca on 7/5/23.
-//
-
-import Tokenizers
-import CoreML
-import TensorUtils
-
-public enum GenerationMode {
-    case contrastiveSearch
-    case greedy
-    case sample
-    case beam
-    case groupBeam
-    case unsupported
-}
-
-public typealias InputTokens = [Int]
-public typealias GenerationOutput = [Int]
-
-/// A callable (a model, usually), that predicts the next token after a given sequence
-public typealias NextTokenModel = (InputTokens, GenerationConfig) -> any MLShapedArrayProtocol
-
-public typealias PredictionTokensCallback = (GenerationOutput) -> Void
-public typealias PredictionStringCallback = (String) -> Void
-
-// TODO: callbacks (for streaming)
-public protocol Generation {
-    func greedySearch(config: GenerationConfig, tokens: InputTokens, model: NextTokenModel, callback: PredictionTokensCallback?) async -> GenerationOutput
-
-    func generate(config: GenerationConfig, prompt: String, model: NextTokenModel, tokenizer: Tokenizer, callback: PredictionStringCallback?) async -> String
-}
-
-public extension Generation {
-    func greedySearch(config: GenerationConfig, tokens: InputTokens, model: NextTokenModel, callback: PredictionTokensCallback? = nil) async -> GenerationOutput {
-        // Iterate until we find the eos token or reach the max length
-        // TODO: additional stopping criteria
-        var outputTokens = tokens
-        while outputTokens.count < config.maxLength {
-            let logits = model(outputTokens, config)
-            let (nextToken, _) = Math.argmax(logits)
-            if nextToken == config.eosTokenId { break }
-            outputTokens.append(nextToken)
-            callback?(outputTokens)
-        }
-        return outputTokens
-    }
-
-    /// https://github.com/huggingface/transformers/blob/42017d82baa083da2bee3055fdac80c81ee97b8a/src/transformers/generation/utils.py#L1552
-    func sample(config: GenerationConfig, tokens: InputTokens, model: NextTokenModel, callback: PredictionTokensCallback? = nil) async -> GenerationOutput {
-        // Iterate until we find the eos token or reach the max length
-        // TODO: additional stopping criteria
-        var outputTokens = tokens
-        let logitsProcessor = LogitsProcessor(logitsWarpers: logitsWarpers(config: config))
-        while outputTokens.count < config.maxLength {
-            let outputs = model(outputTokens, config)
-            /// `floats` can be much faster than `scalars` for a vector with stride 1, as it uses `memcpy` in that case
-            let logits = (outputs as? MLShapedArraySlice<Float>)?.floats ?? outputs.scalars as! [Float]
-            let (indexes, processedLogits) = logitsProcessor(logits)
-            let nextToken = Math.sample(indexes: indexes, probs: Math.softmax(processedLogits))
-            if nextToken == config.eosTokenId { break }
-            outputTokens.append(nextToken)
-            callback?(outputTokens)
-        }
-        return outputTokens
-    }
-
-    func generate(config: GenerationConfig, prompt: String, model: NextTokenModel, tokenizer: Tokenizer, callback: PredictionStringCallback? = nil) async -> String {
-        let tokens = tokenizer.encode(text: prompt)
-        var generationConfig = config
-        generationConfig.maxLength = config.maxNewTokens + tokens.count
-
-        let output: GenerationOutput
-        switch generationConfig.generationMode {
-        case .greedy:
-            output = await greedySearch(config: generationConfig, tokens: tokens, model: model) { tokens in
-                callback?(tokenizer.decode(tokens: tokens))
-            }
-        case .sample:
-            output = await sample(config: generationConfig, tokens: tokens, model: model) { tokens in
-                callback?(tokenizer.decode(tokens: tokens))
-            }
-        default:
-            fatalError("Generation mode \(generationConfig.generationMode) not implemented yet")
-        }
-
-        return tokenizer.decode(tokens: output)
-    }
-
-    private func logitsWarpers(config: GenerationConfig) -> [any LogitsWarper] {
-        var logitsWarpers = [any LogitsWarper]()
-        if config.temperature > 0 && config.temperature != 1 {
-            logitsWarpers.append(TemperatureLogitsWarper(temperature: Float(config.temperature)))
-        }
-        if config.topK > 0 {
-            logitsWarpers.append(TopKLogitsWarper(k: config.topK))
-        }
-        if config.topP < 1.0 {
-            logitsWarpers.append(TopPLogitsWarper(p: Float(config.topP)))
-        }
-        if config.repetitionPenalty != 1.0 {
-            logitsWarpers.append(RepetitionPenaltyWarper(penalty: config.repetitionPenalty))
-        }
-        return logitsWarpers
-    }
-}
diff --git a/Sources/Generation/GenerationConfig.swift b/Sources/Generation/GenerationConfig.swift
deleted file mode 100644
index a9eee7b..0000000
--- a/Sources/Generation/GenerationConfig.swift
+++ /dev/null
@@ -1,58 +0,0 @@
-//
-//  GenerationConfig.swift
-//
-//
-//  Created by Pedro Cuenca on 7/5/23.
-// - -import Foundation - -/// Essentials taken from https://github.com/huggingface/transformers/blob/main/src/transformers/generation/configuration_utils.py -public struct GenerationConfig { - public var maxLength = 20 - public var maxNewTokens: Int - public var doSample = false - public var numBeams = 1 - public var numBeamGroups = 1 - public var penaltyAlpha: Double? = nil - public var temperature = 1.0 - public var topK = 50 - public var topP = 1.0 - public var repetitionPenalty = 1.0 - - public var padTokenId: Int? = nil - public var bosTokenId: Int? = nil - public var eosTokenId: Int? = nil - - public init(maxLength: Int = 20, maxNewTokens: Int, doSample: Bool = false, numBeams: Int = 1, numBeamGroups: Int = 1, penaltyAlpha: Double? = nil, temperature: Double = 1.0, topK: Int = 50, topP: Double = 1.0, repetitionPenalty: Double = 1.0) { - self.maxLength = maxLength - self.maxNewTokens = maxNewTokens - self.doSample = doSample - self.numBeams = numBeams - self.numBeamGroups = numBeamGroups - self.penaltyAlpha = penaltyAlpha - self.temperature = temperature - self.topK = topK - self.topP = topP - self.repetitionPenalty = repetitionPenalty - } -} - -public extension GenerationConfig { - var generationMode: GenerationMode { - // Exclude this case from the pattern matching below - if topK > 1 && !doSample && penaltyAlpha != nil && penaltyAlpha! > 0 { - return .contrastiveSearch - } - - switch (numBeams, numBeamGroups, doSample) { - case (1, 1, false) : return .greedy - case (1, 1, true) : return .sample - case (2..., 1, false): return .beam - case (2..., 2..., _) : return .groupBeam - default : return .unsupported - } - } -} - -extension GenerationConfig: Decodable {} diff --git a/Sources/Hub/Downloader.swift b/Sources/Hub/Downloader.swift index 1f3b8c6..2ae2609 100644 --- a/Sources/Hub/Downloader.swift +++ b/Sources/Hub/Downloader.swift @@ -7,128 +7,6 @@ // import Foundation -import Combine - -class Downloader: NSObject, ObservableObject { - private(set) var destination: URL - - enum DownloadState { - case notStarted - case downloading(Double) - case completed(URL) - case failed(Error) - } - - enum DownloadError: Error { - case invalidDownloadLocation - case unexpectedError - } - - private(set) lazy var downloadState: CurrentValueSubject = CurrentValueSubject(.notStarted) - private var stateSubscriber: Cancellable? - - private var urlSession: URLSession? = nil - - init(from url: URL, to destination: URL, using authToken: String? = nil, inBackground: Bool = false) { - self.destination = destination - super.init() - let sessionIdentifier = "swift-transformers.hub.downloader" - - var config = URLSessionConfiguration.default - if inBackground { - config = URLSessionConfiguration.background(withIdentifier: sessionIdentifier) - config.isDiscretionary = false - config.sessionSendsLaunchEvents = true - } - - self.urlSession = URLSession(configuration: config, delegate: self, delegateQueue: nil) - - setupDownload(from: url, with: authToken) - } - - private func setupDownload(from url: URL, with authToken: String?) { - downloadState.value = .downloading(0) - urlSession?.getAllTasks { tasks in - // If there's an existing pending background task with the same URL, let it proceed. 
- if let existing = tasks.filter({ $0.originalRequest?.url == url }).first { - switch existing.state { - case .running: - // print("Already downloading \(url)") - return - case .suspended: - // print("Resuming suspended download task for \(url)") - existing.resume() - return - case .canceling: - // print("Starting new download task for \(url), previous was canceling") - break - case .completed: - // print("Starting new download task for \(url), previous is complete but the file is no longer present (I think it's cached)") - break - @unknown default: - // print("Unknown state for running task; cancelling and creating a new one") - existing.cancel() - } - } - var request = URLRequest(url: url) - if let authToken = authToken { - request.setValue("Bearer \(authToken)", forHTTPHeaderField: "Authorization") - } - - self.urlSession?.downloadTask(with: request).resume() - } - } - - @discardableResult - func waitUntilDone() throws -> URL { - // It's either this, or stream the bytes ourselves (add to a buffer, save to disk, etc; boring and finicky) - let semaphore = DispatchSemaphore(value: 0) - stateSubscriber = downloadState.sink { state in - switch state { - case .completed: semaphore.signal() - case .failed: semaphore.signal() - default: break - } - } - semaphore.wait() - - switch downloadState.value { - case .completed(let url): return url - case .failed(let error): throw error - default: throw DownloadError.unexpectedError - } - } - - func cancel() { - urlSession?.invalidateAndCancel() - } -} - -extension Downloader: URLSessionDownloadDelegate { - func urlSession(_: URLSession, downloadTask: URLSessionDownloadTask, didWriteData _: Int64, totalBytesWritten: Int64, totalBytesExpectedToWrite: Int64) { - downloadState.value = .downloading(Double(totalBytesWritten) / Double(totalBytesExpectedToWrite)) - } - - func urlSession(_: URLSession, downloadTask _: URLSessionDownloadTask, didFinishDownloadingTo location: URL) { - do { - // If the downloaded file already exists on the filesystem, overwrite it - try FileManager.default.moveDownloadedFile(from: location, to: self.destination) - downloadState.value = .completed(destination) - } catch { - downloadState.value = .failed(error) - } - } - - func urlSession(_ session: URLSession, task: URLSessionTask, didCompleteWithError error: Error?) { - if let error = error { - downloadState.value = .failed(error) -// } else if let response = task.response as? HTTPURLResponse { -// print("HTTP response status code: \(response.statusCode)") -// let headers = response.allHeaderFields -// print("HTTP response headers: \(headers)") - } - } -} extension FileManager { func moveDownloadedFile(from srcURL: URL, to dstURL: URL) throws { diff --git a/Sources/Hub/Hub.swift b/Sources/Hub/Hub.swift index 4116dcb..0510cbd 100644 --- a/Sources/Hub/Hub.swift +++ b/Sources/Hub/Hub.swift @@ -112,15 +112,6 @@ public class LanguageModelConfigurationFromHub { private var configPromise: Task? 
= nil - public init( - modelName: String, - hubApi: HubApi = .shared - ) { - self.configPromise = Task.init { - return try await self.loadConfig(modelName: modelName, hubApi: hubApi) - } - } - public init( modelFolder: URL, hubApi: HubApi = .shared @@ -173,17 +164,6 @@ public class LanguageModelConfigurationFromHub { } } - func loadConfig( - modelName: String, - hubApi: HubApi = .shared - ) async throws -> Configurations { - let filesToDownload = ["config.json", "tokenizer_config.json", "tokenizer.json"] - let repo = Hub.Repo(id: modelName) - let downloadedModelFolder = try await hubApi.snapshot(from: repo, matching: filesToDownload) - - return try await loadConfig(modelFolder: downloadedModelFolder, hubApi: hubApi) - } - func loadConfig( modelFolder: URL, hubApi: HubApi = .shared diff --git a/Sources/Hub/HubApi.swift b/Sources/Hub/HubApi.swift index 6e687ac..c31444c 100644 --- a/Sources/Hub/HubApi.swift +++ b/Sources/Hub/HubApi.swift @@ -6,8 +6,6 @@ // import Foundation -import CryptoKit -import os public struct HubApi { var downloadBase: URL @@ -17,7 +15,7 @@ public struct HubApi { public typealias RepoType = Hub.RepoType public typealias Repo = Hub.Repo - + public init(downloadBase: URL? = nil, hfToken: String? = nil, endpoint: String = "https://huggingface.co", useBackgroundSession: Bool = false) { self.hfToken = hfToken ?? Self.hfTokenFromEnv() if let downloadBase { @@ -29,10 +27,8 @@ public struct HubApi { self.endpoint = endpoint self.useBackgroundSession = useBackgroundSession } - + public static let shared = HubApi() - - private static let logger = Logger() } private extension HubApi { @@ -77,76 +73,6 @@ public extension HubApi { struct SiblingsResponse: Codable { let siblings: [Sibling] } - - /// Throws error if the response code is not 20X - func httpGet(for url: URL) async throws -> (Data, HTTPURLResponse) { - var request = URLRequest(url: url) - if let hfToken = hfToken { - request.setValue("Bearer \(hfToken)", forHTTPHeaderField: "Authorization") - } - let (data, response) = try await URLSession.shared.data(for: request) - guard let response = response as? HTTPURLResponse else { throw Hub.HubClientError.unexpectedError } - - switch response.statusCode { - case 200..<300: break - case 400..<500: throw Hub.HubClientError.authorizationRequired - default: throw Hub.HubClientError.httpStatusCode(response.statusCode) - } - - return (data, response) - } - - /// Throws error if page does not exist or is not accessible. - /// Allows relative redirects but ignores absolute ones for LFS files. - func httpHead(for url: URL) async throws -> (Data, HTTPURLResponse) { - var request = URLRequest(url: url) - request.httpMethod = "HEAD" - if let hfToken = hfToken { - request.setValue("Bearer \(hfToken)", forHTTPHeaderField: "Authorization") - } - request.setValue("identity", forHTTPHeaderField: "Accept-Encoding") - - let redirectDelegate = RedirectDelegate() - let session = URLSession(configuration: .default, delegate: redirectDelegate, delegateQueue: nil) - - let (data, response) = try await session.data(for: request) - guard let response = response as? 
HTTPURLResponse else { throw Hub.HubClientError.unexpectedError } - - switch response.statusCode { - case 200..<400: break // Allow redirects to pass through to the redirect delegate - case 400..<500: throw Hub.HubClientError.authorizationRequired - default: throw Hub.HubClientError.httpStatusCode(response.statusCode) - } - - return (data, response) - } - - func getFilenames(from repo: Repo, matching globs: [String] = []) async throws -> [String] { - // Read repo info and only parse "siblings" - let url = URL(string: "\(endpoint)/api/\(repo.type)/\(repo.id)")! - let (data, _) = try await httpGet(for: url) - let response = try JSONDecoder().decode(SiblingsResponse.self, from: data) - let filenames = response.siblings.map { $0.rfilename } - guard globs.count > 0 else { return filenames } - - var selected: Set = [] - for glob in globs { - selected = selected.union(filenames.matching(glob: glob)) - } - return Array(selected) - } - - func getFilenames(from repoId: String, matching globs: [String] = []) async throws -> [String] { - return try await getFilenames(from: Repo(id: repoId), matching: globs) - } - - func getFilenames(from repo: Repo, matching glob: String) async throws -> [String] { - return try await getFilenames(from: repo, matching: [glob]) - } - - func getFilenames(from repoId: String, matching glob: String) async throws -> [String] { - return try await getFilenames(from: Repo(id: repoId), matching: [glob]) - } } /// Additional Errors @@ -182,275 +108,11 @@ public extension HubApi { } } -/// Whoami -public extension HubApi { - func whoami() async throws -> Config { - guard hfToken != nil else { throw Hub.HubClientError.authorizationRequired } - - let url = URL(string: "\(endpoint)/api/whoami-v2")! - let (data, _) = try await httpGet(for: url) - - let parsed = try JSONSerialization.jsonObject(with: data, options: []) - guard let dictionary = parsed as? [NSString: Any] else { throw Hub.HubClientError.parse } - return Config(dictionary) - } -} - /// Snaphsot download public extension HubApi { func localRepoLocation(_ repo: Repo) -> URL { downloadBase.appending(component: repo.type.rawValue).appending(component: repo.id) } - - struct HubFileDownloader { - let repo: Repo - let repoDestination: URL - let relativeFilename: String - let hfToken: String? - let endpoint: String? - let backgroundSession: Bool - - let sha256Pattern = "^[0-9a-f]{64}$" - let commitHashPattern = "^[0-9a-f]{40}$" - - var source: URL { - // https://huggingface.co/coreml-projects/Llama-2-7b-chat-coreml/resolve/main/tokenizer.json?download=true - var url = URL(string: endpoint ?? "https://huggingface.co")! 
- if repo.type != .models { - url = url.appending(component: repo.type.rawValue) - } - url = url.appending(path: repo.id) - url = url.appending(path: "resolve/main") // TODO: revisions - url = url.appending(path: relativeFilename) - return url - } - - var destination: URL { - repoDestination.appending(path: relativeFilename) - } - - var metadataDestination: URL { - repoDestination - .appendingPathComponent(".cache") - .appendingPathComponent("huggingface") - .appendingPathComponent("download") - } - - var downloaded: Bool { - FileManager.default.fileExists(atPath: destination.path) - } - - func prepareDestination() throws { - let directoryURL = destination.deletingLastPathComponent() - try FileManager.default.createDirectory(at: directoryURL, withIntermediateDirectories: true, attributes: nil) - } - - func prepareMetadataDestination() throws { - try FileManager.default.createDirectory(at: metadataDestination, withIntermediateDirectories: true, attributes: nil) - } - - /// Reads metadata about a file in the local directory related to a download process. - /// - /// Reference: https://github.com/huggingface/huggingface_hub/blob/b2c9a148d465b43ab90fab6e4ebcbbf5a9df27d4/src/huggingface_hub/_local_folder.py#L263 - /// - /// - Parameters: - /// - localDir: The local directory where metadata files are downloaded. - /// - filePath: The path of the file for which metadata is being read. - /// - Throws: An `EnvironmentError.invalidMetadataError` if the metadata file is invalid and cannot be removed. - /// - Returns: A `LocalDownloadFileMetadata` object if the metadata file exists and is valid, or `nil` if the file is missing or invalid. - func readDownloadMetadata(localDir: URL, filePath: String) throws -> LocalDownloadFileMetadata? { - let metadataPath = localDir.appending(path: filePath) - if FileManager.default.fileExists(atPath: metadataPath.path) { - do { - let contents = try String(contentsOf: metadataPath, encoding: .utf8) - let lines = contents.components(separatedBy: .newlines) - - guard lines.count >= 3 else { - throw EnvironmentError.invalidMetadataError("Metadata file is missing required fields.") - } - - let commitHash = lines[0].trimmingCharacters(in: .whitespacesAndNewlines) - let etag = lines[1].trimmingCharacters(in: .whitespacesAndNewlines) - guard let timestamp = Double(lines[2].trimmingCharacters(in: .whitespacesAndNewlines)) else { - throw EnvironmentError.invalidMetadataError("Missing or invalid timestamp.") - } - let timestampDate = Date(timeIntervalSince1970: timestamp) - - // TODO: check if file hasn't been modified since the metadata was saved - // Reference: https://github.com/huggingface/huggingface_hub/blob/2fdc6f48ef5e6b22ee9bcdc1945948ac070da675/src/huggingface_hub/_local_folder.py#L303 - - return LocalDownloadFileMetadata(commitHash: commitHash, etag: etag, filename: filePath, timestamp: timestampDate) - } catch { - do { - logger.warning("Invalid metadata file \(metadataPath): \(error). Removing it from disk and continue.") - try FileManager.default.removeItem(at: metadataPath) - } catch { - throw EnvironmentError.invalidMetadataError("Could not remove corrupted metadata file \(metadataPath): \(error)") - } - return nil - } - } - - // metadata file does not exist - return nil - } - - func isValidHash(hash: String, pattern: String) -> Bool { - let regex = try? 
NSRegularExpression(pattern: pattern) - let range = NSRange(location: 0, length: hash.utf16.count) - return regex?.firstMatch(in: hash, options: [], range: range) != nil - } - - /// Reference: https://github.com/huggingface/huggingface_hub/blob/b2c9a148d465b43ab90fab6e4ebcbbf5a9df27d4/src/huggingface_hub/_local_folder.py#L391 - func writeDownloadMetadata(commitHash: String, etag: String, metadataRelativePath: String) throws { - let metadataContent = "\(commitHash)\n\(etag)\n\(Date().timeIntervalSince1970)\n" - let metadataPath = metadataDestination.appending(component: metadataRelativePath) - - do { - try FileManager.default.createDirectory(at: metadataPath.deletingLastPathComponent(), withIntermediateDirectories: true) - try metadataContent.write(to: metadataPath, atomically: true, encoding: .utf8) - } catch { - throw EnvironmentError.invalidMetadataError("Failed to write metadata file \(metadataPath)") - } - } - - func computeFileHash(file url: URL) throws -> String { - // Open file for reading - guard let fileHandle = try? FileHandle(forReadingFrom: url) else { - throw Hub.HubClientError.unexpectedError - } - - defer { - try? fileHandle.close() - } - - var hasher = SHA256() - let chunkSize = 1024 * 1024 // 1MB chunks - - while autoreleasepool(invoking: { - let nextChunk = try? fileHandle.read(upToCount: chunkSize) - - guard let nextChunk, - !nextChunk.isEmpty - else { - return false - } - - hasher.update(data: nextChunk) - - return true - }) { } - - let digest = hasher.finalize() - return digest.map { String(format: "%02x", $0) }.joined() - } - - - // Note we go from Combine in Downloader to callback-based progress reporting - // We'll probably need to support Combine as well to play well with Swift UI - // (See for example PipelineLoader in swift-coreml-diffusers) - @discardableResult - func download(progressHandler: @escaping (Double) -> Void) async throws -> URL { - let metadataRelativePath = "\(relativeFilename).metadata" - - let localMetadata = try readDownloadMetadata(localDir: metadataDestination, filePath: metadataRelativePath) - let remoteMetadata = try await HubApi.shared.getFileMetadata(url: source) - - let localCommitHash = localMetadata?.commitHash ?? "" - let remoteCommitHash = remoteMetadata.commitHash ?? 
"" - - // Local file exists + metadata exists + commit_hash matches => return file - if isValidHash(hash: remoteCommitHash, pattern: commitHashPattern) && downloaded && localMetadata != nil && localCommitHash == remoteCommitHash { - return destination - } - - // From now on, etag, commit_hash, url and size are not empty - guard let remoteCommitHash = remoteMetadata.commitHash, - let remoteEtag = remoteMetadata.etag, - remoteMetadata.location != "" else { - throw EnvironmentError.invalidMetadataError("File metadata must have been retrieved from server") - } - - // Local file exists => check if it's up-to-date - if downloaded { - // etag matches => update metadata and return file - if localMetadata?.etag == remoteEtag { - try writeDownloadMetadata(commitHash: remoteCommitHash, etag: remoteEtag, metadataRelativePath: metadataRelativePath) - return destination - } - - // etag is a sha256 - // => means it's an LFS file (large) - // => let's compute local hash and compare - // => if match, update metadata and return file - if isValidHash(hash: remoteEtag, pattern: sha256Pattern) { - let fileHash = try computeFileHash(file: destination) - if fileHash == remoteEtag { - try writeDownloadMetadata(commitHash: remoteCommitHash, etag: remoteEtag, metadataRelativePath: metadataRelativePath) - return destination - } - } - } - - // Otherwise, let's download the file! - try prepareDestination() - try prepareMetadataDestination() - - let downloader = Downloader(from: source, to: destination, using: hfToken, inBackground: backgroundSession) - let downloadSubscriber = downloader.downloadState.sink { state in - if case .downloading(let progress) = state { - progressHandler(progress) - } - } - _ = try withExtendedLifetime(downloadSubscriber) { - try downloader.waitUntilDone() - } - - try writeDownloadMetadata(commitHash: remoteCommitHash, etag: remoteEtag, metadataRelativePath: metadataRelativePath) - - return destination - } - } - - @discardableResult - func snapshot(from repo: Repo, matching globs: [String] = [], progressHandler: @escaping (Progress) -> Void = { _ in }) async throws -> URL { - let filenames = try await getFilenames(from: repo, matching: globs) - let progress = Progress(totalUnitCount: Int64(filenames.count)) - let repoDestination = localRepoLocation(repo) - for filename in filenames { - let fileProgress = Progress(totalUnitCount: 100, parent: progress, pendingUnitCount: 1) - let downloader = HubFileDownloader( - repo: repo, - repoDestination: repoDestination, - relativeFilename: filename, - hfToken: hfToken, - endpoint: endpoint, - backgroundSession: useBackgroundSession - ) - try await downloader.download { fractionDownloaded in - fileProgress.completedUnitCount = Int64(100 * fractionDownloaded) - progressHandler(progress) - } - fileProgress.completedUnitCount = 100 - } - progressHandler(progress) - return repoDestination - } - - @discardableResult - func snapshot(from repoId: String, matching globs: [String] = [], progressHandler: @escaping (Progress) -> Void = { _ in }) async throws -> URL { - return try await snapshot(from: Repo(id: repoId), matching: globs, progressHandler: progressHandler) - } - - @discardableResult - func snapshot(from repo: Repo, matching glob: String, progressHandler: @escaping (Progress) -> Void = { _ in }) async throws -> URL { - return try await snapshot(from: repo, matching: [glob], progressHandler: progressHandler) - } - - @discardableResult - func snapshot(from repoId: String, matching glob: String, progressHandler: @escaping (Progress) -> Void = { _ in }) 
async throws -> URL { - return try await snapshot(from: Repo(id: repoId), matching: [glob], progressHandler: progressHandler) - } } /// Metadata @@ -490,146 +152,4 @@ public extension HubApi { guard let etag = etag else { return nil } return etag.trimmingPrefix("W/").trimmingCharacters(in: CharacterSet(charactersIn: "\"")) } - - func getFileMetadata(url: URL) async throws -> FileMetadata { - let (_, response) = try await httpHead(for: url) - let location = response.statusCode == 302 ? response.value(forHTTPHeaderField: "Location") : response.url?.absoluteString - - return FileMetadata( - commitHash: response.value(forHTTPHeaderField: "X-Repo-Commit"), - etag: normalizeEtag( - (response.value(forHTTPHeaderField: "X-Linked-Etag")) ?? (response.value(forHTTPHeaderField: "Etag")) - ), - location: location ?? url.absoluteString, - size: Int(response.value(forHTTPHeaderField: "X-Linked-Size") ?? response.value(forHTTPHeaderField: "Content-Length") ?? "") - ) - } - - func getFileMetadata(from repo: Repo, matching globs: [String] = []) async throws -> [FileMetadata] { - let files = try await getFilenames(from: repo, matching: globs) - let url = URL(string: "\(endpoint)/\(repo.id)/resolve/main")! // TODO: revisions - var selectedMetadata: Array = [] - for file in files { - let fileURL = url.appending(path: file) - selectedMetadata.append(try await getFileMetadata(url: fileURL)) - } - return selectedMetadata - } - - func getFileMetadata(from repoId: String, matching globs: [String] = []) async throws -> [FileMetadata] { - return try await getFileMetadata(from: Repo(id: repoId), matching: globs) - } - - func getFileMetadata(from repo: Repo, matching glob: String) async throws -> [FileMetadata] { - return try await getFileMetadata(from: repo, matching: [glob]) - } - - func getFileMetadata(from repoId: String, matching glob: String) async throws -> [FileMetadata] { - return try await getFileMetadata(from: Repo(id: repoId), matching: [glob]) - } -} - -/// Stateless wrappers that use `HubApi` instances -public extension Hub { - static func getFilenames(from repo: Hub.Repo, matching globs: [String] = []) async throws -> [String] { - return try await HubApi.shared.getFilenames(from: repo, matching: globs) - } - - static func getFilenames(from repoId: String, matching globs: [String] = []) async throws -> [String] { - return try await HubApi.shared.getFilenames(from: Repo(id: repoId), matching: globs) - } - - static func getFilenames(from repo: Repo, matching glob: String) async throws -> [String] { - return try await HubApi.shared.getFilenames(from: repo, matching: glob) - } - - static func getFilenames(from repoId: String, matching glob: String) async throws -> [String] { - return try await HubApi.shared.getFilenames(from: Repo(id: repoId), matching: glob) - } - - static func snapshot(from repo: Repo, matching globs: [String] = [], progressHandler: @escaping (Progress) -> Void = { _ in }) async throws -> URL { - return try await HubApi.shared.snapshot(from: repo, matching: globs, progressHandler: progressHandler) - } - - static func snapshot(from repoId: String, matching globs: [String] = [], progressHandler: @escaping (Progress) -> Void = { _ in }) async throws -> URL { - return try await HubApi.shared.snapshot(from: Repo(id: repoId), matching: globs, progressHandler: progressHandler) - } - - static func snapshot(from repo: Repo, matching glob: String, progressHandler: @escaping (Progress) -> Void = { _ in }) async throws -> URL { - return try await HubApi.shared.snapshot(from: repo, matching: glob, 
progressHandler: progressHandler) - } - - static func snapshot(from repoId: String, matching glob: String, progressHandler: @escaping (Progress) -> Void = { _ in }) async throws -> URL { - return try await HubApi.shared.snapshot(from: Repo(id: repoId), matching: glob, progressHandler: progressHandler) - } - - static func whoami(token: String) async throws -> Config { - return try await HubApi(hfToken: token).whoami() - } - - static func getFileMetadata(fileURL: URL) async throws -> HubApi.FileMetadata { - return try await HubApi.shared.getFileMetadata(url: fileURL) - } - - static func getFileMetadata(from repo: Repo, matching globs: [String] = []) async throws -> [HubApi.FileMetadata] { - return try await HubApi.shared.getFileMetadata(from: repo, matching: globs) - } - - static func getFileMetadata(from repoId: String, matching globs: [String] = []) async throws -> [HubApi.FileMetadata] { - return try await HubApi.shared.getFileMetadata(from: Repo(id: repoId), matching: globs) - } - - static func getFileMetadata(from repo: Repo, matching glob: String) async throws -> [HubApi.FileMetadata] { - return try await HubApi.shared.getFileMetadata(from: repo, matching: [glob]) - } - - static func getFileMetadata(from repoId: String, matching glob: String) async throws -> [HubApi.FileMetadata] { - return try await HubApi.shared.getFileMetadata(from: Repo(id: repoId), matching: [glob]) - } -} - -public extension [String] { - func matching(glob: String) -> [String] { - filter { fnmatch(glob, $0, 0) == 0 } - } -} - -/// Only allow relative redirects and reject others -/// Reference: https://github.com/huggingface/huggingface_hub/blob/b2c9a148d465b43ab90fab6e4ebcbbf5a9df27d4/src/huggingface_hub/file_download.py#L258 -private class RedirectDelegate: NSObject, URLSessionTaskDelegate { - func urlSession(_ session: URLSession, task: URLSessionTask, willPerformHTTPRedirection response: HTTPURLResponse, newRequest request: URLRequest, completionHandler: @escaping (URLRequest?) 
-> Void) { - // Check if it's a redirect status code (300-399) - if (300...399).contains(response.statusCode) { - // Get the Location header - if let locationString = response.value(forHTTPHeaderField: "Location"), - let locationUrl = URL(string: locationString) { - - // Check if it's a relative redirect (no host component) - if locationUrl.host == nil { - // For relative redirects, construct the new URL using the original request's base - if let originalUrl = task.originalRequest?.url, - var components = URLComponents(url: originalUrl, resolvingAgainstBaseURL: true) { - // Update the path component with the relative path - components.path = locationUrl.path - components.query = locationUrl.query - - // Create new request with the resolved URL - if let resolvedUrl = components.url { - var newRequest = URLRequest(url: resolvedUrl) - // Copy headers from original request - task.originalRequest?.allHTTPHeaderFields?.forEach { key, value in - newRequest.setValue(value, forHTTPHeaderField: key) - } - newRequest.setValue(resolvedUrl.absoluteString, forHTTPHeaderField: "Location") - completionHandler(newRequest) - return - } - } - } - } - } - - // For all other cases (non-redirects or absolute redirects), prevent redirect - completionHandler(nil) - } } diff --git a/Sources/HubCLI/HubCLI.swift b/Sources/HubCLI/HubCLI.swift deleted file mode 100644 index fb0cc72..0000000 --- a/Sources/HubCLI/HubCLI.swift +++ /dev/null @@ -1,96 +0,0 @@ -import ArgumentParser -import Foundation - -import Hub - -let defaultTokenLocation = NSString("~/.cache/huggingface/token").expandingTildeInPath - -@main -struct HubCLI: AsyncParsableCommand { - static let configuration = CommandConfiguration( - abstract: "Hugging Face Hub Client", - version: "0.0.1", - subcommands: [Download.self, Whoami.self] - ) -} - -protocol SubcommandWithToken { - - var token: String? { get } -} - -extension SubcommandWithToken { - var hfToken: String? { - if let token = token { return token } - return try? String(contentsOfFile: defaultTokenLocation, encoding: .utf8) - } -} - -struct Download: AsyncParsableCommand, SubcommandWithToken { - static let configuration = CommandConfiguration(abstract: "Snapshot download from the Hub") - - enum RepoType: String, ExpressibleByArgument { - case model - case dataset - case space - - var asHubApiRepoType: HubApi.RepoType { - switch self { - case .model: return .models - case .dataset: return .datasets - case .space: return .spaces - } - } - } - - @Argument(help: "Repo ID") - var repo: String - - @Option(help: "Repo type") - var repoType: RepoType = .model - - @Option(help: "Glob patterns for files to include") - var include: [String] = [] - - @Option(help: "Hugging Face token. If empty, will attempt to read from the filesystem at \(defaultTokenLocation)") - var token: String? = nil - - func run() async throws { - let hubApi = HubApi(hfToken: hfToken) - let repo = Hub.Repo(id: repo, type: repoType.asHubApiRepoType) - let downloadedTo = try await hubApi.snapshot(from: repo, matching: include) { progress in - DispatchQueue.main.async { - let totalPercent = 100 * progress.fractionCompleted - print("\(progress.completedUnitCount)/\(progress.totalUnitCount) \(totalPercent.formatted("%.02f"))%", terminator: "\r") - fflush(stdout) - } - } - print("Snapshot downloaded to: \(downloadedTo.path)") - } -} - -struct Whoami: AsyncParsableCommand, SubcommandWithToken { - static let configuration = CommandConfiguration(abstract: "whoami") - - @Option(help: "Hugging Face token. 
If empty, will attempt to read from the filesystem at \(defaultTokenLocation)") - var token: String? = nil - - func run() async throws { - let hubApi = HubApi(hfToken: hfToken) - let userInfo = try await hubApi.whoami() - if let name = userInfo.name?.stringValue, - let fullname = userInfo.fullname?.stringValue, - let email = userInfo.email?.stringValue - { - print("\(name) (\(fullname) <\(email)>)") - } else { - print("Cannot retrieve user info") - } - } -} - -extension Double { - func formatted(_ format: String) -> String { - return String(format: "\(format)", self) - } -} diff --git a/Sources/Models/LanguageModel.swift b/Sources/Models/LanguageModel.swift deleted file mode 100644 index 22ba7aa..0000000 --- a/Sources/Models/LanguageModel.swift +++ /dev/null @@ -1,219 +0,0 @@ -// -// LanguageModel.swift -// -// -// Created by Pedro Cuenca on 7/5/23. -// - -import CoreML -import Tokenizers -import Generation -import Hub - -public class LanguageModel { - public let model: MLModel - - public let minContextLength: Int - public let maxContextLength: Int - - let input_ids = "input_ids" - let attention_mask = "attention_mask" - - struct Configurations { - var modelConfig: Config - var tokenizerConfig: Config? - var tokenizerData: Config - } - - private var configuration: LanguageModelConfigurationFromHub? = nil - private var _tokenizer: Tokenizer? = nil - - public required init(model: MLModel) { - self.model = model - - // We assume inputs named "input_ids" with shape (1, seq_length) - // Perhaps we should convert to vectors of shape (seq_length) and use sequenceConstraint instead of shapeConstraint - let inputDescription = model.modelDescription.inputDescriptionsByName["input_ids"] - - guard let shapeConstraint = inputDescription?.multiArrayConstraint?.shapeConstraint else { - fatalError("Cannot obtain shape information") - } - - switch shapeConstraint.type { - case .enumerated: - // TODO: support a set of fixed shapes (keeping the first one here) - minContextLength = shapeConstraint.enumeratedShapes[0][1].intValue - maxContextLength = minContextLength - case .range: - let range = inputDescription?.multiArrayConstraint?.shapeConstraint.sizeRangeForDimension[1] as? NSRange - minContextLength = range?.location ?? 1 - maxContextLength = range?.length ?? 128 - case .unspecified: - minContextLength = 128 - maxContextLength = 128 - @unknown default: - minContextLength = 128 - maxContextLength = 128 - } - - self.configuration = LanguageModelConfigurationFromHub(modelName: modelName) - } -} - -public extension LanguageModel { - static func loadCompiled(url: URL, computeUnits: MLComputeUnits = .cpuAndGPU) throws -> LanguageModel { - let config = MLModelConfiguration() - config.computeUnits = computeUnits - let model = try MLModel(contentsOf: url, configuration: config) - return LanguageModel(model: model) - } -} - -public extension LanguageModel { - var description: String { - if let description = model.modelDescription.metadata[MLModelMetadataKey.description] as? String, - !description.isEmpty { - return description - } - return model.configuration.modelDisplayName ?? "" - } - - /// `name_or_path` in the Python world - var modelName: String { - if let userFields = model.modelDescription.metadata[MLModelMetadataKey.creatorDefinedKey] as? 
[String : String], - let name = userFields["co.huggingface.exporters.name"] { - return name - } - // This is usually the basename of the file, that's our best bet if no metadata exists - guard let modelName = model.configuration.modelDisplayName else { fatalError("Models must have a name that identifies them") } - return modelName - } - - var inputIdsDescription: MLFeatureDescription { - model.modelDescription.inputDescriptionsByName[input_ids]! - } - - var inputIdsName: String { - inputIdsDescription.name - } - - /// The expected shape of the models latent sample input - var inputIdsShape: [Int] { - inputIdsDescription.multiArrayConstraint!.shape.map { $0.intValue } - } - - var requiresAttention: Bool { - model.modelDescription.inputDescriptionsByName[attention_mask] != nil - } - - // MLShapedArrayProtocol is either a MLShapedArray or a MLShapedArraySlice - func predictNextTokenScores(_ tokens: InputTokens, config: GenerationConfig) -> any MLShapedArrayProtocol { - // TODO: exceptions - - // Maybe pad or truncate - let maxTokens = min(tokens.count, maxContextLength) - let padLength = maxTokens >= minContextLength ? 0 : minContextLength-maxTokens - let inputTokens = Array(tokens[0..(scalars: inputTokens.map { Int32($0) }, shape: inputIdsShape) - var inputDictionary = [inputIdsName: MLFeatureValue(shapedArray: inputIds)] - if requiresAttention { - let mask = Array(repeating: 1, count: maxTokens) + Array(repeating: 0, count: padLength) - let attentionMask = MLShapedArray(scalars: mask.map{ Int32($0) }, shape: inputIdsShape) - inputDictionary[attention_mask] = MLFeatureValue(shapedArray: attentionMask) - } - let input = try! MLDictionaryFeatureProvider(dictionary: inputDictionary) - - let output = try! model.prediction(from: input) - - // TODO: maybe try to support models with "token_scores" too (after the softmax) - assert(output.featureNames.first! == "logits") - - let scores = output.featureValue(for: output.featureNames.first!)!.shapedArrayValue(of: Float.self)! - let nextTokenScores = scores[0, maxTokens - 1] - return nextTokenScores - } -} - -/// async properties downloaded from the configuration -public extension LanguageModel { - var modelConfig: Config { - get async throws { - try await configuration!.modelConfig - } - } - - var tokenizerConfig: Config? { - get async throws { - try await configuration!.tokenizerConfig - } - } - - var tokenizerData: Config { - get async throws { - try await configuration!.tokenizerData - } - } - - var modelType: String? { - get async throws { - try await modelConfig.modelType?.stringValue - } - } - - var textGenerationParameters: Config? { - get async throws { - try await modelConfig.taskSpecificParams?.textGeneration - } - } - - var defaultDoSample: Bool { - get async throws { - try await textGenerationParameters?.doSample?.boolValue ?? true - } - } - - var bosTokenId: Int? { - get async throws { - let modelConfig = try await modelConfig - return modelConfig.bosTokenId?.intValue - } - } - - var eosTokenId: Int? { - get async throws { - let modelConfig = try await modelConfig - return modelConfig.eosTokenId?.intValue - } - } - - var tokenizer: Tokenizer { - get async throws { - guard _tokenizer == nil else { return _tokenizer! } - guard let tokenizerConfig = try await tokenizerConfig else { - throw TokenizerError.tokenizerConfigNotFound - } - let tokenizerData = try await tokenizerData - _tokenizer = try AutoTokenizer.from(tokenizerConfig: tokenizerConfig, tokenizerData: tokenizerData) - return _tokenizer! 
- } - } -} - -extension LanguageModel: TextGenerationModel { - //TODO: retrieve from the json: https://huggingface.co/nlpcloud/instruct-gpt-j-fp16/blob/main/config.json#L26 - public var defaultGenerationConfig: GenerationConfig { - var config = GenerationConfig(maxNewTokens: 30) - switch modelName.lowercased() { - case let x where x.contains("gpt"): - config.doSample = true - config.topK = 50 - default: break - } - return config - } -} - -public enum TokenizerError: Error { - case tokenizerConfigNotFound -} diff --git a/Sources/Models/LanguageModelTypes.swift b/Sources/Models/LanguageModelTypes.swift deleted file mode 100644 index 08d7d48..0000000 --- a/Sources/Models/LanguageModelTypes.swift +++ /dev/null @@ -1,42 +0,0 @@ -// -// LanguageModelTypes.swift -// -// -// Created by Pedro Cuenca on 8/5/23. -// - -import CoreML -import Tokenizers -import Generation - -public protocol LanguageModelProtocol { - /// `name_or_path` in the Python world - var modelName: String { get } - - var tokenizer: Tokenizer { get async throws } - var model: MLModel { get } - - init(model: MLModel) - - /// Make prediction callable (this works like __call__ in Python) - func predictNextTokenScores(_ tokens: InputTokens, config: GenerationConfig) -> any MLShapedArrayProtocol - func callAsFunction(_ tokens: InputTokens, config: GenerationConfig) -> any MLShapedArrayProtocol -} - -public extension LanguageModelProtocol { - func callAsFunction(_ tokens: InputTokens, config: GenerationConfig) -> any MLShapedArrayProtocol { - predictNextTokenScores(tokens, config: config) - } -} - -public protocol TextGenerationModel: Generation, LanguageModelProtocol { - var defaultGenerationConfig: GenerationConfig { get } - func generate(config: GenerationConfig, prompt: String, callback: PredictionStringCallback?) async throws -> String -} - -public extension TextGenerationModel { - @discardableResult - func generate(config: GenerationConfig, prompt: String, callback: PredictionStringCallback? 
= nil) async throws -> String { - try await self.generate(config: config, prompt: prompt, model: self.callAsFunction, tokenizer: self.tokenizer, callback: callback) - } -} diff --git a/Sources/TensorUtils/LogitsWarper/LogitsProcessor.swift b/Sources/TensorUtils/LogitsWarper/LogitsProcessor.swift deleted file mode 100644 index 1c7f3a7..0000000 --- a/Sources/TensorUtils/LogitsWarper/LogitsProcessor.swift +++ /dev/null @@ -1,18 +0,0 @@ -import Foundation - -public struct LogitsProcessor { - public var logitsWarpers: [any LogitsWarper] - - public init(logitsWarpers: [any LogitsWarper]) { - self.logitsWarpers = logitsWarpers - } - - public func callAsFunction(_ arr: [Float]) -> (indices: [Int], logits: [Float]) { - var indices = Array(arr.indices) - var logits = arr - for warper in logitsWarpers { - (indices, logits) = warper(indices, logits) - } - return (indices: indices, logits: logits) - } -} diff --git a/Sources/TensorUtils/LogitsWarper/LogitsWarper.swift b/Sources/TensorUtils/LogitsWarper/LogitsWarper.swift deleted file mode 100644 index ac92ebf..0000000 --- a/Sources/TensorUtils/LogitsWarper/LogitsWarper.swift +++ /dev/null @@ -1,13 +0,0 @@ -import Foundation - -/// Protocol for all logit warpers that can be applied during generation -public protocol LogitsWarper { - func warp(indices: [Int], logits: [Float]) -> (indices: [Int], logits: [Float]) - func callAsFunction(_ indices: [Int], _ logits: [Float]) -> (indices: [Int], logits: [Float]) -} - -extension LogitsWarper { - public func callAsFunction(_ indices: [Int], _ logits: [Float]) -> (indices: [Int], logits: [Float]) { - warp(indices: indices, logits: logits) - } -} diff --git a/Sources/TensorUtils/LogitsWarper/RepetitionPenaltyWarper.swift b/Sources/TensorUtils/LogitsWarper/RepetitionPenaltyWarper.swift deleted file mode 100644 index cbc5c70..0000000 --- a/Sources/TensorUtils/LogitsWarper/RepetitionPenaltyWarper.swift +++ /dev/null @@ -1,25 +0,0 @@ -import Foundation - -/// `RepetitionPenaltyWarper` prevents the repetition of previous tokens through a penalty. -/// This penalty is applied at most once per token. 
-/// https://github.com/huggingface/transformers/blob/main/src/transformers/generation/logits_process.py#L294 -public struct RepetitionPenaltyWarper: LogitsWarper { - public var penalty: Float - - public init(penalty: Double) { - self.penalty = Float(penalty) - } - - public func warp(indices: [Int], logits: [Float]) -> (indices: [Int], logits: [Float]) { - var logits = logits - for index in indices { - if logits[index] < 0 { - logits[index] *= penalty - } else { - logits[index] /= penalty - } - } - - return (indices, logits) - } -} diff --git a/Sources/TensorUtils/LogitsWarper/TemperatureLogitsWarper.swift b/Sources/TensorUtils/LogitsWarper/TemperatureLogitsWarper.swift deleted file mode 100644 index 53dc0db..0000000 --- a/Sources/TensorUtils/LogitsWarper/TemperatureLogitsWarper.swift +++ /dev/null @@ -1,13 +0,0 @@ -import Foundation - -public struct TemperatureLogitsWarper: LogitsWarper { - public var temperature: Float - - public init(temperature: Float) { - self.temperature = temperature - } - - public func warp(indices: [Int], logits: [Float]) -> (indices: [Int], logits: [Float]) { - return (indices: indices, logits: logits.map { $0 / temperature }) - } -} diff --git a/Sources/TensorUtils/LogitsWarper/TopKLogitsWarper.swift b/Sources/TensorUtils/LogitsWarper/TopKLogitsWarper.swift deleted file mode 100644 index a236d84..0000000 --- a/Sources/TensorUtils/LogitsWarper/TopKLogitsWarper.swift +++ /dev/null @@ -1,58 +0,0 @@ -import Foundation -import Accelerate - -/// Top-K. -/// Select the k most-probable element indices from `arr` -/// and return both the indices (from the original array) -/// and their probabilities. -public struct TopKLogitsWarper: LogitsWarper { - public var k: Int - - public init(k: Int) { - self.k = k - } - - public func warp(indices: [Int], logits: [Float]) -> (indices: [Int], logits: [Float]) { - guard !logits.isEmpty else { - return (indices: [], logits: []) - } - let k = min(k, logits.count) - let arrDescriptor = BNNSNDArrayDescriptor.allocate( - initializingFrom: logits, - shape: .vector(logits.count) - ) - defer { - arrDescriptor.deallocate() - } - let bestIndices = BNNSNDArrayDescriptor.allocateUninitialized( - scalarType: Int32.self, - shape: .vector(k) - ) - defer { - bestIndices.deallocate() - } - let bestValues = BNNSNDArrayDescriptor.allocateUninitialized( - scalarType: Float.self, - shape: .vector(k) - ) - defer { - bestValues.deallocate() - } - try! Accelerate.BNNS.applyTopK( - k: k, - input: arrDescriptor, - bestValues: bestValues, - bestIndices: bestIndices, - axis: 0, - batchSize: 1, - filterParameters: nil - ) - let topkLogits = bestValues.data!.withMemoryRebound(to: Float.self, capacity: k) { ptr in - Array(UnsafeBufferPointer(start: ptr, count: k)) - } - let topkIndices = bestIndices.data!.withMemoryRebound(to: Int32.self, capacity: k) { ptr in - Array(UnsafeBufferPointer(start: ptr, count: k)) - } - return (indices: topkIndices.map { indices[Int($0)] }, logits: topkLogits) - } -} diff --git a/Sources/TensorUtils/LogitsWarper/TopPLogitsWarper.swift b/Sources/TensorUtils/LogitsWarper/TopPLogitsWarper.swift deleted file mode 100644 index 3796a09..0000000 --- a/Sources/TensorUtils/LogitsWarper/TopPLogitsWarper.swift +++ /dev/null @@ -1,37 +0,0 @@ -import Foundation - -/// Top-P. -/// Select the smallest set of elements whose cumulative probability exceeds the probability `p`. 
-/// Based on https://gist.github.com/thomwolf/1a5a29f6962089e871b94cbd09daf317 -public struct TopPLogitsWarper: LogitsWarper { - public var p: Float - - public init(p: Float) { - self.p = p - } - - public func warp(indices: [Int], logits: [Float]) -> (indices: [Int], logits: [Float]) { - guard !logits.isEmpty else { - return (indices: [], logits: []) - } - - let arrSoftmax = Math.softmax(logits) - var indexLogitProb = [(index: Int, logit: Float, prob: Float)]() - indexLogitProb.reserveCapacity(logits.count) - for (index, data) in zip(logits, arrSoftmax).enumerated() { - indexLogitProb.append((index: index, logit: data.0, prob: data.1)) - } - indexLogitProb.sort { $0.prob > $1.prob } - - let cumsum = Math.cumsum(indexLogitProb.map(\.prob)) - var sliceIndex = cumsum.count - 1 - for (index, element) in cumsum.enumerated() where element > p { - sliceIndex = index - break - } - - let toppIndices = indexLogitProb[0 ... sliceIndex].map { indices[$0.index] } - let toppLogits = indexLogitProb[0 ... sliceIndex].map(\.logit) - return (indices: toppIndices, logits: toppLogits) - } -} diff --git a/Sources/TensorUtils/MLMultiArray+Utils.swift b/Sources/TensorUtils/MLMultiArray+Utils.swift deleted file mode 100644 index ddb2760..0000000 --- a/Sources/TensorUtils/MLMultiArray+Utils.swift +++ /dev/null @@ -1,200 +0,0 @@ -// -// MLMultiArray+Utils.swift -// CoreMLBert -// -// Created by Julien Chaumond on 27/06/2019. -// Copyright © 2019 Hugging Face. All rights reserved. -// - -import Foundation -import CoreML - -public extension MLMultiArray { - /// All values will be stored in the last dimension of the MLMultiArray (default is dims=1) - static func from(_ arr: [Int], dims: Int = 1) -> MLMultiArray { - var shape = Array(repeating: 1, count: dims) - shape[shape.count - 1] = arr.count - /// Examples: - /// dims=1 : [arr.count] - /// dims=2 : [1, arr.count] - /// - let o = try! MLMultiArray(shape: shape as [NSNumber], dataType: .int32) - let ptr = UnsafeMutablePointer(OpaquePointer(o.dataPointer)) - for (i, item) in arr.enumerated() { - ptr[i] = Int32(item) - } - return o - } - - /// All values will be stored in the last dimension of the MLMultiArray (default is dims=1) - static func from(_ arr: [Double], dims: Int = 1) -> MLMultiArray { - var shape = Array(repeating: 1, count: dims) - shape[shape.count - 1] = arr.count - /// Examples: - /// dims=1 : [arr.count] - /// dims=2 : [1, arr.count] - /// - let o = try! MLMultiArray(shape: shape as [NSNumber], dataType: .float64) - let ptr = UnsafeMutablePointer(OpaquePointer(o.dataPointer)) - for (i, item) in arr.enumerated() { - ptr[i] = Double(item) - } - return o - } - - /// This will concatenate all dimensions into one one-dim array. - static func toIntArray(_ o: MLMultiArray) -> [Int] { - var arr = Array(repeating: 0, count: o.count) - let ptr = UnsafeMutablePointer(OpaquePointer(o.dataPointer)) - for i in 0.. [Int] { Self.toIntArray(self) } - - /// This will concatenate all dimensions into one one-dim array. - static func toDoubleArray(_ o: MLMultiArray) -> [Double] { - var arr: [Double] = Array(repeating: 0, count: o.count) - let ptr = UnsafeMutablePointer(OpaquePointer(o.dataPointer)) - for i in 0.. 
[Double] { Self.toDoubleArray(self) } - - /// Helper to construct a sequentially-indexed multi array, - /// useful for debugging and unit tests - /// Example in 3 dimensions: - /// ``` - /// [[[ 0, 1, 2, 3 ], - /// [ 4, 5, 6, 7 ], - /// [ 8, 9, 10, 11 ]], - /// [[ 12, 13, 14, 15 ], - /// [ 16, 17, 18, 19 ], - /// [ 20, 21, 22, 23 ]]] - /// ``` - static func testTensor(shape: [Int]) -> MLMultiArray { - let arr = try! MLMultiArray(shape: shape as [NSNumber], dataType: .double) - let ptr = UnsafeMutablePointer(OpaquePointer(arr.dataPointer)) - for i in 0.. MLMultiArray { - assert( - indexing.count == o.shape.count - ) - assert( - indexing.filter { $0 == Indexing.slice }.count == 1 - ) - var selectDims: [Int: Int] = [:] - for (i, idx) in indexing.enumerated() { - if case .select(let select) = idx { - selectDims[i] = select - } - } - return slice( - o, - sliceDim: indexing.firstIndex { $0 == Indexing.slice }!, - selectDims: selectDims - ) - } - - /// Slice an array according to a list, according to `sliceDim` (which dimension to slice on) - /// and a dictionary of `dim` to `index`. - /// - /// You must select all other dimensions than the slice dimension (cf. the assert). - static func slice(_ o: MLMultiArray, sliceDim: Int, selectDims: [Int: Int]) -> MLMultiArray { - assert( - selectDims.count + 1 == o.shape.count - ) - var shape: [NSNumber] = Array(repeating: 1, count: o.shape.count) - shape[sliceDim] = o.shape[sliceDim] - /// print("About to slice ndarray of shape \(o.shape) into ndarray of shape \(shape)") - let arr = try! MLMultiArray(shape: shape, dataType: .double) - - /// let srcPtr = UnsafeMutablePointer(OpaquePointer(o.dataPointer)) - /// TODO: use srcPtr instead of array subscripting. - let dstPtr = UnsafeMutablePointer(OpaquePointer(arr.dataPointer)) - for i in 0.. String { - func indent(_ x: Int) -> String { - return String(repeating: " ", count: x) - } - - // This function is called recursively for every dimension. - // Add an entry for this dimension to the end of the array. - var indices = indices + [0] - - let d = indices.count - 1 // the current dimension - let N = shape[d].intValue // how many elements in this dimension - var s = "[" - if indices.count < shape.count { // not last dimension yet? - for i in 0.. { - var floats: [Float] { - guard self.strides.first == 1, self.strides.count == 1 else { - // For some reason this path is slow. - // If strides is not 1, we can write a Metal kernel to copy the values properly. - return self.scalars - } - - // Fast path: memcpy - let mlArray = MLMultiArray(self) - return mlArray.floats ?? self.scalars - } -} - -public extension MLShapedArraySlice { - var floats: [Float] { - guard self.strides.first == 1, self.strides.count == 1 else { - // For some reason this path is slow. - // If strides is not 1, we can write a Metal kernel to copy the values properly. - return self.scalars - } - - // Fast path: memcpy - let mlArray = MLMultiArray(self) - return mlArray.floats ?? self.scalars - } -} - -public extension MLMultiArray { - var floats: [Float]? { - guard self.dataType == .float32 else { return nil } - - var result: [Float] = Array(repeating: 0, count: self.count) - return self.withUnsafeBytes { ptr in - guard let source = ptr.baseAddress else { return nil } - result.withUnsafeMutableBytes { resultPtr in - let dest = resultPtr.baseAddress! 
- memcpy(dest, source, self.count * MemoryLayout.stride) - } - return result - } - - } -} diff --git a/Sources/TensorUtils/Math.swift b/Sources/TensorUtils/Math.swift deleted file mode 100644 index 4050ac1..0000000 --- a/Sources/TensorUtils/Math.swift +++ /dev/null @@ -1,171 +0,0 @@ -// -// Math.swift -// CoreMLBert -// -// Created by Julien Chaumond on 27/06/2019. -// Copyright © 2019 Hugging Face. All rights reserved. -// - -import Foundation -import Accelerate -import CoreML - -/// -/// From M.I. Hollemans -/// -/// https://github.com/hollance/CoreMLHelpers -/// -public struct Math { - - /** - Returns the index and value of the largest element in the array. - - - Parameters: - - ptr: Pointer to the first element in memory. - - count: How many elements to look at. - - stride: The distance between two elements in memory. - */ - public static func argmax(_ ptr: UnsafePointer, count: Int, stride: Int = 1) -> (Int, Float) { - var maxValue: Float = 0 - var maxIndex: vDSP_Length = 0 - vDSP_maxvi(ptr, vDSP_Stride(stride), &maxValue, &maxIndex, vDSP_Length(count)) - return (Int(maxIndex), maxValue) - } - - /** - Returns the index and value of the largest element in the array. - - Parameters: - - ptr: Pointer to the first element in memory. - - count: How many elements to look at. - - stride: The distance between two elements in memory. - */ - public static func argmax(_ ptr: UnsafePointer, count: Int, stride: Int = 1) -> (Int, Double) { - var maxValue: Double = 0 - var maxIndex: vDSP_Length = 0 - vDSP_maxviD(ptr, vDSP_Stride(stride), &maxValue, &maxIndex, vDSP_Length(count)) - return (Int(maxIndex), maxValue) - } - - public static func argmax32(_ ptr: UnsafePointer, count: Int, stride: Int = 1) -> (Int, Float) { - var maxValue: Float = 0 - var maxIndex: vDSP_Length = 0 - vDSP_maxvi(ptr, vDSP_Stride(stride), &maxValue, &maxIndex, vDSP_Length(count)) - return (Int(maxIndex), maxValue) - } - - /// MLMultiArray helper. - /// Works in our specific use case. - public static func argmax(_ multiArray: MLMultiArray) -> (Int, Double) { - assert(multiArray.dataType == .double) - let ptr = UnsafeMutablePointer(OpaquePointer(multiArray.dataPointer)) - return Math.argmax(ptr, count: multiArray.count) - } - - /// MLMultiArray helper. - /// Works in our specific use case. - public static func argmax32(_ multiArray: MLMultiArray) -> (Int, Float) { - assert(multiArray.dataType == .float32) - let ptr = UnsafeMutablePointer(OpaquePointer(multiArray.dataPointer)) - return Math.argmax32(ptr, count: multiArray.count) - } - - /// Returns the cumulative sum of the array. - public static func cumsum(_ arr: [Float]) -> [Float] { - guard !arr.isEmpty else { - return [] - } - let arrCount = vDSP_Length(arr.count) - var weight: Float = 1.0 - var result: [Float] = Array(repeating: 0.0, count: arr.count) - var firstItem = arr[0] - vDSP_vrsum(arr, 1, &weight, &result, 1, arrCount) - vDSP_vsadd(result, 1, &firstItem, &result, 1, arrCount) - return result - } - - /// Multinomial sampling from an array of probs. Works well with topK - public static func sample(indexes: [Int], probs: [Float]) -> Int { - let i = randomNumber(probabilities: probs) - return indexes[i] - } - - /** - Computes the "softmax" function over an array. 
- Based on code from https://github.com/nikolaypavlov/MLPNeuralNet/ - This is what softmax looks like in "pseudocode" (actually using Python - and numpy): - x -= np.max(x) - exp_scores = np.exp(x) - softmax = exp_scores / np.sum(exp_scores) - First we shift the values of x so that the highest value in the array is 0. - This ensures numerical stability with the exponents, so they don't blow up. - */ - public static func softmax(_ x: [Float]) -> [Float] { - var x = x - let len = vDSP_Length(x.count) - - // Find the maximum value in the input array. - var max: Float = 0 - vDSP_maxv(x, 1, &max, len) - - // Subtract the maximum from all the elements in the array. - // Now the highest value in the array is 0. - max = -max - vDSP_vsadd(x, 1, &max, &x, 1, len) - - // Exponentiate all the elements in the array. - var count = Int32(x.count) - vvexpf(&x, x, &count) - - // Compute the sum of all exponentiated values. - var sum: Float = 0 - vDSP_sve(x, 1, &sum, len) - - // Divide each element by the sum. This normalizes the array contents - // so that they all add up to 1. - vDSP_vsdiv(x, 1, &sum, &x, 1, len) - - return x - } - - /// Multinomial sampling - /// - /// From https://stackoverflow.com/questions/30309556/generate-random-numbers-with-a-given-distribution - /// - public static func randomNumber(probabilities: [Float]) -> Int { - // Sum of all probabilities (so that we don't have to require that the sum is 1.0): - let sum = probabilities.reduce(0, +) - // Random number in the range 0.0 <= rnd < sum : - let rnd = sum * Float(arc4random_uniform(UInt32.max)) / Float(UInt32.max) - // Find the first interval of accumulated probabilities into which `rnd` falls: - var accum: Float = 0.0 - for (i, p) in probabilities.enumerated() { - accum += p - if rnd < accum { - return i - } - } - // This point might be reached due to floating point inaccuracies: - return (probabilities.count - 1) - } -} - -// MLShapedArray versions - -public extension Math { - static func argmax(_ shapedArray: MLShapedArray) -> (Int, Float) { - shapedArray.withUnsafeShapedBufferPointer { ptr, shape, strides in - assert(shape.count == 1, "Only supported for 1-dimensional arrays or slices") - return Math.argmax32(ptr.baseAddress!, count: shapedArray.count, stride: strides.first!) - } - } - - // TODO: handle Double, etc. - static func argmax(_ shapedArray: some MLShapedArrayProtocol) -> (Int, Float) { - shapedArray.withUnsafeShapedBufferPointer { ptr, shape, strides in - assert(shape.count == 1, "Only supported for 1-dimensional arrays or slices") - let floatsPtr = ptr.baseAddress as! UnsafePointer - return Math.argmax32(floatsPtr, count: shapedArray.count, stride: strides.first!) - } - } -} diff --git a/Sources/TensorUtils/Weights.swift b/Sources/TensorUtils/Weights.swift deleted file mode 100644 index 2050e01..0000000 --- a/Sources/TensorUtils/Weights.swift +++ /dev/null @@ -1,88 +0,0 @@ -import CoreML - - -public struct Weights { - - enum WeightsError: Error { - case notSupported(message: String) - case invalidFile - } - - private let dictionary: [String: MLMultiArray] - - init(_ dictionary: [String: MLMultiArray]) { - self.dictionary = dictionary - } - - subscript(key: String) -> MLMultiArray? 
{ dictionary[key] } - - public static func from(fileURL: URL) throws -> Weights { - guard ["safetensors", "gguf", "mlx"].contains(fileURL.pathExtension) - else { throw WeightsError.notSupported(message: "\(fileURL.pathExtension)") } - - let data = try Data(contentsOf: fileURL, options: .mappedIfSafe) - switch ([UInt8](data.subdata(in: 0..<4)), [UInt8](data.subdata(in: 4..<6))) { - case ([0x47, 0x47, 0x55, 0x46], _): throw WeightsError.notSupported(message: ("gguf")) - case ([0x93, 0x4e, 0x55, 0x4d], [0x50, 0x59]): throw WeightsError.notSupported(message: "mlx") - default: return try Safetensor.from(data: data) - } - } -} - -struct Safetensor { - - typealias Error = Weights.WeightsError - - struct Header { - - struct Offset: Decodable { - let dataOffsets: [Int]? - let dtype: String? - let shape: [Int]? - - /// Unsupported: "I8", "U8", "I16", "U16", "BF16" - var dataType: MLMultiArrayDataType? { - get throws { - switch dtype { - case "I32", "U32": .int32 - case "F16": .float16 - case "F32": .float32 - case "F64", "U64": .float64 - default: throw Error.notSupported(message: "\(dtype ?? "empty")") - } - } - } - } - - static func from(data: Data) throws -> [String: Offset?] { - let decoder = JSONDecoder() - decoder.keyDecodingStrategy = .convertFromSnakeCase - return try decoder.decode([String: Offset?].self, from: data) - } - } - - static func from(data: Data) throws -> Weights { - let headerSize: Int = data.subdata(in: 0..<8).withUnsafeBytes({ $0.load(as: Int.self) }) - guard headerSize < data.count else { throw Error.invalidFile } - let header = try Header.from(data: data.subdata(in: 8..<(headerSize + 8))) - - var dict = [String: MLMultiArray]() - for (key, point) in header { - guard let offsets = point?.dataOffsets, offsets.count == 2, - let shape = point?.shape as? [NSNumber], - let dType = try point?.dataType - else { continue } - - let strides = shape.dropFirst().reversed().reduce(into: [1]) { acc, a in - acc.insert(acc[0].intValue * a.intValue as NSNumber, at: 0) - } - let start = 8 + offsets[0] + headerSize - let end = 8 + offsets[1] + headerSize - let tensorData = data.subdata(in: start.. Tokenizer { - let config = LanguageModelConfigurationFromHub(modelName: model, hubApi: hubApi) - guard let tokenizerConfig = try await config.tokenizerConfig else { throw TokenizerError.missingConfig } - let tokenizerData = try await config.tokenizerData - - return try AutoTokenizer.from(tokenizerConfig: tokenizerConfig, tokenizerData: tokenizerData) - } - public static func from( modelFolder: URL, hubApi: HubApi = .shared diff --git a/Sources/Tokenizers/Utils.swift b/Sources/Tokenizers/Utils.swift index 9efacc2..00d04fa 100644 --- a/Sources/Tokenizers/Utils.swift +++ b/Sources/Tokenizers/Utils.swift @@ -9,23 +9,6 @@ import Foundation struct Utils { - /// Time a block in ms - static func time(label: String, _ block: () -> T) -> T { - let startTime = CFAbsoluteTimeGetCurrent() - let result = block() - let diff = (CFAbsoluteTimeGetCurrent() - startTime) * 1_000 - print("[\(label)] \(diff)ms") - return result - } - - /// Time a block in seconds and return (output, time) - static func time(_ block: () -> T) -> (T, Double) { - let startTime = CFAbsoluteTimeGetCurrent() - let result = block() - let diff = CFAbsoluteTimeGetCurrent() - startTime - return (result, diff) - } - /// Return unix timestamp in ms static func dateNow() -> Int64 { // Use `Int` when we don't support 32-bits devices/OSes anymore. 
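
Background on the removal above: the deleted `Weights`/`Safetensor` code was the package's only safetensors reader, and it parses the container as an 8-byte little-endian header length followed by a JSON header mapping tensor names to dtype, shape, and data offsets. A minimal, self-contained sketch of that header-parsing step is included here for reference only; the names `TensorHeaderEntry`, `SafetensorsHeaderError`, and `readSafetensorsHeader` are illustrative and are not part of the package API.

    import Foundation

    // Sketch of the safetensors header layout handled by the deleted Safetensor.from(data:):
    // the first 8 bytes hold a little-endian header size, followed by a JSON object of
    // tensor name -> { "dtype", "shape", "data_offsets" }. Names here are hypothetical.
    struct TensorHeaderEntry: Decodable {
        let dtype: String?
        let shape: [Int]?
        let dataOffsets: [Int]?
    }

    enum SafetensorsHeaderError: Error {
        case truncated
    }

    func readSafetensorsHeader(from data: Data) throws -> [String: TensorHeaderEntry] {
        guard data.count >= 8 else { throw SafetensorsHeaderError.truncated }
        // First 8 bytes: little-endian header length.
        let rawSize = data.subdata(in: 0..<8).withUnsafeBytes { $0.loadUnaligned(as: UInt64.self) }
        let headerSize = Int(UInt64(littleEndian: rawSize))
        guard headerSize > 0, headerSize <= data.count - 8 else { throw SafetensorsHeaderError.truncated }
        // JSON header; snake_case keys ("data_offsets") map onto dataOffsets.
        let headerData = data.subdata(in: 8..<(8 + headerSize))
        let decoder = JSONDecoder()
        decoder.keyDecodingStrategy = .convertFromSnakeCase
        return try decoder.decode([String: TensorHeaderEntry].self, from: headerData)
    }

Usage would be along the lines of `try readSafetensorsHeader(from: Data(contentsOf: fileURL, options: .mappedIfSafe))`; the tensor payloads themselves then sit after the header at the byte ranges given by each entry's data offsets, which is what the removed `Weights.from(fileURL:)` sliced into `MLMultiArray` values.
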
diff --git a/Sources/TransformersCLI/main.swift b/Sources/TransformersCLI/main.swift deleted file mode 100644 index a040946..0000000 --- a/Sources/TransformersCLI/main.swift +++ /dev/null @@ -1,109 +0,0 @@ -import ArgumentParser -import CoreML -import Foundation - -import Models -import Generation - -@available(iOS 16.2, macOS 13.1, *) -struct TransformersCLI: ParsableCommand { - static let configuration = CommandConfiguration( - abstract: "Run text generation on a Core ML language model", - version: "0.0.1" - ) - - @Argument(help: "Input text") - var prompt: String - - @Argument(help: "Path to Core ML mlpackage model") - var modelPath: String = "./model.mlpackage" - - @Option(help: "Maximum amount of tokens the model should generate") - var maxLength: Int = 50 - - @Option(help: "Compute units to load model with {all,cpuOnly,cpuAndGPU,cpuAndNeuralEngine}") - var computeUnits: ComputeUnits = .cpuAndGPU - - func generate(model: LanguageModel, config: GenerationConfig, prompt: String, printOutput: Bool = true) { - let semaphore = DispatchSemaphore(value: 0) - Task.init { [config] in - defer { semaphore.signal() } - var tokensReceived = 0 - var previousIndex: String.Index? = nil - let begin = Date() - do { - try await model.generate(config: config, prompt: prompt) { inProgressGeneration in - tokensReceived += 1 - let response = inProgressGeneration.replacingOccurrences(of: "\\n", with: "\n") - if printOutput { - print(response[(previousIndex ?? response.startIndex)...], terminator: "") - fflush(stdout) - } - previousIndex = response.endIndex - } - let completionTime = Date().timeIntervalSince(begin) - let tps = Double(tokensReceived) / completionTime - if printOutput { - print("") - print("\(tps.formatted("%.2f")) tokens/s, total time: \(completionTime.formatted("%.2f"))s") - } - } catch { - print("Error \(error)") - } - } - semaphore.wait() - } - - func compile(at url: URL) throws -> URL { - #if os(watchOS) - fatalError("Model compilation is not supported on watchOS") - #else - if url.pathExtension == "mlmodelc" { return url } - print("Compiling model \(url)") - return try MLModel.compileModel(at: url) - #endif - } - - func run() throws { - let url = URL(filePath: modelPath) - let compiledURL = try compile(at: url) - print("Loading model \(compiledURL)") - let model = try LanguageModel.loadCompiled(url: compiledURL, computeUnits: computeUnits.asMLComputeUnits) - - // Using greedy generation for now - var config = model.defaultGenerationConfig - config.doSample = false - config.maxNewTokens = maxLength - - print("Warming up...") - generate(model: model, config: config, prompt: prompt, printOutput: false) - - print("Generating") - generate(model: model, config: config, prompt: prompt) - } -} - -@available(iOS 16.2, macOS 13.1, *) -enum ComputeUnits: String, ExpressibleByArgument, CaseIterable { - case all, cpuAndGPU, cpuOnly, cpuAndNeuralEngine - var asMLComputeUnits: MLComputeUnits { - switch self { - case .all: return .all - case .cpuAndGPU: return .cpuAndGPU - case .cpuOnly: return .cpuOnly - case .cpuAndNeuralEngine: return .cpuAndNeuralEngine - } - } -} - -if #available(iOS 16.2, macOS 13.1, *) { - TransformersCLI.main() -} else { - print("Unsupported OS") -} - -extension Double { - func formatted(_ format: String) -> String { - return String(format: "\(format)", self) - } -} diff --git a/Tests/NormalizerTests/NormalizerTests.swift b/Tests/NormalizerTests/NormalizerTests.swift deleted file mode 100644 index fea423a..0000000 --- a/Tests/NormalizerTests/NormalizerTests.swift +++ /dev/null @@ 
-1,258 +0,0 @@ -import XCTest - -@testable import Hub -@testable import Tokenizers - -class NormalizerTests: XCTestCase { - - func testLowercaseNormalizer() { - let testCases: [(String, String)] = [ - ("Café", "café"), - ("François", "françois"), - ("Ωmega", "ωmega"), - ("über", "über"), - ("háček", "háček"), - ("Häagen-Dazs", "häagen-dazs"), - ("你好!", "你好!"), - ("𝔄𝔅ℭ⓵⓶⓷︷,︸,i⁹,i₉,㌀,¼", "𝔄𝔅ℭ⓵⓶⓷︷,︸,i⁹,i₉,㌀,¼"), - ("\u{00C5}", "\u{00E5}"), - ] - - for (arg, expect) in testCases { - let config = Config([:]) - let normalizer = LowercaseNormalizer(config: config) - XCTAssertEqual(normalizer.normalize(text: arg), expect) - } - - let config = Config(["type": NormalizerType.Lowercase.rawValue]) - XCTAssertNotNil(NormalizerFactory.fromConfig(config: config) as? LowercaseNormalizer) - } - - func testNFDNormalizer() { - let testCases: [(String, String)] = [ - ("caf\u{65}\u{301}", "cafe\u{301}"), - ("François", "François"), - ("Ωmega", "Ωmega"), - ("über", "über"), - ("háček", "háček"), - ("Häagen-Dazs", "Häagen-Dazs"), - ("你好!", "你好!"), - ("𝔄𝔅ℭ⓵⓶⓷︷,︸,i⁹,i₉,㌀,¼", "𝔄𝔅ℭ⓵⓶⓷︷,︸,i⁹,i₉,㌀,¼"), - ("\u{00C5}", "\u{0041}\u{030A}"), - ] - - for (arg, expect) in testCases { - let config = Config([:]) - let normalizer = NFDNormalizer(config: config) - XCTAssertEqual(normalizer.normalize(text: arg), expect) - } - - let config = Config(["type": NormalizerType.NFD.rawValue]) - XCTAssertNotNil(NormalizerFactory.fromConfig(config: config) as? NFDNormalizer) - } - - func testNFCNormalizer() { - let testCases: [(String, String)] = [ - ("café", "café"), - ("François", "François"), - ("Ωmega", "Ωmega"), - ("über", "über"), - ("háček", "háček"), - ("Häagen-Dazs", "Häagen-Dazs"), - ("你好!", "你好!"), - ("𝔄𝔅ℭ⓵⓶⓷︷,︸,i⁹,i₉,㌀,¼", "𝔄𝔅ℭ⓵⓶⓷︷,︸,i⁹,i₉,㌀,¼"), - ("\u{00C5}", "\u{00C5}"), - ] - - for (arg, expect) in testCases { - let config = Config([:]) - let normalizer = NFCNormalizer(config: config) - XCTAssertEqual(normalizer.normalize(text: arg), expect) - } - - let config = Config(["type": NormalizerType.NFC.rawValue]) - XCTAssertNotNil(NormalizerFactory.fromConfig(config: config) as? NFCNormalizer) - } - - func testNFKDNormalizer() { - let testCases: [(String, String)] = [ - ("café", "cafe\u{301}"), - ("François", "François"), - ("Ωmega", "Ωmega"), - ("über", "über"), - ("háček", "háček"), - ("Häagen-Dazs", "Häagen-Dazs"), - ("你好!", "你好!"), - ("𝔄𝔅ℭ⓵⓶⓷︷,︸,i⁹,i₉,㌀,¼", "ABC⓵⓶⓷{,},i9,i9,アパート,1⁄4"), - ("\u{00C5}", "Å"), - ] - - for (arg, expect) in testCases { - let config = Config([:]) - let normalizer = NFKDNormalizer(config: config) - XCTAssertEqual(normalizer.normalize(text: arg), expect) - } - - let config = Config(["type": NormalizerType.NFKD.rawValue]) - XCTAssertNotNil(NormalizerFactory.fromConfig(config: config) as? NFKDNormalizer) - } - - func testNFKCINormalizer() { - let testCases: [(String, String)] = [ - ("café", "café"), - ("François", "François"), - ("Ωmega", "Ωmega"), - ("über", "über"), - ("háček", "háček"), - ("Häagen-Dazs", "Häagen-Dazs"), - ("你好!", "你好!"), - ("𝔄𝔅ℭ⓵⓶⓷︷,︸,i⁹,i₉,㌀,¼", "ABC⓵⓶⓷{,},i9,i9,アパート,1⁄4"), - ("\u{00C5}", "\u{00C5}"), - ] - - for (arg, expect) in testCases { - let config = Config([:]) - let normalizer = NFKCNormalizer(config: config) - XCTAssertEqual(normalizer.normalize(text: arg), expect) - } - - let config = Config(["type": NormalizerType.NFKC.rawValue]) - XCTAssertNotNil(NormalizerFactory.fromConfig(config: config) as? 
NFKCNormalizer) - } - - func testStripAccents() { - let testCases: [(String, String)] = [ - ("département", "departement"), - ] - - //TODO: test combinations with/without lowercase - let config = Config(["stripAccents":true]) - let normalizer = BertNormalizer(config: config) - for (arg, expect) in testCases { - XCTAssertEqual(normalizer.normalize(text: arg), expect) - } - } - - func testBertNormalizer() { - let testCases: [(String, String)] = [ - ("Café", "café"), - ("François", "françois"), - ("Ωmega", "ωmega"), - ("über", "über"), - ("háček", "háček"), - ("Häagen\tDazs", "häagen dazs"), - ("你好!", " 你 好 !"), - ("𝔄𝔅ℭ⓵⓶⓷︷,︸,i⁹,i₉,㌀,¼", "𝔄𝔅ℭ⓵⓶⓷︷,︸,i⁹,i₉,㌀,¼"), - ("\u{00C5}", "\u{00E5}"), - ] - - for (arg, expect) in testCases { - let config = Config(["stripAccents":false]) - let normalizer = BertNormalizer(config: config) - XCTAssertEqual(normalizer.normalize(text: arg), expect) - } - - let config = Config(["type": NormalizerType.Bert.rawValue]) - XCTAssertNotNil(NormalizerFactory.fromConfig(config: config) as? BertNormalizer) - } - - func testBertNormalizerDefaults() { - // Python verification: t._tokenizer.normalizer.normalize_str("Café") - let testCases: [(String, String)] = [ - ("Café", "cafe"), - ("François", "francois"), - ("Ωmega", "ωmega"), - ("über", "uber"), - ("háček", "hacek"), - ("Häagen\tDazs", "haagen dazs"), - ("你好!", " 你 好 !"), - ("𝔄𝔅ℭ⓵⓶⓷︷,︸,i⁹,i₉,㌀,¼", "𝔄𝔅ℭ⓵⓶⓷︷,︸,i⁹,i₉,㌀,¼"), - ("Å", "a"), - ] - - for (arg, expect) in testCases { - let config = Config([:]) - let normalizer = BertNormalizer(config: config) - XCTAssertEqual(normalizer.normalize(text: arg), expect) - } - - let config = Config(["type": NormalizerType.Bert.rawValue]) - XCTAssertNotNil(NormalizerFactory.fromConfig(config: config) as? BertNormalizer) - } - - func testPrecompiledNormalizer() { - let testCases: [(String, String)] = [ - ("café", "café"), - ("François", "François"), - ("Ωmega", "Ωmega"), - ("über", "über"), - ("háček", "háček"), - ("Häagen-Dazs", "Häagen-Dazs"), - ("你好!", "你好!"), - ("𝔄𝔅ℭ⓵⓶⓷︷,︸,i⁹,i₉,㌀,¼", "ABC⓵⓶⓷{,},i9,i9,アパート,1⁄4"), - ("\u{00C5}", "\u{00C5}"), - ("™\u{001e}g", "TMg"), - ("full-width~tilde", "full-width~tilde"), - ] - - for (arg, expect) in testCases { - let config = Config([:]) - let normalizer = PrecompiledNormalizer(config: config) - XCTAssertEqual(normalizer.normalize(text: arg), expect) - } - - let config = Config(["type": NormalizerType.Precompiled.rawValue]) - XCTAssertNotNil(NormalizerFactory.fromConfig(config: config) as? PrecompiledNormalizer) - } - - func testStripAccentsINormalizer() { - let testCases: [(String, String)] = [ - ("café", "café"), - ("François", "François"), - ("Ωmega", "Ωmega"), - ("über", "über"), - ("háček", "háček"), - ("Häagen-Dazs", "Häagen-Dazs"), - ("你好!", "你好!"), - ("𝔄𝔅ℭ⓵⓶⓷︷,︸,i⁹,i₉,㌀,¼", "ABC⓵⓶⓷{,},i9,i9,アパート,1⁄4"), - ("\u{00C5}", "\u{00C5}"), - ] - - for (arg, expect) in testCases { - let config = Config([:]) - let normalizer = StripAccentsNormalizer(config: config) - XCTAssertEqual(normalizer.normalize(text: arg), expect) - } - - let config = Config(["type": NormalizerType.StripAccents.rawValue]) - XCTAssertNotNil(NormalizerFactory.fromConfig(config: config) as? 
StripAccentsNormalizer) - } - - func testStripNormalizer() { - let testCases: [(String, String, Bool, Bool)] = [ - (" hello ", "hello", true, true), - (" hello ", "hello ", true, false), - (" hello ", " hello", false, true), - (" hello ", " hello ", false, false), - ("\t\nHello\t\n", "Hello", true, true), - (" ", "", true, true), - ("", "", true, true), - ] - - for (input, expected, leftStrip, rightStrip) in testCases { - let config = Config([ - "type": NormalizerType.Strip.rawValue, - "stripLeft": leftStrip, - "stripRight": rightStrip, - ]) - let normalizer = StripNormalizer(config: config) - XCTAssertEqual( - normalizer.normalize(text: input), expected, - "Failed for input: '\(input)', leftStrip: \(leftStrip), rightStrip: \(rightStrip)") - } - - let config = Config(["type": NormalizerType.Strip.rawValue]) - XCTAssertNotNil(NormalizerFactory.fromConfig(config: config) as? StripNormalizer) - } - -} diff --git a/Tests/PostProcessorTests/PostProcessorTests.swift b/Tests/PostProcessorTests/PostProcessorTests.swift deleted file mode 100644 index 347bc38..0000000 --- a/Tests/PostProcessorTests/PostProcessorTests.swift +++ /dev/null @@ -1,83 +0,0 @@ -import XCTest -@testable import Tokenizers -@testable import Hub - -class PostProcessorTests: XCTestCase { - func testRobertaProcessing() { - let testCases: [(Config, [String], [String]?, [String])] = [ - // Should keep spaces; uneven spaces; ignore `addPrefixSpace`. - ( - Config(["cls": (0, "[HEAD]") as (UInt, String), - "sep": (0, "[END]") as (UInt, String), - "trimOffset": false, - "addPrefixSpace": true, - ]), - [" The", " sun", "sets ", " in ", " the ", "west"], - nil, - ["[HEAD]", " The", " sun", "sets ", " in ", " the ", "west", "[END]"] - ), - // Should leave only one space around each token. - ( - Config(["cls": (0, "[START]") as (UInt, String), - "sep": (0, "[BREAK]") as (UInt, String), - "trimOffset": true, - "addPrefixSpace": true, - ]), - [" The ", " sun", "sets ", " in ", " the ", "west"], - nil, - ["[START]", " The ", " sun", "sets ", " in ", " the ", "west", "[BREAK]"] - ), - // Should ignore empty tokens pair. - ( - Config(["cls": (0, "[START]") as (UInt, String), - "sep": (0, "[BREAK]") as (UInt, String), - "trimOffset": true, - "addPrefixSpace": true, - ]), - [" The ", " sun", "sets ", " in ", " the ", "west"], - [], - ["[START]", " The ", " sun", "sets ", " in ", " the ", "west", "[BREAK]"] - ), - // Should trim all whitespace. - ( - Config(["cls": (0, "[CLS]") as (UInt, String), - "sep": (0, "[SEP]") as (UInt, String), - "trimOffset": true, - "addPrefixSpace": false, - ]), - [" The ", " sun", "sets ", " in ", " the ", "west"], - nil, - ["[CLS]", "The", "sun", "sets", "in", "the", "west", "[SEP]"] - ), - // Should add tokens. 
- ( - Config(["cls": (0, "[CLS]") as (UInt, String), - "sep": (0, "[SEP]") as (UInt, String), - "trimOffset": true, - "addPrefixSpace": true, - ]), - [" The ", " sun", "sets ", " in ", " the ", "west"], - [".", "The", " cat ", " is ", " sitting ", " on", "the ", "mat"], - ["[CLS]", " The ", " sun", "sets ", " in ", " the ", "west", "[SEP]", - "[SEP]", ".", "The", " cat ", " is ", " sitting ", " on", "the ", - "mat", "[SEP]"] - ), - ( - Config(["cls": (0, "[CLS]") as (UInt, String), - "sep": (0, "[SEP]") as (UInt, String), - "trimOffset": true, - "addPrefixSpace": true, - ]), - [" 你 ", " 好 ", ","], - [" 凯 ", " 蒂 ", "!"], - ["[CLS]", " 你 ", " 好 ", ",", "[SEP]", "[SEP]", " 凯 ", " 蒂 ", "!", "[SEP]"] - ), - ] - - for (config, tokens, tokensPair, expect) in testCases { - let processor = RobertaProcessing(config: config) - let output = processor.postProcess(tokens: tokens, tokensPair: tokensPair) - XCTAssertEqual(output, expect) - } - } -} diff --git a/Tests/TensorUtilsTests/LogitsWarperTests.swift b/Tests/TensorUtilsTests/LogitsWarperTests.swift deleted file mode 100644 index 0260967..0000000 --- a/Tests/TensorUtilsTests/LogitsWarperTests.swift +++ /dev/null @@ -1,152 +0,0 @@ -// -// LogitsWarperTests.swift -// -// Created by Jan Krukowski on 09/12/2023. -// - -import XCTest -import CoreML -@testable import TensorUtils - -final class LogitsWarperTests: XCTestCase { - private let accuracy: Float = 0.00001 - - func testTemperatureLogitsWarper() { - let result1 = TemperatureLogitsWarper(temperature: 0.0)([], []) - XCTAssertTrue(result1.indices.isEmpty) - XCTAssertTrue(result1.logits.isEmpty) - - let result2 = TemperatureLogitsWarper(temperature: 1.0)([], []) - XCTAssertTrue(result2.indices.isEmpty) - XCTAssertTrue(result2.logits.isEmpty) - - let result3 = TemperatureLogitsWarper(temperature: 1.0)([0, 1], [2.0, 1.0]) - XCTAssertEqual(result3.indices, [0, 1]) - XCTAssertEqual(result3.logits, [2.0, 1.0], accuracy: accuracy) - - let result4 = TemperatureLogitsWarper(temperature: 2.0)([0, 1], [2.0, 1.0]) - XCTAssertEqual(result4.indices, [0, 1]) - XCTAssertEqual(result4.logits, [1.0, 0.5], accuracy: accuracy) - - let result5 = TemperatureLogitsWarper(temperature: 0.5)([0, 1], [2.0, 1.0]) - XCTAssertEqual(result5.indices, [0, 1]) - XCTAssertEqual(result5.logits, [4.0, 2.0], accuracy: accuracy) - - let result6 = TemperatureLogitsWarper(temperature: 0.5)([200, 100], [2.0, 1.0]) - XCTAssertEqual(result6.indices, [200, 100]) - XCTAssertEqual(result6.logits, [4.0, 2.0], accuracy: accuracy) - } - - func testTopKLogitsWarper() { - let result1 = TopKLogitsWarper(k: 0)([], []) - XCTAssertTrue(result1.indices.isEmpty) - XCTAssertTrue(result1.logits.isEmpty) - - let result2 = TopKLogitsWarper(k: 3)([], []) - XCTAssertTrue(result2.indices.isEmpty) - XCTAssertTrue(result2.logits.isEmpty) - - let result3 = TopKLogitsWarper(k: 3)([0, 1], [2.0, 1.0]) - XCTAssertEqual(result3.indices, [0, 1]) - XCTAssertEqual(result3.logits, [2.0, 1.0], accuracy: accuracy) - - let result4 = TopKLogitsWarper(k: 3)([0, 1, 2], [2.0, 1.0, 3.0]) - XCTAssertEqual(result4.indices, [2, 0, 1]) - XCTAssertEqual(result4.logits, [3.0, 2.0, 1.0], accuracy: accuracy) - - let result5 = TopKLogitsWarper(k: 4)([0, 1, 2, 3, 4, 5], [2.0, 1.0, 3.0, -1.0, 123.0, 0.0]) - XCTAssertEqual(result5.indices, [4, 2, 0, 1]) - XCTAssertEqual(result5.logits, [123.0, 3.0, 2.0, 1.0], accuracy: accuracy) - - let result6 = TopKLogitsWarper(k: 3)([10, 1, 52], [2.0, 1.0, 3.0]) - XCTAssertEqual(result6.indices, [52, 10, 1]) - XCTAssertEqual(result6.logits, [3.0, 2.0, 1.0], 
accuracy: accuracy) - } - - func testTopPLogitsWarper() { - let result1 = TopPLogitsWarper(p: 0.99)([], []) - XCTAssertTrue(result1.indices.isEmpty) - XCTAssertTrue(result1.logits.isEmpty) - - let logits = (0 ..< 10).map { Float($0) } - let indices = Array(logits.indices) - let result2 = TopPLogitsWarper(p: 0.99)(indices, logits) - XCTAssertEqual(result2.indices, [9, 8, 7, 6, 5]) - XCTAssertEqual(result2.logits, [9.0, 8.0, 7.0, 6.0, 5.0], accuracy: accuracy) - - let result3 = TopPLogitsWarper(p: 0.95)(indices, logits) - XCTAssertEqual(result3.indices, [9, 8, 7]) - XCTAssertEqual(result3.logits, [9.0, 8.0, 7.0], accuracy: accuracy) - - let result4 = TopPLogitsWarper(p: 0.6321493)(indices, logits) - XCTAssertEqual(result4.indices, [9, 8]) - XCTAssertEqual(result4.logits, [9.0, 8.0], accuracy: accuracy) - - let result5 = TopPLogitsWarper(p: 0.95)([3, 1, 8], [0, 1, 2]) - XCTAssertEqual(result5.indices, [8, 1, 3]) - XCTAssertEqual(result5.logits, [2, 1, 0], accuracy: accuracy) - } - - func testRepetitionPenaltyWarper() { - let indices = Array(0..<10) - let logits = indices.map({ Float($0) }) - - let result1 = RepetitionPenaltyWarper(penalty: 1.0)(indices, logits) - XCTAssertEqual(result1.indices, indices) - XCTAssertEqual(result1.logits, logits, accuracy: accuracy) - - let result2 = RepetitionPenaltyWarper(penalty: 3.75)(indices, logits) - XCTAssertEqual(result2.indices, indices) - let logits2 = indices.map({ Float($0) / 3.75 }) - XCTAssertEqual(result2.logits, logits2, accuracy: accuracy) - - let result3 = RepetitionPenaltyWarper(penalty: 0.75)([0, 1, 2], [0.8108, 0.9954, 0.0119]) - XCTAssertEqual(result3.indices, [0, 1, 2]) - XCTAssertEqual(result3.logits, [1.0811, 1.3272, 0.0158], accuracy: 1e-4) - - let result4 = RepetitionPenaltyWarper(penalty: 1.11)([2, 3, 4], [0.5029, 0.8694, 0.4765, 0.9967, 0.4190, 0.9158]) - XCTAssertEqual(result4.indices, [2, 3, 4]) - XCTAssertEqual(result4.logits, [0.5029, 0.8694, 0.4293, 0.8980, 0.3775, 0.9158], accuracy: 1e-4) - - let result5 = RepetitionPenaltyWarper(penalty: 0.9)([0, 1, 2], [-0.7433, -0.4738, -0.2966]) - XCTAssertEqual(result5.indices, [0, 1, 2]) - XCTAssertEqual(result5.logits, [-0.6690, -0.4264, -0.2669], accuracy: 1e-4) - - let result6 = RepetitionPenaltyWarper(penalty: 1.125)([3, 1, 2], [0.1674, 0.6431, 0.6780, 0.2755]) - XCTAssertEqual(result6.indices, [3, 1, 2]) - XCTAssertEqual(result6.logits, [0.1674, 0.5716, 0.6026, 0.2449], accuracy: 1e-4) - } - - func testLogitsProcessor() { - let processor1 = LogitsProcessor(logitsWarpers: []) - let result1 = processor1([]) - XCTAssertTrue(result1.indices.isEmpty) - XCTAssertTrue(result1.logits.isEmpty) - - let processor2 = LogitsProcessor(logitsWarpers: []) - let result2 = processor2([2.0, 1.0]) - XCTAssertEqual(result2.indices, [0, 1]) - XCTAssertEqual(result2.logits, [2.0, 1.0], accuracy: accuracy) - - let processor3 = LogitsProcessor( - logitsWarpers: [TopKLogitsWarper(k: 3)] - ) - let result3 = processor3([2.0, 1.0, 3.0, -5.0]) - XCTAssertEqual(result3.indices, [2, 0, 1]) - XCTAssertEqual(result3.logits, [3.0, 2.0, 1.0], accuracy: accuracy) - - let processor4 = LogitsProcessor( - logitsWarpers: [TopKLogitsWarper(k: 3), TopPLogitsWarper(p: 0.99)] - ) - let result4 = processor4([2.0, 1.0, 3.0, -5.0, -23.0, 12.5]) - XCTAssertEqual(result4.indices, [5]) - XCTAssertEqual(result4.logits, [12.5], accuracy: accuracy) - - let processor5 = LogitsProcessor( - logitsWarpers: [TopKLogitsWarper(k: 4), TopPLogitsWarper(p: 0.99)] - ) - let result5 = processor5([2.0, 1.0, 3.0, -5.0, -3.0, 4.5]) - 
XCTAssertEqual(result5.indices, [5, 2, 0, 1]) - XCTAssertEqual(result5.logits, [4.5, 3.0, 2.0, 1.0], accuracy: accuracy) - } -} diff --git a/Tests/TensorUtilsTests/Resources/tensor-1d-int32.safetensors b/Tests/TensorUtilsTests/Resources/tensor-1d-int32.safetensors deleted file mode 100644 index b604002..0000000 Binary files a/Tests/TensorUtilsTests/Resources/tensor-1d-int32.safetensors and /dev/null differ diff --git a/Tests/TensorUtilsTests/Resources/tensor-2d-float64.safetensors b/Tests/TensorUtilsTests/Resources/tensor-2d-float64.safetensors deleted file mode 100644 index bb9fc26..0000000 Binary files a/Tests/TensorUtilsTests/Resources/tensor-2d-float64.safetensors and /dev/null differ diff --git a/Tests/TensorUtilsTests/Resources/tensor-3d-float32.safetensors b/Tests/TensorUtilsTests/Resources/tensor-3d-float32.safetensors deleted file mode 100644 index f9f058d..0000000 Binary files a/Tests/TensorUtilsTests/Resources/tensor-3d-float32.safetensors and /dev/null differ diff --git a/Tests/TensorUtilsTests/Resources/tensor-4d-float32.safetensors b/Tests/TensorUtilsTests/Resources/tensor-4d-float32.safetensors deleted file mode 100644 index 0f7a80d..0000000 Binary files a/Tests/TensorUtilsTests/Resources/tensor-4d-float32.safetensors and /dev/null differ diff --git a/Tests/TensorUtilsTests/TensorUtilsTests.swift b/Tests/TensorUtilsTests/TensorUtilsTests.swift deleted file mode 100644 index 6355165..0000000 --- a/Tests/TensorUtilsTests/TensorUtilsTests.swift +++ /dev/null @@ -1,53 +0,0 @@ -// -// TensorUtilsTests.swift -// -// Created by Jan Krukowski on 25/11/2023. -// - -import XCTest -import CoreML -@testable import TensorUtils - -final class TensorUtilsTests: XCTestCase { - private let accuracy: Float = 0.00001 - - func testCumsum() { - XCTAssertTrue(Math.cumsum([]).isEmpty) - XCTAssertEqual(Math.cumsum([1]), [1]) - XCTAssertEqual(Math.cumsum([1, 2, 3, 4]), [1, 3, 6, 10]) - } - - func testArgMax() throws { - let result1 = Math.argmax([3.0, 4.0, 1.0, 2.0] as [Float], count: 4) - XCTAssertEqual(result1.0, 1) - XCTAssertEqual(result1.1, 4.0) - - let result2 = Math.argmax32([3.0, 4.0, 1.0, 2.0], count: 4) - XCTAssertEqual(result2.0, 1) - XCTAssertEqual(result2.1, 4.0) - - let result3 = Math.argmax([3.0, 4.0, 1.0, 2.0] as [Double], count: 4) - XCTAssertEqual(result3.0, 1) - XCTAssertEqual(result3.1, 4.0) - - let result4 = Math.argmax32(try MLMultiArray([3.0, 4.0, 1.0, 2.0] as [Float])) - XCTAssertEqual(result4.0, 1) - XCTAssertEqual(result4.1, 4.0) - - let result5 = Math.argmax(try MLMultiArray([3.0, 4.0, 1.0, 2.0] as [Double])) - XCTAssertEqual(result5.0, 1) - XCTAssertEqual(result5.1, 4.0) - - let result6 = Math.argmax(MLShapedArray(scalars: [3.0, 4.0, 1.0, 2.0] as [Float], shape: [4])) - XCTAssertEqual(result6.0, 1) - XCTAssertEqual(result6.1, 4.0) - } - - func testSoftmax() { - XCTAssertEqual(Math.softmax([]), []) - - let result1 = Math.softmax([3.0, 4.0, 1.0, 2.0]) - XCTAssertEqual(result1, [0.23688284, 0.6439143, 0.032058604, 0.08714432], accuracy: accuracy) - XCTAssertEqual(result1.reduce(0, +), 1.0, accuracy: accuracy) - } -} diff --git a/Tests/TensorUtilsTests/TestUtils.swift b/Tests/TensorUtilsTests/TestUtils.swift deleted file mode 100644 index 7765c8c..0000000 --- a/Tests/TensorUtilsTests/TestUtils.swift +++ /dev/null @@ -1,22 +0,0 @@ -import Foundation -import XCTest - -func XCTAssertEqual( - _ expression1: @autoclosure () throws -> [T], - _ expression2: @autoclosure () throws -> [T], - accuracy: T, - _ message: @autoclosure () -> String = "", - file: StaticString = #filePath, - 
line: UInt = #line -) { - do { - let lhsEvaluated = try expression1() - let rhsEvaluated = try expression2() - XCTAssertEqual(lhsEvaluated.count, rhsEvaluated.count, file: file, line: line) - for (lhs, rhs) in zip(lhsEvaluated, rhsEvaluated) { - XCTAssertEqual(lhs, rhs, accuracy: accuracy, file: file, line: line) - } - } catch { - XCTFail("Unexpected error: \(error)", file: file, line: line) - } -} diff --git a/Tests/TensorUtilsTests/WeightsTests.swift b/Tests/TensorUtilsTests/WeightsTests.swift deleted file mode 100644 index 5d2e478..0000000 --- a/Tests/TensorUtilsTests/WeightsTests.swift +++ /dev/null @@ -1,104 +0,0 @@ -@testable import TensorUtils -@testable import Hub -import XCTest - -class WeightsTests: XCTestCase { - - let downloadDestination: URL = { - FileManager.default.urls(for: .cachesDirectory, in: .userDomainMask).first!.appending(component: "huggingface-tests") - }() - - var hubApi: HubApi { HubApi(downloadBase: downloadDestination) } - - func testLoadWeightsFromFileURL() async throws { - let repo = "google/bert_uncased_L-2_H-128_A-2" - let modelDir = try await hubApi.snapshot(from: repo, matching: ["config.json", "model.safetensors"]) - - let files = try FileManager.default.contentsOfDirectory(at: modelDir, includingPropertiesForKeys: [.isReadableKey]) - XCTAssertTrue(files.contains(where: { $0.lastPathComponent == "config.json" })) - XCTAssertTrue(files.contains(where: { $0.lastPathComponent == "model.safetensors" })) - - let modelFile = modelDir.appending(path: "/model.safetensors") - let weights = try Weights.from(fileURL: modelFile) - XCTAssertEqual(weights["bert.embeddings.LayerNorm.bias"]!.dataType, .float32) - XCTAssertEqual(weights["bert.embeddings.LayerNorm.bias"]!.count, 128) - XCTAssertEqual(weights["bert.embeddings.LayerNorm.bias"]!.shape.count, 1) - - XCTAssertEqual(weights["bert.embeddings.word_embeddings.weight"]!.dataType, .float32) - XCTAssertEqual(weights["bert.embeddings.word_embeddings.weight"]!.count, 3906816) - XCTAssertEqual(weights["bert.embeddings.word_embeddings.weight"]!.shape.count, 2) - - XCTAssertEqual(weights["bert.embeddings.word_embeddings.weight"]![[0, 0]].floatValue, -0.0041, accuracy: 1e-3) - XCTAssertEqual(weights["bert.embeddings.word_embeddings.weight"]![[3, 4]].floatValue, 0.0037, accuracy: 1e-3) - XCTAssertEqual(weights["bert.embeddings.word_embeddings.weight"]![[5, 3]].floatValue, -0.5371, accuracy: 1e-3) - XCTAssertEqual(weights["bert.embeddings.word_embeddings.weight"]![[7, 8]].floatValue, 0.0460, accuracy: 1e-3) - XCTAssertEqual(weights["bert.embeddings.word_embeddings.weight"]![[11, 7]].floatValue, -0.0058, accuracy: 1e-3) - } - - func testSafetensorReadTensor1D() throws { - let modelFile = Bundle.module.url(forResource: "tensor-1d-int32", withExtension: "safetensors")! - let weights: Weights = try Weights.from(fileURL: modelFile) - let tensor = weights["embedding"]! - XCTAssertEqual(tensor.dataType, .int32) - XCTAssertEqual(tensor[[0]], 1) - XCTAssertEqual(tensor[[1]], 2) - XCTAssertEqual(tensor[[2]], 3) - } - - func testSafetensorReadTensor2D() throws { - let modelFile = Bundle.module.url(forResource: "tensor-2d-float64", withExtension: "safetensors")! - let weights: Weights = try Weights.from(fileURL: modelFile) - let tensor = weights["embedding"]! 
- XCTAssertEqual(tensor.dataType, .float64) - XCTAssertEqual(tensor[[0, 0]], 1) - XCTAssertEqual(tensor[[0, 1]], 2) - XCTAssertEqual(tensor[[0, 2]], 3) - XCTAssertEqual(tensor[[1, 0]], 24) - XCTAssertEqual(tensor[[1, 1]], 25) - XCTAssertEqual(tensor[[1, 2]], 26) - } - - func testSafetensorReadTensor3D() throws { - let modelFile = Bundle.module.url(forResource: "tensor-3d-float32", withExtension: "safetensors")! - let weights: Weights = try Weights.from(fileURL: modelFile) - let tensor = weights["embedding"]! - XCTAssertEqual(tensor.dataType, .float32) - XCTAssertEqual(tensor[[0, 0, 0]], 22) - XCTAssertEqual(tensor[[0, 0, 1]], 23) - XCTAssertEqual(tensor[[0, 0, 2]], 24) - XCTAssertEqual(tensor[[0, 1, 0]], 11) - XCTAssertEqual(tensor[[0, 1, 1]], 12) - XCTAssertEqual(tensor[[0, 1, 2]], 13) - XCTAssertEqual(tensor[[1, 0, 0]], 2) - XCTAssertEqual(tensor[[1, 0, 1]], 3) - XCTAssertEqual(tensor[[1, 0, 2]], 4) - XCTAssertEqual(tensor[[1, 1, 0]], 1) - XCTAssertEqual(tensor[[1, 1, 1]], 2) - XCTAssertEqual(tensor[[1, 1, 2]], 3) - } - - func testSafetensorReadTensor4D() throws { - let modelFile = Bundle.module.url(forResource: "tensor-4d-float32", withExtension: "safetensors")! - let weights: Weights = try Weights.from(fileURL: modelFile) - let tensor = weights["embedding"]! - XCTAssertEqual(tensor.dataType, .float32) - XCTAssertEqual(tensor[[0, 0, 0, 0]], 11) - XCTAssertEqual(tensor[[0, 0, 0, 1]], 12) - XCTAssertEqual(tensor[[0, 0, 0, 2]], 13) - XCTAssertEqual(tensor[[0, 0, 1, 0]], 1) - XCTAssertEqual(tensor[[0, 0, 1, 1]], 2) - XCTAssertEqual(tensor[[0, 0, 1, 2]], 3) - XCTAssertEqual(tensor[[0, 0, 2, 0]], 4) - XCTAssertEqual(tensor[[0, 0, 2, 1]], 5) - XCTAssertEqual(tensor[[0, 0, 2, 2]], 6) - XCTAssertEqual(tensor[[1, 0, 0, 0]], 22) - XCTAssertEqual(tensor[[1, 0, 0, 1]], 23) - XCTAssertEqual(tensor[[1, 0, 0, 2]], 24) - XCTAssertEqual(tensor[[1, 0, 1, 0]], 15) - XCTAssertEqual(tensor[[1, 0, 1, 1]], 16) - XCTAssertEqual(tensor[[1, 0, 1, 2]], 17) - XCTAssertEqual(tensor[[1, 0, 2, 0]], 17) - XCTAssertEqual(tensor[[1, 0, 2, 1]], 18) - XCTAssertEqual(tensor[[1, 0, 2, 2]], 19) - } -} diff --git a/Tests/TokenizersTests/TokenizerTests.swift b/Tests/TokenizersTests/TokenizerTests.swift index eae7003..6fbbd21 100644 --- a/Tests/TokenizersTests/TokenizerTests.swift +++ b/Tests/TokenizersTests/TokenizerTests.swift @@ -9,7 +9,6 @@ import XCTest import Hub @testable import Tokenizers -@testable import Models class GPT2TokenizerTests: TokenizerTests { override class var hubModelName: String? { "distilgpt2" } @@ -277,7 +276,7 @@ class TokenizerTester { guard _tokenizer == nil else { return _tokenizer! } do { guard let tokenizerConfig = try await configuration!.tokenizerConfig else { - throw TokenizerError.tokenizerConfigNotFound + throw TokenizerError.missingConfig } let tokenizerData = try await configuration!.tokenizerData _tokenizer = try AutoTokenizer.from(tokenizerConfig: tokenizerConfig, tokenizerData: tokenizerData)
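
The final hunk renames the error surfaced when a checkpoint lacks a tokenizer configuration from `tokenizerConfigNotFound` to `TokenizerError.missingConfig`. A hedged sketch of how calling code might handle the renamed case follows; it assumes `AutoTokenizer.from(pretrained:)` and a public `TokenizerError` remain available after this change, which should be confirmed against the released API.

    import Tokenizers

    // Illustrative only: handle the renamed error case when loading a tokenizer by model id.
    func loadTokenizer(model: String) async -> (any Tokenizer)? {
        do {
            return try await AutoTokenizer.from(pretrained: model)
        } catch TokenizerError.missingConfig {
            // Previously surfaced as `tokenizerConfigNotFound`.
            print("No tokenizer configuration found for \(model)")
            return nil
        } catch {
            print("Failed to load tokenizer for \(model): \(error)")
            return nil
        }
    }
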