From dfaed4f219a4707fa638ac3b9b9a6726ee22e3a5 Mon Sep 17 00:00:00 2001 From: Arda Atahan Ibis Date: Mon, 14 Apr 2025 12:22:42 -0700 Subject: [PATCH] cleanup and add new test cases --- Sources/Hub/Downloader.swift | 63 +++++------------------ Sources/Hub/HubApi.swift | 26 +++++----- Tests/HubTests/DownloaderTests.swift | 49 +++++++++--------- Tests/HubTests/HubApiTests.swift | 74 ++++++++++++++++++++++++++++ 4 files changed, 122 insertions(+), 90 deletions(-) diff --git a/Sources/Hub/Downloader.swift b/Sources/Hub/Downloader.swift index dd6a921..1f27c4b 100644 --- a/Sources/Hub/Downloader.swift +++ b/Sources/Hub/Downloader.swift @@ -11,7 +11,6 @@ import Foundation class Downloader: NSObject, ObservableObject { private(set) var destination: URL - private(set) var sourceURL: URL private let chunkSize = 10 * 1024 * 1024 // 10MB @@ -31,28 +30,18 @@ class Downloader: NSObject, ObservableObject { private(set) lazy var downloadState: CurrentValueSubject = CurrentValueSubject(.notStarted) private var stateSubscriber: Cancellable? - private(set) var tempFilePath: URL? + private(set) var tempFilePath: URL private(set) var expectedSize: Int? private(set) var downloadedSize: Int = 0 private var urlSession: URLSession? = nil - /// Creates the incomplete file path for a given destination URL - /// This is similar to the Hugging Face Hub approach of using .incomplete files - static func incompletePath(for destination: URL) -> URL { - destination.appendingPathExtension("incomplete") - } - /// Check if an incomplete file exists for the destination and returns its size /// - Parameter destination: The destination URL for the download /// - Returns: Size of the incomplete file if it exists, otherwise 0 - static func checkForIncompleteFile(at destination: URL) -> Int { - let incompletePath = Self.incompletePath(for: destination) - + static func incompleteFileSize(at incompletePath: URL) -> Int { if FileManager.default.fileExists(atPath: incompletePath.path) { - if let attributes = try? FileManager.default.attributesOfItem(atPath: incompletePath.path), - let fileSize = attributes[.size] as? Int - { + if let attributes = try? FileManager.default.attributesOfItem(atPath: incompletePath.path), let fileSize = attributes[.size] as? Int { return fileSize } } @@ -63,29 +52,22 @@ class Downloader: NSObject, ObservableObject { init( from url: URL, to destination: URL, + incompleteDestination: URL, using authToken: String? = nil, inBackground: Bool = false, - resumeSize: Int = 0, // Can be specified manually, but will also check for incomplete files headers: [String: String]? = nil, expectedSize: Int? = nil, timeout: TimeInterval = 10, numRetries: Int = 5 ) { self.destination = destination - sourceURL = url self.expectedSize = expectedSize // Create incomplete file path based on destination - tempFilePath = Downloader.incompletePath(for: destination) + self.tempFilePath = incompleteDestination // If resume size wasn't specified, check for an existing incomplete file - let actualResumeSize: Int = if resumeSize > 0 { - resumeSize - } else { - Downloader.checkForIncompleteFile(at: destination) - } - - downloadedSize = actualResumeSize + let resumeSize = Self.incompleteFileSize(at: incompleteDestination) super.init() let sessionIdentifier = "swift-transformers.hub.downloader" @@ -99,7 +81,7 @@ class Downloader: NSObject, ObservableObject { urlSession = URLSession(configuration: config, delegate: self, delegateQueue: nil) - setUpDownload(from: url, with: authToken, resumeSize: actualResumeSize, headers: headers, expectedSize: expectedSize, timeout: timeout, numRetries: numRetries) + setUpDownload(from: url, with: authToken, resumeSize: resumeSize, headers: headers, expectedSize: expectedSize, timeout: timeout, numRetries: numRetries) } /// Sets up and initiates a file download operation @@ -139,25 +121,6 @@ class Downloader: NSObject, ObservableObject { Task { do { - // Check if incomplete file exists and get its size - var existingSize = 0 - guard let incompleteFilePath = self.tempFilePath else { - throw DownloadError.unexpectedError - } - - let fileManager = FileManager.default - if fileManager.fileExists(atPath: incompleteFilePath.path) { - let attributes = try fileManager.attributesOfItem(atPath: incompleteFilePath.path) - existingSize = attributes[.size] as? Int ?? 0 - self.downloadedSize = existingSize - } else { - // Create parent directory if needed - try fileManager.createDirectory(at: incompleteFilePath.deletingLastPathComponent(), withIntermediateDirectories: true) - - // Create empty incomplete file - fileManager.createFile(atPath: incompleteFilePath.path, contents: nil) - } - // Set up the request with appropriate headers var request = URLRequest(url: url) var requestHeaders = headers ?? [:] @@ -167,12 +130,12 @@ class Downloader: NSObject, ObservableObject { } // Set Range header if we're resuming - if existingSize > 0 { - requestHeaders["Range"] = "bytes=\(existingSize)-" + if resumeSize > 0 { + requestHeaders["Range"] = "bytes=\(resumeSize)-" // Calculate and show initial progress if let expectedSize, expectedSize > 0 { - let initialProgress = Double(existingSize) / Double(expectedSize) + let initialProgress = Double(resumeSize) / Double(expectedSize) self.downloadState.value = .downloading(initialProgress) } else { self.downloadState.value = .downloading(0) @@ -185,10 +148,10 @@ class Downloader: NSObject, ObservableObject { request.allHTTPHeaderFields = requestHeaders // Open the incomplete file for writing - let tempFile = try FileHandle(forWritingTo: incompleteFilePath) + let tempFile = try FileHandle(forWritingTo: self.tempFilePath) // If resuming, seek to end of file - if existingSize > 0 { + if resumeSize > 0 { try tempFile.seekToEnd() } @@ -197,7 +160,7 @@ class Downloader: NSObject, ObservableObject { // Clean up and move the completed download to its final destination tempFile.closeFile() - try fileManager.moveDownloadedFile(from: incompleteFilePath, to: self.destination) + try FileManager.default.moveDownloadedFile(from: self.tempFilePath, to: self.destination) self.downloadState.value = .completed(self.destination) } catch { self.downloadState.value = .failed(error) diff --git a/Sources/Hub/HubApi.swift b/Sources/Hub/HubApi.swift index d3a1e3d..1430229 100644 --- a/Sources/Hub/HubApi.swift +++ b/Sources/Hub/HubApi.swift @@ -361,6 +361,10 @@ public extension HubApi { repoMetadataDestination.appending(path: relativeFilename + ".metadata") } + var incompleteDestination: URL { + repoMetadataDestination.appending(path: relativeFilename + ".incomplete") + } + var downloaded: Bool { FileManager.default.fileExists(atPath: destination.path) } @@ -370,9 +374,13 @@ public extension HubApi { try FileManager.default.createDirectory(at: directoryURL, withIntermediateDirectories: true, attributes: nil) } - func prepareMetadataDestination() throws { - let directoryURL = metadataDestination.deletingLastPathComponent() + // We're using incomplete destination to prepare cache destination because incomplete files include lfs + non-lfs files (vs only lfs for metadata files) + func prepareCacheDestination() throws { + let directoryURL = incompleteDestination.deletingLastPathComponent() try FileManager.default.createDirectory(at: directoryURL, withIntermediateDirectories: true, attributes: nil) + if !FileManager.default.fileExists(atPath: incompleteDestination.path) { + try "".write(to: incompleteDestination, atomically: true, encoding: .utf8) + } } /// Note we go from Combine in Downloader to callback-based progress reporting @@ -423,24 +431,14 @@ public extension HubApi { // Otherwise, let's download the file! try prepareDestination() - try prepareMetadataDestination() + try prepareCacheDestination() - // Check for an existing incomplete file - let incompleteFile = Downloader.incompletePath(for: destination) - var resumeSize = 0 - - if FileManager.default.fileExists(atPath: incompleteFile.path) { - if let fileAttributes = try? FileManager.default.attributesOfItem(atPath: incompleteFile.path) { - resumeSize = (fileAttributes[FileAttributeKey.size] as? Int) ?? 0 - } - } - let downloader = Downloader( from: source, to: destination, + incompleteDestination: incompleteDestination, using: hfToken, inBackground: backgroundSession, - resumeSize: resumeSize, expectedSize: remoteSize ) diff --git a/Tests/HubTests/DownloaderTests.swift b/Tests/HubTests/DownloaderTests.swift index 1ef7c0f..3e7df3e 100644 --- a/Tests/HubTests/DownloaderTests.swift +++ b/Tests/HubTests/DownloaderTests.swift @@ -59,9 +59,16 @@ final class DownloaderTests: XCTestCase { """ + let cacheDir = tempDir.appendingPathComponent("cache") + try? FileManager.default.createDirectory(at: cacheDir, withIntermediateDirectories: true) + + let incompleteDestination = cacheDir.appendingPathComponent("config.json.incomplete") + FileManager.default.createFile(atPath: incompleteDestination.path, contents: nil, attributes: nil) + let downloader = Downloader( from: url, - to: destination + to: destination, + incompleteDestination: incompleteDestination ) // Store subscriber outside the continuation to maintain its lifecycle @@ -95,10 +102,17 @@ final class DownloaderTests: XCTestCase { let url = URL(string: "https://huggingface.co/coreml-projects/Llama-2-7b-chat-coreml/resolve/main/config.json")! let destination = tempDir.appendingPathComponent("config.json") + let cacheDir = tempDir.appendingPathComponent("cache") + try? FileManager.default.createDirectory(at: cacheDir, withIntermediateDirectories: true) + + let incompleteDestination = cacheDir.appendingPathComponent("config.json.incomplete") + FileManager.default.createFile(atPath: incompleteDestination.path, contents: nil, attributes: nil) + // Create downloader with incorrect expected size let downloader = Downloader( from: url, to: destination, + incompleteDestination: incompleteDestination, expectedSize: 999999 // Incorrect size ) @@ -120,10 +134,17 @@ final class DownloaderTests: XCTestCase { // Create parent directory if it doesn't exist try FileManager.default.createDirectory(at: destination.deletingLastPathComponent(), withIntermediateDirectories: true) - + + let cacheDir = tempDir.appendingPathComponent("cache") + try? FileManager.default.createDirectory(at: cacheDir, withIntermediateDirectories: true) + + let incompleteDestination = cacheDir.appendingPathComponent("config.json.incomplete") + FileManager.default.createFile(atPath: incompleteDestination.path, contents: nil, attributes: nil) + let downloader = Downloader( from: url, to: destination, + incompleteDestination: incompleteDestination, expectedSize: 73194001 // Correct size for verification ) @@ -168,28 +189,4 @@ final class DownloaderTests: XCTestCase { throw error } } - - func testAutomaticIncompleteFileDetection() throws { - let url = URL(string: "https://huggingface.co/coreml-projects/sam-2-studio/resolve/main/SAM%202%20Studio%201.1.zip")! - let destination = tempDir.appendingPathComponent("SAM%202%20Studio%201.1.zip") - - // Create a sample incomplete file with test content - let incompletePath = Downloader.incompletePath(for: destination) - try FileManager.default.createDirectory(at: incompletePath.deletingLastPathComponent(), withIntermediateDirectories: true) - let testContent = Data(repeating: 65, count: 1024) // 1KB of data - FileManager.default.createFile(atPath: incompletePath.path, contents: testContent) - - // Create a downloader for the same destination - // It should automatically detect and use the incomplete file - let downloader = Downloader( - from: url, - to: destination - ) - - // Verify the downloader found and is using the incomplete file - XCTAssertEqual(downloader.downloadedSize, 1024, "Should have detected the incomplete file and set resumeSize") - - // Clean up - try? FileManager.default.removeItem(at: incompletePath) - } } diff --git a/Tests/HubTests/HubApiTests.swift b/Tests/HubTests/HubApiTests.swift index 451816f..cf2c253 100644 --- a/Tests/HubTests/HubApiTests.swift +++ b/Tests/HubTests/HubApiTests.swift @@ -968,4 +968,78 @@ class SnapshotDownloadTests: XCTestCase { XCTFail("Unexpected error: \(error)") } } + + func testResumeDownloadFromEmptyIncomplete() async throws { + let hubApi = HubApi(downloadBase: downloadDestination) + var lastProgress: Progress? = nil + var downloadedTo = FileManager.default.homeDirectoryForCurrentUser + .appendingPathComponent("Library/Caches/huggingface-tests/models/coreml-projects/Llama-2-7b-chat-coreml") + + let metadataDestination = downloadedTo.appending(component: ".cache/huggingface/download") + + try FileManager.default.createDirectory(at: metadataDestination, withIntermediateDirectories: true, attributes: nil) + try "".write(to: metadataDestination.appendingPathComponent("config.json.incomplete"), atomically: true, encoding: .utf8) + downloadedTo = try await hubApi.snapshot(from: repo, matching: "config.json") { progress in + print("Total Progress: \(progress.fractionCompleted)") + print("Files Completed: \(progress.completedUnitCount) of \(progress.totalUnitCount)") + lastProgress = progress + } + XCTAssertEqual(lastProgress?.fractionCompleted, 1) + XCTAssertEqual(lastProgress?.completedUnitCount, 1) + XCTAssertEqual(downloadedTo, downloadDestination.appending(path: "models/\(repo)")) + + let fileContents = try String(contentsOfFile: downloadedTo.appendingPathComponent("config.json").path) + + let expected = """ + { + "architectures": [ + "LlamaForCausalLM" + ], + "bos_token_id": 1, + "eos_token_id": 2, + "model_type": "llama", + "pad_token_id": 0, + "vocab_size": 32000 + } + + """ + XCTAssertTrue(fileContents.contains(expected)) + } + + func testResumeDownloadFromNonEmptyIncomplete() async throws { + let hubApi = HubApi(downloadBase: downloadDestination) + var lastProgress: Progress? = nil + var downloadedTo = FileManager.default.homeDirectoryForCurrentUser + .appendingPathComponent("Library/Caches/huggingface-tests/models/coreml-projects/Llama-2-7b-chat-coreml") + + let metadataDestination = downloadedTo.appending(component: ".cache/huggingface/download") + + try FileManager.default.createDirectory(at: metadataDestination, withIntermediateDirectories: true, attributes: nil) + try "X".write(to: metadataDestination.appendingPathComponent("config.json.incomplete"), atomically: true, encoding: .utf8) + downloadedTo = try await hubApi.snapshot(from: repo, matching: "config.json") { progress in + print("Total Progress: \(progress.fractionCompleted)") + print("Files Completed: \(progress.completedUnitCount) of \(progress.totalUnitCount)") + lastProgress = progress + } + XCTAssertEqual(lastProgress?.fractionCompleted, 1) + XCTAssertEqual(lastProgress?.completedUnitCount, 1) + XCTAssertEqual(downloadedTo, downloadDestination.appending(path: "models/\(repo)")) + + let fileContents = try String(contentsOfFile: downloadedTo.appendingPathComponent("config.json").path) + + let expected = """ + X + "architectures": [ + "LlamaForCausalLM" + ], + "bos_token_id": 1, + "eos_token_id": 2, + "model_type": "llama", + "pad_token_id": 0, + "vocab_size": 32000 + } + + """ + XCTAssertTrue(fileContents.contains(expected)) + } }