From 1d6093bae150fac6fcda5a9515b42e78faf9a221 Mon Sep 17 00:00:00 2001 From: Anthony DePasquale Date: Thu, 6 Mar 2025 09:50:00 +0100 Subject: [PATCH 1/2] Add resumable downloads across app sessions --- Sources/Hub/Downloader.swift | 175 +++++++++++++++++++++------ Sources/Hub/Hub.swift | 6 +- Sources/Hub/HubApi.swift | 42 ++++++- Tests/HubTests/DownloaderTests.swift | 24 ++++ 4 files changed, 202 insertions(+), 45 deletions(-) diff --git a/Sources/Hub/Downloader.swift b/Sources/Hub/Downloader.swift index f52c596..521ad15 100644 --- a/Sources/Hub/Downloader.swift +++ b/Sources/Hub/Downloader.swift @@ -11,6 +11,7 @@ import Foundation class Downloader: NSObject, ObservableObject { private(set) var destination: URL + private(set) var sourceURL: URL private let chunkSize = 10 * 1024 * 1024 // 10MB @@ -24,25 +25,69 @@ class Downloader: NSObject, ObservableObject { enum DownloadError: Error { case invalidDownloadLocation case unexpectedError + case tempFileNotFound } private(set) lazy var downloadState: CurrentValueSubject = CurrentValueSubject(.notStarted) private var stateSubscriber: Cancellable? + + private(set) var tempFilePath: URL? + private(set) var expectedSize: Int? + private(set) var downloadedSize: Int = 0 private var urlSession: URLSession? = nil + + /// Creates the incomplete file path for a given destination URL + /// This is similar to the Hugging Face Hub approach of using .incomplete files + static func incompletePath(for destination: URL) -> URL { + destination.appendingPathExtension("incomplete") + } + + /// Check if an incomplete file exists for the destination and returns its size + /// - Parameter destination: The destination URL for the download + /// - Returns: Size of the incomplete file if it exists, otherwise 0 + static func checkForIncompleteFile(at destination: URL) -> Int { + let incompletePath = Self.incompletePath(for: destination) + + if FileManager.default.fileExists(atPath: incompletePath.path) { + if let attributes = try? FileManager.default.attributesOfItem(atPath: incompletePath.path), + let fileSize = attributes[.size] as? Int + { + print("[Downloader] Found existing incomplete file for \(destination.lastPathComponent): \(fileSize) bytes") + return fileSize + } + } + + return 0 + } init( from url: URL, to destination: URL, using authToken: String? = nil, inBackground: Bool = false, - resumeSize: Int = 0, + resumeSize: Int = 0, // Can be specified manually, but will also check for incomplete files headers: [String: String]? = nil, expectedSize: Int? = nil, timeout: TimeInterval = 10, numRetries: Int = 5 ) { self.destination = destination + sourceURL = url + self.expectedSize = expectedSize + + // Create incomplete file path based on destination + tempFilePath = Downloader.incompletePath(for: destination) + + // If resume size wasn't specified, check for an existing incomplete file + let actualResumeSize: Int = if resumeSize > 0 { + resumeSize + } else { + Downloader.checkForIncompleteFile(at: destination) + } + + downloadedSize = actualResumeSize + super.init() let sessionIdentifier = "swift-transformers.hub.downloader" @@ -55,7 +100,7 @@ class Downloader: NSObject, ObservableObject { urlSession = URLSession(configuration: config, delegate: self, delegateQueue: nil) - setupDownload(from: url, with: authToken, resumeSize: resumeSize, headers: headers, expectedSize: expectedSize, timeout: timeout, numRetries: numRetries) + setUpDownload(from: url, with: authToken, resumeSize: actualResumeSize, headers: headers, expectedSize: expectedSize, timeout: timeout, numRetries: numRetries) } /// Sets up and initiates a file download operation @@ -68,7 +113,7 @@ class Downloader: NSObject, ObservableObject { /// - expectedSize: Expected file size in bytes for validation /// - timeout: Time interval before the request times out /// - numRetries: Number of retry attempts for failed downloads - private func setupDownload( + private func setUpDownload( from url: URL, with authToken: String?, resumeSize: Int, @@ -77,61 +122,101 @@ class Downloader: NSObject, ObservableObject { timeout: TimeInterval, numRetries: Int ) { - downloadState.value = .downloading(0) + print("[Downloader] Setting up download for \(url.lastPathComponent)") + print("[Downloader] Destination: \(destination.path)") + print("[Downloader] Incomplete file: \(tempFilePath?.path ?? "none")") + urlSession?.getAllTasks { tasks in // If there's an existing pending background task with the same URL, let it proceed. if let existing = tasks.filter({ $0.originalRequest?.url == url }).first { switch existing.state { case .running: - // print("Already downloading \(url)") + print("[Downloader] Task already running for \(url.lastPathComponent)") return case .suspended: - // print("Resuming suspended download task for \(url)") + print("[Downloader] Resuming suspended download task for \(url.lastPathComponent)") existing.resume() return - case .canceling: - // print("Starting new download task for \(url), previous was canceling") - break - case .completed: - // print("Starting new download task for \(url), previous is complete but the file is no longer present (I think it's cached)") - break + case .canceling, .completed: + existing.cancel() @unknown default: - // print("Unknown state for running task; cancelling and creating a new one") existing.cancel() } } - var request = URLRequest(url: url) - - // Use headers from argument else create an empty header dictionary - var requestHeaders = headers ?? [:] - - // Populate header auth and range fields - if let authToken { - requestHeaders["Authorization"] = "Bearer \(authToken)" - } - if resumeSize > 0 { - requestHeaders["Range"] = "bytes=\(resumeSize)-" - } - request.timeoutInterval = timeout - request.allHTTPHeaderFields = requestHeaders - Task { do { - // Create a temp file to write - let tempURL = FileManager.default.temporaryDirectory.appendingPathComponent(UUID().uuidString) - FileManager.default.createFile(atPath: tempURL.path, contents: nil) - let tempFile = try FileHandle(forWritingTo: tempURL) + // Check if incomplete file exists and get its size + var existingSize = 0 + guard let incompleteFilePath = self.tempFilePath else { + throw DownloadError.unexpectedError + } + + let fileManager = FileManager.default + if fileManager.fileExists(atPath: incompleteFilePath.path) { + let attributes = try fileManager.attributesOfItem(atPath: incompleteFilePath.path) + existingSize = attributes[.size] as? Int ?? 0 + print("[Downloader] Found incomplete file with \(existingSize) bytes") + self.downloadedSize = existingSize + } else { + // Create parent directory if needed + try fileManager.createDirectory(at: incompleteFilePath.deletingLastPathComponent(), withIntermediateDirectories: true) + + // Create empty incomplete file + fileManager.createFile(atPath: incompleteFilePath.path, contents: nil) + print("[Downloader] Created new incomplete file at \(incompleteFilePath.path)") + } + + // Set up the request with appropriate headers + var request = URLRequest(url: url) + var requestHeaders = headers ?? [:] + + if let authToken { + requestHeaders["Authorization"] = "Bearer \(authToken)" + } + + // Set Range header if we're resuming + if existingSize > 0 { + requestHeaders["Range"] = "bytes=\(existingSize)-" + + // Calculate and show initial progress + if let expectedSize, expectedSize > 0 { + let initialProgress = Double(existingSize) / Double(expectedSize) + self.downloadState.value = .downloading(initialProgress) + print("[Downloader] Resuming from \(existingSize)/\(expectedSize) bytes (\(Int(initialProgress * 100))%)") + } else { + self.downloadState.value = .downloading(0) + print("[Downloader] Resuming download from byte \(existingSize)") + } + } else { + self.downloadState.value = .downloading(0) + print("[Downloader] Starting new download") + } + + request.timeoutInterval = timeout + request.allHTTPHeaderFields = requestHeaders + + // Open the incomplete file for writing + let tempFile = try FileHandle(forWritingTo: incompleteFilePath) + + // If resuming, seek to end of file + if existingSize > 0 { + try tempFile.seekToEnd() + } defer { tempFile.closeFile() } - try await self.httpGet(request: request, tempFile: tempFile, resumeSize: resumeSize, numRetries: numRetries, expectedSize: expectedSize) + try await self.httpGet(request: request, tempFile: tempFile, resumeSize: self.downloadedSize, numRetries: numRetries, expectedSize: expectedSize) // Clean up and move the completed download to its final destination tempFile.closeFile() - try FileManager.default.moveDownloadedFile(from: tempURL, to: self.destination) + print("[Downloader] Download completed with total size \(self.downloadedSize) bytes") + print("[Downloader] Moving incomplete file to destination: \(self.destination.path)") + try fileManager.moveDownloadedFile(from: incompleteFilePath, to: self.destination) + print("[Downloader] Download successfully completed") self.downloadState.value = .completed(self.destination) } catch { + print("[Downloader] Error: \(error)") self.downloadState.value = .failed(error) } } @@ -164,20 +249,31 @@ class Downloader: NSObject, ObservableObject { var newRequest = request if resumeSize > 0 { newRequest.setValue("bytes=\(resumeSize)-", forHTTPHeaderField: "Range") + print("[Downloader] Adding Range header: bytes=\(resumeSize)-") } // Start the download and get the byte stream let (asyncBytes, response) = try await session.bytes(for: newRequest) - guard let response = response as? HTTPURLResponse else { + guard let httpResponse = response as? HTTPURLResponse else { + print("[Downloader] Error: Non-HTTP response received") throw DownloadError.unexpectedError } + + print("[Downloader] Received HTTP \(httpResponse.statusCode) response") + if let contentRange = httpResponse.value(forHTTPHeaderField: "Content-Range") { + print("[Downloader] Content-Range: \(contentRange)") + } + if let contentLength = httpResponse.value(forHTTPHeaderField: "Content-Length") { + print("[Downloader] Content-Length: \(contentLength)") + } - guard (200..<300).contains(response.statusCode) else { + guard (200..<300).contains(httpResponse.statusCode) else { + print("[Downloader] Error: HTTP status code \(httpResponse.statusCode)") throw DownloadError.unexpectedError } - var downloadedSize = resumeSize + downloadedSize = resumeSize // Create a buffer to collect bytes before writing to disk var buffer = Data(capacity: chunkSize) @@ -218,7 +314,7 @@ class Downloader: NSObject, ObservableObject { try await httpGet( request: request, tempFile: tempFile, - resumeSize: downloadedSize, + resumeSize: self.downloadedSize, numRetries: newNumRetries - 1, expectedSize: expectedSize ) @@ -227,7 +323,10 @@ class Downloader: NSObject, ObservableObject { // Verify the downloaded file size matches the expected size let actualSize = try tempFile.seekToEnd() if let expectedSize, expectedSize != actualSize { + print("[Downloader] Error: Size mismatch - expected \(expectedSize) bytes but got \(actualSize) bytes") throw DownloadError.unexpectedError + } else { + print("[Downloader] Final verification passed, size: \(actualSize) bytes") } } diff --git a/Sources/Hub/Hub.swift b/Sources/Hub/Hub.swift index fe8f461..0634b69 100644 --- a/Sources/Hub/Hub.swift +++ b/Sources/Hub/Hub.swift @@ -51,13 +51,13 @@ public extension Hub { } } - enum RepoType: String { + enum RepoType: String, Codable { case models case datasets case spaces } - - struct Repo { + + struct Repo: Codable { public let id: String public let type: RepoType diff --git a/Sources/Hub/HubApi.swift b/Sources/Hub/HubApi.swift index 368b805..a3628ef 100644 --- a/Sources/Hub/HubApi.swift +++ b/Sources/Hub/HubApi.swift @@ -426,14 +426,48 @@ public extension HubApi { try prepareDestination() try prepareMetadataDestination() - let downloader = Downloader(from: source, to: destination, using: hfToken, inBackground: backgroundSession, expectedSize: remoteSize) + // Check for an existing incomplete file + let incompleteFile = Downloader.incompletePath(for: destination) + var resumeSize = 0 + + if FileManager.default.fileExists(atPath: incompleteFile.path) { + if let fileAttributes = try? FileManager.default.attributesOfItem(atPath: incompleteFile.path) { + resumeSize = (fileAttributes[FileAttributeKey.size] as? Int) ?? 0 + print("[HubApi] Found existing incomplete file for \(destination.lastPathComponent): \(resumeSize) bytes at \(incompleteFile.path)") + } + } else { + print("[HubApi] No existing incomplete file found for \(destination.lastPathComponent)") + } + + let downloader = Downloader( + from: source, + to: destination, + using: hfToken, + inBackground: backgroundSession, + resumeSize: resumeSize, + expectedSize: remoteSize + ) + let downloadSubscriber = downloader.downloadState.sink { state in - if case let .downloading(progress) = state { + switch state { + case let .downloading(progress): progressHandler(progress) + case .completed, .failed, .notStarted: + break } } - _ = try withExtendedLifetime(downloadSubscriber) { - try downloader.waitUntilDone() + do { + _ = try withExtendedLifetime(downloadSubscriber) { + try downloader.waitUntilDone() + } + + try HubApi.shared.writeDownloadMetadata(commitHash: remoteCommitHash, etag: remoteEtag, metadataPath: metadataDestination) + + return destination + } catch { + // If download fails, leave the incomplete file in place for future resume + print("[HubApi] Download failed but incomplete file is preserved for future resume: \(error.localizedDescription)") + throw error } try hub.writeDownloadMetadata(commitHash: remoteCommitHash, etag: remoteEtag, metadataPath: metadataDestination) diff --git a/Tests/HubTests/DownloaderTests.swift b/Tests/HubTests/DownloaderTests.swift index 2c6ed02..1ef7c0f 100644 --- a/Tests/HubTests/DownloaderTests.swift +++ b/Tests/HubTests/DownloaderTests.swift @@ -168,4 +168,28 @@ final class DownloaderTests: XCTestCase { throw error } } + + func testAutomaticIncompleteFileDetection() throws { + let url = URL(string: "https://huggingface.co/coreml-projects/sam-2-studio/resolve/main/SAM%202%20Studio%201.1.zip")! + let destination = tempDir.appendingPathComponent("SAM%202%20Studio%201.1.zip") + + // Create a sample incomplete file with test content + let incompletePath = Downloader.incompletePath(for: destination) + try FileManager.default.createDirectory(at: incompletePath.deletingLastPathComponent(), withIntermediateDirectories: true) + let testContent = Data(repeating: 65, count: 1024) // 1KB of data + FileManager.default.createFile(atPath: incompletePath.path, contents: testContent) + + // Create a downloader for the same destination + // It should automatically detect and use the incomplete file + let downloader = Downloader( + from: url, + to: destination + ) + + // Verify the downloader found and is using the incomplete file + XCTAssertEqual(downloader.downloadedSize, 1024, "Should have detected the incomplete file and set resumeSize") + + // Clean up + try? FileManager.default.removeItem(at: incompletePath) + } } From 4b6ebd4355bad12716d1f721cf2db7d022f0d6d1 Mon Sep 17 00:00:00 2001 From: Anthony DePasquale Date: Tue, 25 Mar 2025 21:38:09 +0100 Subject: [PATCH 2/2] Remove debug print statements --- Sources/Hub/Downloader.swift | 32 -------------------------------- Sources/Hub/HubApi.swift | 4 ---- 2 files changed, 36 deletions(-) diff --git a/Sources/Hub/Downloader.swift b/Sources/Hub/Downloader.swift index 521ad15..dd6a921 100644 --- a/Sources/Hub/Downloader.swift +++ b/Sources/Hub/Downloader.swift @@ -53,7 +53,6 @@ class Downloader: NSObject, ObservableObject { if let attributes = try? FileManager.default.attributesOfItem(atPath: incompletePath.path), let fileSize = attributes[.size] as? Int { - print("[Downloader] Found existing incomplete file for \(destination.lastPathComponent): \(fileSize) bytes") return fileSize } } @@ -122,19 +121,13 @@ class Downloader: NSObject, ObservableObject { timeout: TimeInterval, numRetries: Int ) { - print("[Downloader] Setting up download for \(url.lastPathComponent)") - print("[Downloader] Destination: \(destination.path)") - print("[Downloader] Incomplete file: \(tempFilePath?.path ?? "none")") - urlSession?.getAllTasks { tasks in // If there's an existing pending background task with the same URL, let it proceed. if let existing = tasks.filter({ $0.originalRequest?.url == url }).first { switch existing.state { case .running: - print("[Downloader] Task already running for \(url.lastPathComponent)") return case .suspended: - print("[Downloader] Resuming suspended download task for \(url.lastPathComponent)") existing.resume() return case .canceling, .completed: @@ -156,7 +149,6 @@ class Downloader: NSObject, ObservableObject { if fileManager.fileExists(atPath: incompleteFilePath.path) { let attributes = try fileManager.attributesOfItem(atPath: incompleteFilePath.path) existingSize = attributes[.size] as? Int ?? 0 - print("[Downloader] Found incomplete file with \(existingSize) bytes") self.downloadedSize = existingSize } else { // Create parent directory if needed @@ -164,7 +156,6 @@ class Downloader: NSObject, ObservableObject { // Create empty incomplete file fileManager.createFile(atPath: incompleteFilePath.path, contents: nil) - print("[Downloader] Created new incomplete file at \(incompleteFilePath.path)") } // Set up the request with appropriate headers @@ -183,14 +174,11 @@ class Downloader: NSObject, ObservableObject { if let expectedSize, expectedSize > 0 { let initialProgress = Double(existingSize) / Double(expectedSize) self.downloadState.value = .downloading(initialProgress) - print("[Downloader] Resuming from \(existingSize)/\(expectedSize) bytes (\(Int(initialProgress * 100))%)") } else { self.downloadState.value = .downloading(0) - print("[Downloader] Resuming download from byte \(existingSize)") } } else { self.downloadState.value = .downloading(0) - print("[Downloader] Starting new download") } request.timeoutInterval = timeout @@ -209,14 +197,9 @@ class Downloader: NSObject, ObservableObject { // Clean up and move the completed download to its final destination tempFile.closeFile() - print("[Downloader] Download completed with total size \(self.downloadedSize) bytes") - print("[Downloader] Moving incomplete file to destination: \(self.destination.path)") try fileManager.moveDownloadedFile(from: incompleteFilePath, to: self.destination) - - print("[Downloader] Download successfully completed") self.downloadState.value = .completed(self.destination) } catch { - print("[Downloader] Error: \(error)") self.downloadState.value = .failed(error) } } @@ -249,27 +232,15 @@ class Downloader: NSObject, ObservableObject { var newRequest = request if resumeSize > 0 { newRequest.setValue("bytes=\(resumeSize)-", forHTTPHeaderField: "Range") - print("[Downloader] Adding Range header: bytes=\(resumeSize)-") } // Start the download and get the byte stream let (asyncBytes, response) = try await session.bytes(for: newRequest) guard let httpResponse = response as? HTTPURLResponse else { - print("[Downloader] Error: Non-HTTP response received") throw DownloadError.unexpectedError } - - print("[Downloader] Received HTTP \(httpResponse.statusCode) response") - if let contentRange = httpResponse.value(forHTTPHeaderField: "Content-Range") { - print("[Downloader] Content-Range: \(contentRange)") - } - if let contentLength = httpResponse.value(forHTTPHeaderField: "Content-Length") { - print("[Downloader] Content-Length: \(contentLength)") - } - guard (200..<300).contains(httpResponse.statusCode) else { - print("[Downloader] Error: HTTP status code \(httpResponse.statusCode)") throw DownloadError.unexpectedError } @@ -323,10 +294,7 @@ class Downloader: NSObject, ObservableObject { // Verify the downloaded file size matches the expected size let actualSize = try tempFile.seekToEnd() if let expectedSize, expectedSize != actualSize { - print("[Downloader] Error: Size mismatch - expected \(expectedSize) bytes but got \(actualSize) bytes") throw DownloadError.unexpectedError - } else { - print("[Downloader] Final verification passed, size: \(actualSize) bytes") } } diff --git a/Sources/Hub/HubApi.swift b/Sources/Hub/HubApi.swift index a3628ef..9be1f7f 100644 --- a/Sources/Hub/HubApi.swift +++ b/Sources/Hub/HubApi.swift @@ -433,10 +433,7 @@ public extension HubApi { if FileManager.default.fileExists(atPath: incompleteFile.path) { if let fileAttributes = try? FileManager.default.attributesOfItem(atPath: incompleteFile.path) { resumeSize = (fileAttributes[FileAttributeKey.size] as? Int) ?? 0 - print("[HubApi] Found existing incomplete file for \(destination.lastPathComponent): \(resumeSize) bytes at \(incompleteFile.path)") } - } else { - print("[HubApi] No existing incomplete file found for \(destination.lastPathComponent)") } let downloader = Downloader( @@ -466,7 +463,6 @@ public extension HubApi { return destination } catch { // If download fails, leave the incomplete file in place for future resume - print("[HubApi] Download failed but incomplete file is preserved for future resume: \(error.localizedDescription)") throw error }