Skip to content

Commit 1ca116b

Browse files
Add incomplete download support (#198)
* Add resumable downloads across app sessions
* Remove debug print statements
* cleanup and add new test cases
* add download cancellation handling and fix download progress update
* create target files right before moving and add etag to incomplete file names for versioning
* properly cancel downloader task and fix formatting
* remove unnecessary tabs in test cases

---------

Co-authored-by: Anthony DePasquale <anthony@depasquale.org>
1 parent 940a4fa commit 1ca116b

File tree

5 files changed

+320
-120
lines changed

5 files changed

+320
-120
lines changed

Sources/Hub/Downloader.swift

Lines changed: 88 additions & 50 deletions
Original file line numberDiff line numberDiff line change
@@ -24,25 +24,39 @@ class Downloader: NSObject, ObservableObject {
2424
/// Errors raised by the downloader.
///
/// `unexpectedError` is what `httpGet` throws when no session is available,
/// when the response is not an HTTP response, or when the status code is
/// outside 200..<300. The throw sites for the other two cases are not shown
/// here, so their meaning is taken from their names only.
enum DownloadError: Error {
    case invalidDownloadLocation, unexpectedError, tempFileNotFound
}
2829

2930
private(set) lazy var downloadState: CurrentValueSubject<DownloadState, Never> = CurrentValueSubject(.notStarted)
3031
private var stateSubscriber: Cancellable?
32+
33+
private(set) var tempFilePath: URL
34+
private(set) var expectedSize: Int?
35+
private(set) var downloadedSize: Int = 0
3136

32-
private var urlSession: URLSession? = nil
33-
37+
var session: URLSession? = nil
38+
var downloadTask: Task<Void, Error>? = nil
39+
3440
init(
3541
from url: URL,
3642
to destination: URL,
43+
incompleteDestination: URL,
3744
using authToken: String? = nil,
3845
inBackground: Bool = false,
39-
resumeSize: Int = 0,
4046
headers: [String: String]? = nil,
4147
expectedSize: Int? = nil,
4248
timeout: TimeInterval = 10,
4349
numRetries: Int = 5
4450
) {
4551
self.destination = destination
52+
self.expectedSize = expectedSize
53+
54+
// Use the caller-provided location for the incomplete (partially downloaded) file
55+
tempFilePath = incompleteDestination
56+
57+
// Resume from an existing incomplete file, if any (the size is 0 when there is none)
58+
let resumeSize = Self.incompleteFileSize(at: incompleteDestination)
59+
4660
super.init()
4761
let sessionIdentifier = "swift-transformers.hub.downloader"
4862

@@ -53,9 +67,22 @@ class Downloader: NSObject, ObservableObject {
5367
config.sessionSendsLaunchEvents = true
5468
}
5569

56-
urlSession = URLSession(configuration: config, delegate: self, delegateQueue: nil)
70+
session = URLSession(configuration: config, delegate: self, delegateQueue: nil)
5771

58-
setupDownload(from: url, with: authToken, resumeSize: resumeSize, headers: headers, expectedSize: expectedSize, timeout: timeout, numRetries: numRetries)
72+
setUpDownload(from: url, with: authToken, resumeSize: resumeSize, headers: headers, expectedSize: expectedSize, timeout: timeout, numRetries: numRetries)
73+
}
74+
75+
/// Reports how many bytes of an incomplete download are already on disk.
/// - Parameter incompletePath: Location of the incomplete (partially downloaded) file
/// - Returns: Size of the file in bytes, or 0 if it does not exist or its size cannot be read
static func incompleteFileSize(at incompletePath: URL) -> Int {
    let fileManager = FileManager.default
    let path = incompletePath.path

    guard fileManager.fileExists(atPath: path) else {
        return 0
    }

    // A failed attribute lookup or an unexpected size type is treated the same as "no file".
    guard
        let attributes = try? fileManager.attributesOfItem(atPath: path),
        let byteCount = attributes[.size] as? Int
    else {
        return 0
    }

    return byteCount
}
6087

6188
/// Sets up and initiates a file download operation
@@ -68,7 +95,7 @@ class Downloader: NSObject, ObservableObject {
6895
/// - expectedSize: Expected file size in bytes for validation
6996
/// - timeout: Time interval before the request times out
7097
/// - numRetries: Number of retry attempts for failed downloads
71-
private func setupDownload(
98+
private func setUpDownload(
7299
from url: URL,
73100
with authToken: String?,
74101
resumeSize: Int,
@@ -77,59 +104,67 @@ class Downloader: NSObject, ObservableObject {
77104
timeout: TimeInterval,
78105
numRetries: Int
79106
) {
80-
downloadState.value = .downloading(0)
81-
urlSession?.getAllTasks { tasks in
107+
session?.getAllTasks { tasks in
82108
// If there's an existing pending background task with the same URL, let it proceed.
83109
if let existing = tasks.filter({ $0.originalRequest?.url == url }).first {
84110
switch existing.state {
85111
case .running:
86-
// print("Already downloading \(url)")
87112
return
88113
case .suspended:
89-
// print("Resuming suspended download task for \(url)")
90114
existing.resume()
91115
return
92-
case .canceling:
93-
// print("Starting new download task for \(url), previous was canceling")
94-
break
95-
case .completed:
96-
// print("Starting new download task for \(url), previous is complete but the file is no longer present (I think it's cached)")
97-
break
116+
case .canceling, .completed:
117+
existing.cancel()
98118
@unknown default:
99-
// print("Unknown state for running task; cancelling and creating a new one")
100119
existing.cancel()
101120
}
102121
}
103-
var request = URLRequest(url: url)
104122

105-
// Use headers from argument else create an empty header dictionary
106-
var requestHeaders = headers ?? [:]
107-
108-
// Populate header auth and range fields
109-
if let authToken {
110-
requestHeaders["Authorization"] = "Bearer \(authToken)"
111-
}
112-
if resumeSize > 0 {
113-
requestHeaders["Range"] = "bytes=\(resumeSize)-"
114-
}
115-
116-
request.timeoutInterval = timeout
117-
request.allHTTPHeaderFields = requestHeaders
118-
119-
Task {
123+
self.downloadTask = Task {
120124
do {
121-
// Create a temp file to write
122-
let tempURL = FileManager.default.temporaryDirectory.appendingPathComponent(UUID().uuidString)
123-
FileManager.default.createFile(atPath: tempURL.path, contents: nil)
124-
let tempFile = try FileHandle(forWritingTo: tempURL)
125+
// Set up the request with appropriate headers
126+
var request = URLRequest(url: url)
127+
var requestHeaders = headers ?? [:]
128+
129+
if let authToken {
130+
requestHeaders["Authorization"] = "Bearer \(authToken)"
131+
}
132+
133+
self.downloadedSize = resumeSize
134+
135+
// Set Range header if we're resuming
136+
if resumeSize > 0 {
137+
requestHeaders["Range"] = "bytes=\(resumeSize)-"
138+
139+
// Calculate and show initial progress
140+
if let expectedSize, expectedSize > 0 {
141+
let initialProgress = Double(resumeSize) / Double(expectedSize)
142+
self.downloadState.value = .downloading(initialProgress)
143+
} else {
144+
self.downloadState.value = .downloading(0)
145+
}
146+
} else {
147+
self.downloadState.value = .downloading(0)
148+
}
125149

126-
defer { tempFile.closeFile() }
127-
try await self.httpGet(request: request, tempFile: tempFile, resumeSize: resumeSize, numRetries: numRetries, expectedSize: expectedSize)
150+
request.timeoutInterval = timeout
151+
request.allHTTPHeaderFields = requestHeaders
152+
153+
// Open the incomplete file for writing
154+
let tempFile = try FileHandle(forWritingTo: self.tempFilePath)
155+
156+
// If resuming, seek to end of file
157+
if resumeSize > 0 {
158+
try tempFile.seekToEnd()
159+
}
160+
161+
try await self.httpGet(request: request, tempFile: tempFile, resumeSize: self.downloadedSize, numRetries: numRetries, expectedSize: expectedSize)
128162

129163
// Clean up and move the completed download to its final destination
130164
tempFile.closeFile()
131-
try FileManager.default.moveDownloadedFile(from: tempURL, to: self.destination)
132165

166+
try Task.checkCancellation()
167+
try FileManager.default.moveDownloadedFile(from: self.tempFilePath, to: self.destination)
133168
self.downloadState.value = .completed(self.destination)
134169
} catch {
135170
self.downloadState.value = .failed(error)
@@ -156,7 +191,7 @@ class Downloader: NSObject, ObservableObject {
156191
numRetries: Int,
157192
expectedSize: Int?
158193
) async throws {
159-
guard let session = urlSession else {
194+
guard let session else {
160195
throw DownloadError.unexpectedError
161196
}
162197

@@ -169,16 +204,13 @@ class Downloader: NSObject, ObservableObject {
169204
// Start the download and get the byte stream
170205
let (asyncBytes, response) = try await session.bytes(for: newRequest)
171206

172-
guard let response = response as? HTTPURLResponse else {
207+
guard let httpResponse = response as? HTTPURLResponse else {
173208
throw DownloadError.unexpectedError
174209
}
175-
176-
guard (200..<300).contains(response.statusCode) else {
210+
guard (200..<300).contains(httpResponse.statusCode) else {
177211
throw DownloadError.unexpectedError
178212
}
179213

180-
var downloadedSize = resumeSize
181-
182214
// Create a buffer to collect bytes before writing to disk
183215
var buffer = Data(capacity: chunkSize)
184216

@@ -213,12 +245,12 @@ class Downloader: NSObject, ObservableObject {
213245
try await Task.sleep(nanoseconds: 1_000_000_000)
214246

215247
let config = URLSessionConfiguration.default
216-
self.urlSession = URLSession(configuration: config, delegate: self, delegateQueue: nil)
248+
self.session = URLSession(configuration: config, delegate: self, delegateQueue: nil)
217249

218250
try await httpGet(
219251
request: request,
220252
tempFile: tempFile,
221-
resumeSize: downloadedSize,
253+
resumeSize: self.downloadedSize,
222254
numRetries: newNumRetries - 1,
223255
expectedSize: expectedSize
224256
)
@@ -252,7 +284,9 @@ class Downloader: NSObject, ObservableObject {
252284
}
253285

254286
/// Cancels the download.
///
/// Invalidates and cancels the URL session, cancels the stored `downloadTask`,
/// and sets `downloadState` to `.failed(URLError(.cancelled))`.
func cancel() {
255-
urlSession?.invalidateAndCancel()
287+
session?.invalidateAndCancel()
288+
downloadTask?.cancel()
289+
downloadState.value = .failed(URLError(.cancelled))
256290
}
257291
}
258292

@@ -284,9 +318,13 @@ extension Downloader: URLSessionDownloadDelegate {
284318

285319
extension FileManager {
    /// Moves a finished download into place, replacing whatever is already there.
    ///
    /// Any existing item at `dstURL` is removed first, then the destination's
    /// parent directory is created (including intermediate directories), and
    /// finally the file is moved.
    ///
    /// - Parameters:
    ///   - srcURL: Current location of the downloaded file.
    ///   - dstURL: Final location for the file.
    /// - Throws: Any error raised while removing the existing item, creating
    ///   the parent directory, or moving the file.
    func moveDownloadedFile(from srcURL: URL, to dstURL: URL) throws {
        // Use `path`, not `path()`: the latter percent-encodes by default, so a
        // destination containing spaces or other escaped characters would never
        // be found here and `moveItem` would then fail because the file exists.
        if fileExists(atPath: dstURL.path) {
            try removeItem(at: dstURL)
        }

        let directoryURL = dstURL.deletingLastPathComponent()
        try createDirectory(at: directoryURL, withIntermediateDirectories: true, attributes: nil)

        try moveItem(at: srcURL, to: dstURL)
    }
}

Sources/Hub/Hub.swift

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -51,13 +51,13 @@ public extension Hub {
5151
}
5252
}
5353

54-
enum RepoType: String {
54+
/// Repository kinds; each case's raw value is its own name, and the type
/// encodes and decodes through that raw string.
enum RepoType: String, Codable {
    case models, datasets, spaces
}
59-
60-
struct Repo {
59+
60+
struct Repo: Codable {
6161
public let id: String
6262
public let type: RepoType
6363

Sources/Hub/HubApi.swift

Lines changed: 40 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -366,14 +366,13 @@ public extension HubApi {
366366
FileManager.default.fileExists(atPath: destination.path)
367367
}
368368

369-
func prepareDestination() throws {
370-
let directoryURL = destination.deletingLastPathComponent()
371-
try FileManager.default.createDirectory(at: directoryURL, withIntermediateDirectories: true, attributes: nil)
372-
}
373-
374-
func prepareMetadataDestination() throws {
375-
let directoryURL = metadataDestination.deletingLastPathComponent()
369+
/// Makes sure the incomplete-download file and its parent directory exist.
///
/// Creates the parent directory of `incompleteDestination` (with intermediate
/// directories) and, if nothing is at that path yet, writes an empty file there.
/// An existing file is left untouched.
///
/// The incomplete destination is used to prepare the cache location because
/// incomplete files cover both lfs and non-lfs files, whereas metadata files
/// cover lfs only (rationale carried over from the original comment).
func prepareCacheDestination(_ incompleteDestination: URL) throws {
    let fileManager = FileManager.default

    try fileManager.createDirectory(
        at: incompleteDestination.deletingLastPathComponent(),
        withIntermediateDirectories: true,
        attributes: nil
    )

    guard !fileManager.fileExists(atPath: incompleteDestination.path) else {
        return
    }
    try "".write(to: incompleteDestination, atomically: true, encoding: .utf8)
}
378377

379378
/// Note we go from Combine in Downloader to callback-based progress reporting
@@ -423,22 +422,42 @@ public extension HubApi {
423422
}
424423

425424
// Otherwise, let's download the file!
426-
try prepareDestination()
427-
try prepareMetadataDestination()
428-
429-
let downloader = Downloader(from: source, to: destination, using: hfToken, inBackground: backgroundSession, expectedSize: remoteSize)
430-
let downloadSubscriber = downloader.downloadState.sink { state in
431-
if case let .downloading(progress) = state {
432-
progressHandler(progress)
425+
let incompleteDestination = repoMetadataDestination.appending(path: relativeFilename + ".\(remoteEtag).incomplete")
426+
try prepareCacheDestination(incompleteDestination)
427+
428+
let downloader = Downloader(
429+
from: source,
430+
to: destination,
431+
incompleteDestination: incompleteDestination,
432+
using: hfToken,
433+
inBackground: backgroundSession,
434+
expectedSize: remoteSize
435+
)
436+
437+
return try await withTaskCancellationHandler {
438+
let downloadSubscriber = downloader.downloadState.sink { state in
439+
switch state {
440+
case let .downloading(progress):
441+
progressHandler(progress)
442+
case .completed, .failed, .notStarted:
443+
break
444+
}
433445
}
446+
do {
447+
_ = try withExtendedLifetime(downloadSubscriber) {
448+
try downloader.waitUntilDone()
449+
}
450+
451+
try hub.writeDownloadMetadata(commitHash: remoteCommitHash, etag: remoteEtag, metadataPath: metadataDestination)
452+
453+
return destination
454+
} catch {
455+
// If download fails, leave the incomplete file in place for future resume
456+
throw error
457+
}
458+
} onCancel: {
459+
downloader.cancel()
434460
}
435-
_ = try withExtendedLifetime(downloadSubscriber) {
436-
try downloader.waitUntilDone()
437-
}
438-
439-
try hub.writeDownloadMetadata(commitHash: remoteCommitHash, etag: remoteEtag, metadataPath: metadataDestination)
440-
441-
return destination
442461
}
443462
}
444463

0 commit comments

Comments
 (0)