Skip to content

Commit 34ccd41

Browse files
ardaatahan authored and DePasqualeOrg committed
cleanup and add new test cases
1 parent 4b6ebd4 commit 34ccd41

File tree

4 files changed

+122
-90
lines changed

4 files changed

+122
-90
lines changed

Sources/Hub/Downloader.swift

+13-50
Original file line number | Diff line number | Diff line change
@@ -11,7 +11,6 @@ import Foundation
1111

1212
class Downloader: NSObject, ObservableObject {
1313
private(set) var destination: URL
14-
private(set) var sourceURL: URL
1514

1615
private let chunkSize = 10 * 1024 * 1024 // 10MB
1716

@@ -31,28 +30,18 @@ class Downloader: NSObject, ObservableObject {
3130
private(set) lazy var downloadState: CurrentValueSubject<DownloadState, Never> = CurrentValueSubject(.notStarted)
3231
private var stateSubscriber: Cancellable?
3332

34-
private(set) var tempFilePath: URL?
33+
private(set) var tempFilePath: URL
3534
private(set) var expectedSize: Int?
3635
private(set) var downloadedSize: Int = 0
3736

3837
private var urlSession: URLSession? = nil
3938

40-
/// Creates the incomplete file path for a given destination URL
41-
/// This is similar to the Hugging Face Hub approach of using .incomplete files
42-
static func incompletePath(for destination: URL) -> URL {
43-
destination.appendingPathExtension("incomplete")
44-
}
45-
4639
/// Checks whether an incomplete file exists at the given path and returns its size
4740
/// - Parameter incompletePath: The URL of the incomplete (partially downloaded) file
4841
/// - Returns: Size of the incomplete file if it exists, otherwise 0
49-
static func checkForIncompleteFile(at destination: URL) -> Int {
50-
let incompletePath = Self.incompletePath(for: destination)
51-
42+
static func incompleteFileSize(at incompletePath: URL) -> Int {
5243
if FileManager.default.fileExists(atPath: incompletePath.path) {
53-
if let attributes = try? FileManager.default.attributesOfItem(atPath: incompletePath.path),
54-
let fileSize = attributes[.size] as? Int
55-
{
44+
if let attributes = try? FileManager.default.attributesOfItem(atPath: incompletePath.path), let fileSize = attributes[.size] as? Int {
5645
return fileSize
5746
}
5847
}
@@ -63,29 +52,22 @@ class Downloader: NSObject, ObservableObject {
6352
init(
6453
from url: URL,
6554
to destination: URL,
55+
incompleteDestination: URL,
6656
using authToken: String? = nil,
6757
inBackground: Bool = false,
68-
resumeSize: Int = 0, // Can be specified manually, but will also check for incomplete files
6958
headers: [String: String]? = nil,
7059
expectedSize: Int? = nil,
7160
timeout: TimeInterval = 10,
7261
numRetries: Int = 5
7362
) {
7463
self.destination = destination
75-
sourceURL = url
7664
self.expectedSize = expectedSize
7765

7866
// Create incomplete file path based on destination
79-
tempFilePath = Downloader.incompletePath(for: destination)
67+
self.tempFilePath = incompleteDestination
8068

8169
// Resume from the size of any existing incomplete file (0 if there is none)
82-
let actualResumeSize: Int = if resumeSize > 0 {
83-
resumeSize
84-
} else {
85-
Downloader.checkForIncompleteFile(at: destination)
86-
}
87-
88-
downloadedSize = actualResumeSize
70+
let resumeSize = Self.incompleteFileSize(at: incompleteDestination)
8971

9072
super.init()
9173
let sessionIdentifier = "swift-transformers.hub.downloader"
@@ -99,7 +81,7 @@ class Downloader: NSObject, ObservableObject {
9981

10082
urlSession = URLSession(configuration: config, delegate: self, delegateQueue: nil)
10183

102-
setUpDownload(from: url, with: authToken, resumeSize: actualResumeSize, headers: headers, expectedSize: expectedSize, timeout: timeout, numRetries: numRetries)
84+
setUpDownload(from: url, with: authToken, resumeSize: resumeSize, headers: headers, expectedSize: expectedSize, timeout: timeout, numRetries: numRetries)
10385
}
10486

10587
/// Sets up and initiates a file download operation
@@ -139,25 +121,6 @@ class Downloader: NSObject, ObservableObject {
139121

140122
Task {
141123
do {
142-
// Check if incomplete file exists and get its size
143-
var existingSize = 0
144-
guard let incompleteFilePath = self.tempFilePath else {
145-
throw DownloadError.unexpectedError
146-
}
147-
148-
let fileManager = FileManager.default
149-
if fileManager.fileExists(atPath: incompleteFilePath.path) {
150-
let attributes = try fileManager.attributesOfItem(atPath: incompleteFilePath.path)
151-
existingSize = attributes[.size] as? Int ?? 0
152-
self.downloadedSize = existingSize
153-
} else {
154-
// Create parent directory if needed
155-
try fileManager.createDirectory(at: incompleteFilePath.deletingLastPathComponent(), withIntermediateDirectories: true)
156-
157-
// Create empty incomplete file
158-
fileManager.createFile(atPath: incompleteFilePath.path, contents: nil)
159-
}
160-
161124
// Set up the request with appropriate headers
162125
var request = URLRequest(url: url)
163126
var requestHeaders = headers ?? [:]
@@ -167,12 +130,12 @@ class Downloader: NSObject, ObservableObject {
167130
}
168131

169132
// Set Range header if we're resuming
170-
if existingSize > 0 {
171-
requestHeaders["Range"] = "bytes=\(existingSize)-"
133+
if resumeSize > 0 {
134+
requestHeaders["Range"] = "bytes=\(resumeSize)-"
172135

173136
// Calculate and show initial progress
174137
if let expectedSize, expectedSize > 0 {
175-
let initialProgress = Double(existingSize) / Double(expectedSize)
138+
let initialProgress = Double(resumeSize) / Double(expectedSize)
176139
self.downloadState.value = .downloading(initialProgress)
177140
} else {
178141
self.downloadState.value = .downloading(0)
@@ -185,10 +148,10 @@ class Downloader: NSObject, ObservableObject {
185148
request.allHTTPHeaderFields = requestHeaders
186149

187150
// Open the incomplete file for writing
188-
let tempFile = try FileHandle(forWritingTo: incompleteFilePath)
151+
let tempFile = try FileHandle(forWritingTo: self.tempFilePath)
189152

190153
// If resuming, seek to end of file
191-
if existingSize > 0 {
154+
if resumeSize > 0 {
192155
try tempFile.seekToEnd()
193156
}
194157

@@ -197,7 +160,7 @@ class Downloader: NSObject, ObservableObject {
197160

198161
// Clean up and move the completed download to its final destination
199162
tempFile.closeFile()
200-
try fileManager.moveDownloadedFile(from: incompleteFilePath, to: self.destination)
163+
try FileManager.default.moveDownloadedFile(from: self.tempFilePath, to: self.destination)
201164
self.downloadState.value = .completed(self.destination)
202165
} catch {
203166
self.downloadState.value = .failed(error)

Sources/Hub/HubApi.swift

+12-14
Original file line number | Diff line number | Diff line change
@@ -362,6 +362,10 @@ public extension HubApi {
362362
repoMetadataDestination.appending(path: relativeFilename + ".metadata")
363363
}
364364

365+
var incompleteDestination: URL {
366+
repoMetadataDestination.appending(path: relativeFilename + ".incomplete")
367+
}
368+
365369
var downloaded: Bool {
366370
FileManager.default.fileExists(atPath: destination.path)
367371
}
@@ -371,9 +375,13 @@ public extension HubApi {
371375
try FileManager.default.createDirectory(at: directoryURL, withIntermediateDirectories: true, attributes: nil)
372376
}
373377

374-
func prepareMetadataDestination() throws {
375-
let directoryURL = metadataDestination.deletingLastPathComponent()
378+
// Prepare the cache directory from the incomplete destination rather than the metadata destination: incomplete files are created for both LFS and non-LFS files, whereas metadata files exist only for LFS files
379+
func prepareCacheDestination() throws {
380+
let directoryURL = incompleteDestination.deletingLastPathComponent()
376381
try FileManager.default.createDirectory(at: directoryURL, withIntermediateDirectories: true, attributes: nil)
382+
if !FileManager.default.fileExists(atPath: incompleteDestination.path) {
383+
try "".write(to: incompleteDestination, atomically: true, encoding: .utf8)
384+
}
377385
}
378386

379387
/// Note we go from Combine in Downloader to callback-based progress reporting
@@ -424,24 +432,14 @@ public extension HubApi {
424432

425433
// Otherwise, let's download the file!
426434
try prepareDestination()
427-
try prepareMetadataDestination()
435+
try prepareCacheDestination()
428436

429-
// Check for an existing incomplete file
430-
let incompleteFile = Downloader.incompletePath(for: destination)
431-
var resumeSize = 0
432-
433-
if FileManager.default.fileExists(atPath: incompleteFile.path) {
434-
if let fileAttributes = try? FileManager.default.attributesOfItem(atPath: incompleteFile.path) {
435-
resumeSize = (fileAttributes[FileAttributeKey.size] as? Int) ?? 0
436-
}
437-
}
438-
439437
let downloader = Downloader(
440438
from: source,
441439
to: destination,
440+
incompleteDestination: incompleteDestination,
442441
using: hfToken,
443442
inBackground: backgroundSession,
444-
resumeSize: resumeSize,
445443
expectedSize: remoteSize
446444
)
447445

Tests/HubTests/DownloaderTests.swift

+23-26
Original file line number | Diff line number | Diff line change
@@ -59,9 +59,16 @@ final class DownloaderTests: XCTestCase {
5959
6060
"""
6161

62+
let cacheDir = tempDir.appendingPathComponent("cache")
63+
try? FileManager.default.createDirectory(at: cacheDir, withIntermediateDirectories: true)
64+
65+
let incompleteDestination = cacheDir.appendingPathComponent("config.json.incomplete")
66+
FileManager.default.createFile(atPath: incompleteDestination.path, contents: nil, attributes: nil)
67+
6268
let downloader = Downloader(
6369
from: url,
64-
to: destination
70+
to: destination,
71+
incompleteDestination: incompleteDestination
6572
)
6673

6774
// Store subscriber outside the continuation to maintain its lifecycle
@@ -95,10 +102,17 @@ final class DownloaderTests: XCTestCase {
95102
let url = URL(string: "https://huggingface.co/coreml-projects/Llama-2-7b-chat-coreml/resolve/main/config.json")!
96103
let destination = tempDir.appendingPathComponent("config.json")
97104

105+
let cacheDir = tempDir.appendingPathComponent("cache")
106+
try? FileManager.default.createDirectory(at: cacheDir, withIntermediateDirectories: true)
107+
108+
let incompleteDestination = cacheDir.appendingPathComponent("config.json.incomplete")
109+
FileManager.default.createFile(atPath: incompleteDestination.path, contents: nil, attributes: nil)
110+
98111
// Create downloader with incorrect expected size
99112
let downloader = Downloader(
100113
from: url,
101114
to: destination,
115+
incompleteDestination: incompleteDestination,
102116
expectedSize: 999999 // Incorrect size
103117
)
104118

@@ -120,10 +134,17 @@ final class DownloaderTests: XCTestCase {
120134
// Create parent directory if it doesn't exist
121135
try FileManager.default.createDirectory(at: destination.deletingLastPathComponent(),
122136
withIntermediateDirectories: true)
123-
137+
138+
let cacheDir = tempDir.appendingPathComponent("cache")
139+
try? FileManager.default.createDirectory(at: cacheDir, withIntermediateDirectories: true)
140+
141+
let incompleteDestination = cacheDir.appendingPathComponent("config.json.incomplete")
142+
FileManager.default.createFile(atPath: incompleteDestination.path, contents: nil, attributes: nil)
143+
124144
let downloader = Downloader(
125145
from: url,
126146
to: destination,
147+
incompleteDestination: incompleteDestination,
127148
expectedSize: 73194001 // Correct size for verification
128149
)
129150

@@ -168,28 +189,4 @@ final class DownloaderTests: XCTestCase {
168189
throw error
169190
}
170191
}
171-
172-
func testAutomaticIncompleteFileDetection() throws {
173-
let url = URL(string: "https://huggingface.co/coreml-projects/sam-2-studio/resolve/main/SAM%202%20Studio%201.1.zip")!
174-
let destination = tempDir.appendingPathComponent("SAM%202%20Studio%201.1.zip")
175-
176-
// Create a sample incomplete file with test content
177-
let incompletePath = Downloader.incompletePath(for: destination)
178-
try FileManager.default.createDirectory(at: incompletePath.deletingLastPathComponent(), withIntermediateDirectories: true)
179-
let testContent = Data(repeating: 65, count: 1024) // 1KB of data
180-
FileManager.default.createFile(atPath: incompletePath.path, contents: testContent)
181-
182-
// Create a downloader for the same destination
183-
// It should automatically detect and use the incomplete file
184-
let downloader = Downloader(
185-
from: url,
186-
to: destination
187-
)
188-
189-
// Verify the downloader found and is using the incomplete file
190-
XCTAssertEqual(downloader.downloadedSize, 1024, "Should have detected the incomplete file and set resumeSize")
191-
192-
// Clean up
193-
try? FileManager.default.removeItem(at: incompletePath)
194-
}
195192
}

Tests/HubTests/HubApiTests.swift

+74
Original file line number | Diff line number | Diff line change
@@ -968,4 +968,78 @@ class SnapshotDownloadTests: XCTestCase {
968968
XCTFail("Unexpected error: \(error)")
969969
}
970970
}
971+
972+
func testResumeDownloadFromEmptyIncomplete() async throws {
973+
let hubApi = HubApi(downloadBase: downloadDestination)
974+
var lastProgress: Progress? = nil
975+
var downloadedTo = FileManager.default.homeDirectoryForCurrentUser
976+
.appendingPathComponent("Library/Caches/huggingface-tests/models/coreml-projects/Llama-2-7b-chat-coreml")
977+
978+
let metadataDestination = downloadedTo.appending(component: ".cache/huggingface/download")
979+
980+
try FileManager.default.createDirectory(at: metadataDestination, withIntermediateDirectories: true, attributes: nil)
981+
try "".write(to: metadataDestination.appendingPathComponent("config.json.incomplete"), atomically: true, encoding: .utf8)
982+
downloadedTo = try await hubApi.snapshot(from: repo, matching: "config.json") { progress in
983+
print("Total Progress: \(progress.fractionCompleted)")
984+
print("Files Completed: \(progress.completedUnitCount) of \(progress.totalUnitCount)")
985+
lastProgress = progress
986+
}
987+
XCTAssertEqual(lastProgress?.fractionCompleted, 1)
988+
XCTAssertEqual(lastProgress?.completedUnitCount, 1)
989+
XCTAssertEqual(downloadedTo, downloadDestination.appending(path: "models/\(repo)"))
990+
991+
let fileContents = try String(contentsOfFile: downloadedTo.appendingPathComponent("config.json").path)
992+
993+
let expected = """
994+
{
995+
"architectures": [
996+
"LlamaForCausalLM"
997+
],
998+
"bos_token_id": 1,
999+
"eos_token_id": 2,
1000+
"model_type": "llama",
1001+
"pad_token_id": 0,
1002+
"vocab_size": 32000
1003+
}
1004+
1005+
"""
1006+
XCTAssertTrue(fileContents.contains(expected))
1007+
}
1008+
1009+
func testResumeDownloadFromNonEmptyIncomplete() async throws {
1010+
let hubApi = HubApi(downloadBase: downloadDestination)
1011+
var lastProgress: Progress? = nil
1012+
var downloadedTo = FileManager.default.homeDirectoryForCurrentUser
1013+
.appendingPathComponent("Library/Caches/huggingface-tests/models/coreml-projects/Llama-2-7b-chat-coreml")
1014+
1015+
let metadataDestination = downloadedTo.appending(component: ".cache/huggingface/download")
1016+
1017+
try FileManager.default.createDirectory(at: metadataDestination, withIntermediateDirectories: true, attributes: nil)
1018+
try "X".write(to: metadataDestination.appendingPathComponent("config.json.incomplete"), atomically: true, encoding: .utf8)
1019+
downloadedTo = try await hubApi.snapshot(from: repo, matching: "config.json") { progress in
1020+
print("Total Progress: \(progress.fractionCompleted)")
1021+
print("Files Completed: \(progress.completedUnitCount) of \(progress.totalUnitCount)")
1022+
lastProgress = progress
1023+
}
1024+
XCTAssertEqual(lastProgress?.fractionCompleted, 1)
1025+
XCTAssertEqual(lastProgress?.completedUnitCount, 1)
1026+
XCTAssertEqual(downloadedTo, downloadDestination.appending(path: "models/\(repo)"))
1027+
1028+
let fileContents = try String(contentsOfFile: downloadedTo.appendingPathComponent("config.json").path)
1029+
1030+
let expected = """
1031+
X
1032+
"architectures": [
1033+
"LlamaForCausalLM"
1034+
],
1035+
"bos_token_id": 1,
1036+
"eos_token_id": 2,
1037+
"model_type": "llama",
1038+
"pad_token_id": 0,
1039+
"vocab_size": 32000
1040+
}
1041+
1042+
"""
1043+
XCTAssertTrue(fileContents.contains(expected))
1044+
}
9711045
}

0 commit comments

Comments
 (0)