Skip to content

Cleanup and add new test cases #1

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
63 changes: 13 additions & 50 deletions Sources/Hub/Downloader.swift
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,6 @@ import Foundation

class Downloader: NSObject, ObservableObject {
private(set) var destination: URL
private(set) var sourceURL: URL

private let chunkSize = 10 * 1024 * 1024 // 10MB

Expand All @@ -31,28 +30,18 @@ class Downloader: NSObject, ObservableObject {
private(set) lazy var downloadState: CurrentValueSubject<DownloadState, Never> = CurrentValueSubject(.notStarted)
private var stateSubscriber: Cancellable?

private(set) var tempFilePath: URL?
private(set) var tempFilePath: URL
private(set) var expectedSize: Int?
private(set) var downloadedSize: Int = 0

private var urlSession: URLSession? = nil

/// Creates the incomplete file path for a given destination URL
/// This is similar to the Hugging Face Hub approach of using .incomplete files
static func incompletePath(for destination: URL) -> URL {
destination.appendingPathExtension("incomplete")
}

/// Check if an incomplete file exists for the destination and returns its size
/// - Parameter destination: The destination URL for the download
/// - Returns: Size of the incomplete file if it exists, otherwise 0
static func checkForIncompleteFile(at destination: URL) -> Int {
let incompletePath = Self.incompletePath(for: destination)

static func incompleteFileSize(at incompletePath: URL) -> Int {
if FileManager.default.fileExists(atPath: incompletePath.path) {
if let attributes = try? FileManager.default.attributesOfItem(atPath: incompletePath.path),
let fileSize = attributes[.size] as? Int
{
if let attributes = try? FileManager.default.attributesOfItem(atPath: incompletePath.path), let fileSize = attributes[.size] as? Int {
return fileSize
}
}
Expand All @@ -63,29 +52,22 @@ class Downloader: NSObject, ObservableObject {
init(
from url: URL,
to destination: URL,
incompleteDestination: URL,
using authToken: String? = nil,
inBackground: Bool = false,
resumeSize: Int = 0, // Can be specified manually, but will also check for incomplete files
headers: [String: String]? = nil,
expectedSize: Int? = nil,
timeout: TimeInterval = 10,
numRetries: Int = 5
) {
self.destination = destination
sourceURL = url
self.expectedSize = expectedSize

// Create incomplete file path based on destination
tempFilePath = Downloader.incompletePath(for: destination)
self.tempFilePath = incompleteDestination

// If resume size wasn't specified, check for an existing incomplete file
let actualResumeSize: Int = if resumeSize > 0 {
resumeSize
} else {
Downloader.checkForIncompleteFile(at: destination)
}

downloadedSize = actualResumeSize
let resumeSize = Self.incompleteFileSize(at: incompleteDestination)

super.init()
let sessionIdentifier = "swift-transformers.hub.downloader"
Expand All @@ -99,7 +81,7 @@ class Downloader: NSObject, ObservableObject {

urlSession = URLSession(configuration: config, delegate: self, delegateQueue: nil)

setUpDownload(from: url, with: authToken, resumeSize: actualResumeSize, headers: headers, expectedSize: expectedSize, timeout: timeout, numRetries: numRetries)
setUpDownload(from: url, with: authToken, resumeSize: resumeSize, headers: headers, expectedSize: expectedSize, timeout: timeout, numRetries: numRetries)
}

/// Sets up and initiates a file download operation
Expand Down Expand Up @@ -139,25 +121,6 @@ class Downloader: NSObject, ObservableObject {

Task {
do {
// Check if incomplete file exists and get its size
var existingSize = 0
guard let incompleteFilePath = self.tempFilePath else {
throw DownloadError.unexpectedError
}

let fileManager = FileManager.default
if fileManager.fileExists(atPath: incompleteFilePath.path) {
let attributes = try fileManager.attributesOfItem(atPath: incompleteFilePath.path)
existingSize = attributes[.size] as? Int ?? 0
self.downloadedSize = existingSize
} else {
// Create parent directory if needed
try fileManager.createDirectory(at: incompleteFilePath.deletingLastPathComponent(), withIntermediateDirectories: true)

// Create empty incomplete file
fileManager.createFile(atPath: incompleteFilePath.path, contents: nil)
}

// Set up the request with appropriate headers
var request = URLRequest(url: url)
var requestHeaders = headers ?? [:]
Expand All @@ -167,12 +130,12 @@ class Downloader: NSObject, ObservableObject {
}

// Set Range header if we're resuming
if existingSize > 0 {
requestHeaders["Range"] = "bytes=\(existingSize)-"
if resumeSize > 0 {
requestHeaders["Range"] = "bytes=\(resumeSize)-"

// Calculate and show initial progress
if let expectedSize, expectedSize > 0 {
let initialProgress = Double(existingSize) / Double(expectedSize)
let initialProgress = Double(resumeSize) / Double(expectedSize)
self.downloadState.value = .downloading(initialProgress)
} else {
self.downloadState.value = .downloading(0)
Expand All @@ -185,10 +148,10 @@ class Downloader: NSObject, ObservableObject {
request.allHTTPHeaderFields = requestHeaders

// Open the incomplete file for writing
let tempFile = try FileHandle(forWritingTo: incompleteFilePath)
let tempFile = try FileHandle(forWritingTo: self.tempFilePath)

// If resuming, seek to end of file
if existingSize > 0 {
if resumeSize > 0 {
try tempFile.seekToEnd()
}

Expand All @@ -197,7 +160,7 @@ class Downloader: NSObject, ObservableObject {

// Clean up and move the completed download to its final destination
tempFile.closeFile()
try fileManager.moveDownloadedFile(from: incompleteFilePath, to: self.destination)
try FileManager.default.moveDownloadedFile(from: self.tempFilePath, to: self.destination)
self.downloadState.value = .completed(self.destination)
} catch {
self.downloadState.value = .failed(error)
Expand Down
26 changes: 12 additions & 14 deletions Sources/Hub/HubApi.swift
Original file line number Diff line number Diff line change
Expand Up @@ -361,6 +361,10 @@ public extension HubApi {
repoMetadataDestination.appending(path: relativeFilename + ".metadata")
}

var incompleteDestination: URL {
repoMetadataDestination.appending(path: relativeFilename + ".incomplete")
}

var downloaded: Bool {
FileManager.default.fileExists(atPath: destination.path)
}
Expand All @@ -370,9 +374,13 @@ public extension HubApi {
try FileManager.default.createDirectory(at: directoryURL, withIntermediateDirectories: true, attributes: nil)
}

func prepareMetadataDestination() throws {
let directoryURL = metadataDestination.deletingLastPathComponent()
// We're using incomplete destination to prepare cache destination because incomplete files include lfs + non-lfs files (vs only lfs for metadata files)
func prepareCacheDestination() throws {
let directoryURL = incompleteDestination.deletingLastPathComponent()
try FileManager.default.createDirectory(at: directoryURL, withIntermediateDirectories: true, attributes: nil)
if !FileManager.default.fileExists(atPath: incompleteDestination.path) {
try "".write(to: incompleteDestination, atomically: true, encoding: .utf8)
}
}

/// Note we go from Combine in Downloader to callback-based progress reporting
Expand Down Expand Up @@ -423,24 +431,14 @@ public extension HubApi {

// Otherwise, let's download the file!
try prepareDestination()
try prepareMetadataDestination()
try prepareCacheDestination()

// Check for an existing incomplete file
let incompleteFile = Downloader.incompletePath(for: destination)
var resumeSize = 0

if FileManager.default.fileExists(atPath: incompleteFile.path) {
if let fileAttributes = try? FileManager.default.attributesOfItem(atPath: incompleteFile.path) {
resumeSize = (fileAttributes[FileAttributeKey.size] as? Int) ?? 0
}
}

let downloader = Downloader(
from: source,
to: destination,
incompleteDestination: incompleteDestination,
using: hfToken,
inBackground: backgroundSession,
resumeSize: resumeSize,
expectedSize: remoteSize
)

Expand Down
49 changes: 23 additions & 26 deletions Tests/HubTests/DownloaderTests.swift
Original file line number Diff line number Diff line change
Expand Up @@ -59,9 +59,16 @@ final class DownloaderTests: XCTestCase {

"""

let cacheDir = tempDir.appendingPathComponent("cache")
try? FileManager.default.createDirectory(at: cacheDir, withIntermediateDirectories: true)

let incompleteDestination = cacheDir.appendingPathComponent("config.json.incomplete")
FileManager.default.createFile(atPath: incompleteDestination.path, contents: nil, attributes: nil)

let downloader = Downloader(
from: url,
to: destination
to: destination,
incompleteDestination: incompleteDestination
)

// Store subscriber outside the continuation to maintain its lifecycle
Expand Down Expand Up @@ -95,10 +102,17 @@ final class DownloaderTests: XCTestCase {
let url = URL(string: "https://huggingface.co/coreml-projects/Llama-2-7b-chat-coreml/resolve/main/config.json")!
let destination = tempDir.appendingPathComponent("config.json")

let cacheDir = tempDir.appendingPathComponent("cache")
try? FileManager.default.createDirectory(at: cacheDir, withIntermediateDirectories: true)

let incompleteDestination = cacheDir.appendingPathComponent("config.json.incomplete")
FileManager.default.createFile(atPath: incompleteDestination.path, contents: nil, attributes: nil)

// Create downloader with incorrect expected size
let downloader = Downloader(
from: url,
to: destination,
incompleteDestination: incompleteDestination,
expectedSize: 999999 // Incorrect size
)

Expand All @@ -120,10 +134,17 @@ final class DownloaderTests: XCTestCase {
// Create parent directory if it doesn't exist
try FileManager.default.createDirectory(at: destination.deletingLastPathComponent(),
withIntermediateDirectories: true)


let cacheDir = tempDir.appendingPathComponent("cache")
try? FileManager.default.createDirectory(at: cacheDir, withIntermediateDirectories: true)

let incompleteDestination = cacheDir.appendingPathComponent("config.json.incomplete")
FileManager.default.createFile(atPath: incompleteDestination.path, contents: nil, attributes: nil)

let downloader = Downloader(
from: url,
to: destination,
incompleteDestination: incompleteDestination,
expectedSize: 73194001 // Correct size for verification
)

Expand Down Expand Up @@ -168,28 +189,4 @@ final class DownloaderTests: XCTestCase {
throw error
}
}

func testAutomaticIncompleteFileDetection() throws {
let url = URL(string: "https://huggingface.co/coreml-projects/sam-2-studio/resolve/main/SAM%202%20Studio%201.1.zip")!
let destination = tempDir.appendingPathComponent("SAM%202%20Studio%201.1.zip")

// Create a sample incomplete file with test content
let incompletePath = Downloader.incompletePath(for: destination)
try FileManager.default.createDirectory(at: incompletePath.deletingLastPathComponent(), withIntermediateDirectories: true)
let testContent = Data(repeating: 65, count: 1024) // 1KB of data
FileManager.default.createFile(atPath: incompletePath.path, contents: testContent)

// Create a downloader for the same destination
// It should automatically detect and use the incomplete file
let downloader = Downloader(
from: url,
to: destination
)

// Verify the downloader found and is using the incomplete file
XCTAssertEqual(downloader.downloadedSize, 1024, "Should have detected the incomplete file and set resumeSize")

// Clean up
try? FileManager.default.removeItem(at: incompletePath)
}
}
74 changes: 74 additions & 0 deletions Tests/HubTests/HubApiTests.swift
Original file line number Diff line number Diff line change
Expand Up @@ -968,4 +968,78 @@ class SnapshotDownloadTests: XCTestCase {
XCTFail("Unexpected error: \(error)")
}
}

func testResumeDownloadFromEmptyIncomplete() async throws {
let hubApi = HubApi(downloadBase: downloadDestination)
var lastProgress: Progress? = nil
var downloadedTo = FileManager.default.homeDirectoryForCurrentUser
.appendingPathComponent("Library/Caches/huggingface-tests/models/coreml-projects/Llama-2-7b-chat-coreml")

let metadataDestination = downloadedTo.appending(component: ".cache/huggingface/download")

try FileManager.default.createDirectory(at: metadataDestination, withIntermediateDirectories: true, attributes: nil)
try "".write(to: metadataDestination.appendingPathComponent("config.json.incomplete"), atomically: true, encoding: .utf8)
downloadedTo = try await hubApi.snapshot(from: repo, matching: "config.json") { progress in
print("Total Progress: \(progress.fractionCompleted)")
print("Files Completed: \(progress.completedUnitCount) of \(progress.totalUnitCount)")
lastProgress = progress
}
XCTAssertEqual(lastProgress?.fractionCompleted, 1)
XCTAssertEqual(lastProgress?.completedUnitCount, 1)
XCTAssertEqual(downloadedTo, downloadDestination.appending(path: "models/\(repo)"))

let fileContents = try String(contentsOfFile: downloadedTo.appendingPathComponent("config.json").path)

let expected = """
{
"architectures": [
"LlamaForCausalLM"
],
"bos_token_id": 1,
"eos_token_id": 2,
"model_type": "llama",
"pad_token_id": 0,
"vocab_size": 32000
}

"""
XCTAssertTrue(fileContents.contains(expected))
}

func testResumeDownloadFromNonEmptyIncomplete() async throws {
let hubApi = HubApi(downloadBase: downloadDestination)
var lastProgress: Progress? = nil
var downloadedTo = FileManager.default.homeDirectoryForCurrentUser
.appendingPathComponent("Library/Caches/huggingface-tests/models/coreml-projects/Llama-2-7b-chat-coreml")

let metadataDestination = downloadedTo.appending(component: ".cache/huggingface/download")

try FileManager.default.createDirectory(at: metadataDestination, withIntermediateDirectories: true, attributes: nil)
try "X".write(to: metadataDestination.appendingPathComponent("config.json.incomplete"), atomically: true, encoding: .utf8)
downloadedTo = try await hubApi.snapshot(from: repo, matching: "config.json") { progress in
print("Total Progress: \(progress.fractionCompleted)")
print("Files Completed: \(progress.completedUnitCount) of \(progress.totalUnitCount)")
lastProgress = progress
}
XCTAssertEqual(lastProgress?.fractionCompleted, 1)
XCTAssertEqual(lastProgress?.completedUnitCount, 1)
XCTAssertEqual(downloadedTo, downloadDestination.appending(path: "models/\(repo)"))

let fileContents = try String(contentsOfFile: downloadedTo.appendingPathComponent("config.json").path)

let expected = """
X
"architectures": [
"LlamaForCausalLM"
],
"bos_token_id": 1,
"eos_token_id": 2,
"model_type": "llama",
"pad_token_id": 0,
"vocab_size": 32000
}

"""
XCTAssertTrue(fileContents.contains(expected))
}
}