Skip to content

Make downloads resumable across app sessions #187

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 2 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
147 changes: 107 additions & 40 deletions Sources/Hub/Downloader.swift
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ import Foundation

class Downloader: NSObject, ObservableObject {
/// Final location of the downloaded file; assigned in `init` from the `to destination` argument.
private(set) var destination: URL
/// Remote URL the file is fetched from; assigned in `init` from the `from url` argument.
private(set) var sourceURL: URL

/// Capacity, in bytes, of the in-memory buffer that collects received bytes before they are written to disk.
private let chunkSize = 10 * 1024 * 1024 // 10MB

Expand All @@ -24,25 +25,68 @@ class Downloader: NSObject, ObservableObject {
/// Failures a `Downloader` can report.
///
/// In the code visible here only `unexpectedError` is thrown: when no temp file path is set,
/// when the response is not an `HTTPURLResponse`, or when the status code is outside 200..<300.
enum DownloadError: Error {
    case invalidDownloadLocation, unexpectedError, tempFileNotFound
}

/// Publishes the current state of the download. Starts at `.notStarted`; the download task later
/// sets it to `.downloading(progress)`, `.completed(destination)` or `.failed(error)`.
private(set) lazy var downloadState: CurrentValueSubject<DownloadState, Never> = CurrentValueSubject(.notStarted)
/// Handle for a state subscription.
/// NOTE(review): never assigned in the code visible here — confirm where it is used.
private var stateSubscriber: Cancellable?

/// Path of the `.incomplete` file the download is written to; set in `init` via `incompletePath(for:)`.
private(set) var tempFilePath: URL?
/// Expected total size in bytes as passed to `init`; used to compute the initial progress when resuming.
private(set) var expectedSize: Int?
/// Number of bytes downloaded so far. Seeded in `init` with the resume size and refreshed from the
/// size of the incomplete file on disk when the download starts.
private(set) var downloadedSize: Int = 0

/// Session that performs the download; created in `init` with this object as its delegate.
private var urlSession: URLSession? = nil

/// Builds the URL of the partial-download file that belongs to `destination`.
///
/// The result is `destination` with an additional `incomplete` path extension, similar to the
/// `.incomplete` files used by the Hugging Face Hub.
/// - Parameter destination: Final location of the downloaded file.
/// - Returns: `destination` with the `incomplete` path extension appended.
static func incompletePath(for destination: URL) -> URL {
    let partialFileExtension = "incomplete"
    return destination.appendingPathExtension(partialFileExtension)
}

/// Reports how many bytes of a previous, unfinished download are already on disk.
/// - Parameter destination: The destination URL for the download.
/// - Returns: Size in bytes of the `.incomplete` file for `destination`, or 0 when that file
///   does not exist or its size attribute cannot be read.
static func checkForIncompleteFile(at destination: URL) -> Int {
    let partialPath = Self.incompletePath(for: destination).path
    let manager = FileManager.default

    // Any failed step (missing file, unreadable attributes, unexpected size type) means "nothing to resume".
    guard manager.fileExists(atPath: partialPath),
          let fileAttributes = try? manager.attributesOfItem(atPath: partialPath),
          let byteCount = fileAttributes[.size] as? Int
    else {
        return 0
    }

    return byteCount
}

init(
from url: URL,
to destination: URL,
using authToken: String? = nil,
inBackground: Bool = false,
resumeSize: Int = 0,
resumeSize: Int = 0, // Can be specified manually, but will also check for incomplete files
headers: [String: String]? = nil,
expectedSize: Int? = nil,
timeout: TimeInterval = 10,
numRetries: Int = 5
) {
self.destination = destination
sourceURL = url
self.expectedSize = expectedSize

// Create incomplete file path based on destination
tempFilePath = Downloader.incompletePath(for: destination)

// If resume size wasn't specified, check for an existing incomplete file
let actualResumeSize: Int = if resumeSize > 0 {
resumeSize
} else {
Downloader.checkForIncompleteFile(at: destination)
}

downloadedSize = actualResumeSize

super.init()
let sessionIdentifier = "swift-transformers.hub.downloader"

Expand All @@ -55,7 +99,7 @@ class Downloader: NSObject, ObservableObject {

urlSession = URLSession(configuration: config, delegate: self, delegateQueue: nil)

setupDownload(from: url, with: authToken, resumeSize: resumeSize, headers: headers, expectedSize: expectedSize, timeout: timeout, numRetries: numRetries)
setUpDownload(from: url, with: authToken, resumeSize: actualResumeSize, headers: headers, expectedSize: expectedSize, timeout: timeout, numRetries: numRetries)
}

/// Sets up and initiates a file download operation
Expand All @@ -68,7 +112,7 @@ class Downloader: NSObject, ObservableObject {
/// - expectedSize: Expected file size in bytes for validation
/// - timeout: Time interval before the request times out
/// - numRetries: Number of retry attempts for failed downloads
private func setupDownload(
private func setUpDownload(
from url: URL,
with authToken: String?,
resumeSize: Int,
Expand All @@ -77,59 +121,83 @@ class Downloader: NSObject, ObservableObject {
timeout: TimeInterval,
numRetries: Int
) {
downloadState.value = .downloading(0)
urlSession?.getAllTasks { tasks in
// If there's an existing pending background task with the same URL, let it proceed.
if let existing = tasks.filter({ $0.originalRequest?.url == url }).first {
switch existing.state {
case .running:
// print("Already downloading \(url)")
return
case .suspended:
// print("Resuming suspended download task for \(url)")
existing.resume()
return
case .canceling:
// print("Starting new download task for \(url), previous was canceling")
break
case .completed:
// print("Starting new download task for \(url), previous is complete but the file is no longer present (I think it's cached)")
break
case .canceling, .completed:
existing.cancel()
@unknown default:
// print("Unknown state for running task; cancelling and creating a new one")
existing.cancel()
}
}
var request = URLRequest(url: url)

// Use headers from argument else create an empty header dictionary
var requestHeaders = headers ?? [:]

// Populate header auth and range fields
if let authToken {
requestHeaders["Authorization"] = "Bearer \(authToken)"
}
if resumeSize > 0 {
requestHeaders["Range"] = "bytes=\(resumeSize)-"
}

request.timeoutInterval = timeout
request.allHTTPHeaderFields = requestHeaders

Task {
do {
// Create a temp file to write
let tempURL = FileManager.default.temporaryDirectory.appendingPathComponent(UUID().uuidString)
FileManager.default.createFile(atPath: tempURL.path, contents: nil)
let tempFile = try FileHandle(forWritingTo: tempURL)
// Check if incomplete file exists and get its size
var existingSize = 0
guard let incompleteFilePath = self.tempFilePath else {
throw DownloadError.unexpectedError
}

let fileManager = FileManager.default
if fileManager.fileExists(atPath: incompleteFilePath.path) {
let attributes = try fileManager.attributesOfItem(atPath: incompleteFilePath.path)
existingSize = attributes[.size] as? Int ?? 0
self.downloadedSize = existingSize
} else {
// Create parent directory if needed
try fileManager.createDirectory(at: incompleteFilePath.deletingLastPathComponent(), withIntermediateDirectories: true)

// Create empty incomplete file
fileManager.createFile(atPath: incompleteFilePath.path, contents: nil)
}

// Set up the request with appropriate headers
var request = URLRequest(url: url)
var requestHeaders = headers ?? [:]

if let authToken {
requestHeaders["Authorization"] = "Bearer \(authToken)"
}

// Set Range header if we're resuming
if existingSize > 0 {
requestHeaders["Range"] = "bytes=\(existingSize)-"

// Calculate and show initial progress
if let expectedSize, expectedSize > 0 {
let initialProgress = Double(existingSize) / Double(expectedSize)
self.downloadState.value = .downloading(initialProgress)
} else {
self.downloadState.value = .downloading(0)
}
} else {
self.downloadState.value = .downloading(0)
}

request.timeoutInterval = timeout
request.allHTTPHeaderFields = requestHeaders

// Open the incomplete file for writing
let tempFile = try FileHandle(forWritingTo: incompleteFilePath)

// If resuming, seek to end of file
if existingSize > 0 {
try tempFile.seekToEnd()
}

defer { tempFile.closeFile() }
try await self.httpGet(request: request, tempFile: tempFile, resumeSize: resumeSize, numRetries: numRetries, expectedSize: expectedSize)
try await self.httpGet(request: request, tempFile: tempFile, resumeSize: self.downloadedSize, numRetries: numRetries, expectedSize: expectedSize)

// Clean up and move the completed download to its final destination
tempFile.closeFile()
try FileManager.default.moveDownloadedFile(from: tempURL, to: self.destination)

try fileManager.moveDownloadedFile(from: incompleteFilePath, to: self.destination)
self.downloadState.value = .completed(self.destination)
} catch {
self.downloadState.value = .failed(error)
Expand Down Expand Up @@ -169,15 +237,14 @@ class Downloader: NSObject, ObservableObject {
// Start the download and get the byte stream
let (asyncBytes, response) = try await session.bytes(for: newRequest)

guard let response = response as? HTTPURLResponse else {
guard let httpResponse = response as? HTTPURLResponse else {
throw DownloadError.unexpectedError
}

guard (200..<300).contains(response.statusCode) else {
guard (200..<300).contains(httpResponse.statusCode) else {
throw DownloadError.unexpectedError
}

var downloadedSize = resumeSize
downloadedSize = resumeSize

// Create a buffer to collect bytes before writing to disk
var buffer = Data(capacity: chunkSize)
Expand Down Expand Up @@ -218,7 +285,7 @@ class Downloader: NSObject, ObservableObject {
try await httpGet(
request: request,
tempFile: tempFile,
resumeSize: downloadedSize,
resumeSize: self.downloadedSize,
numRetries: newNumRetries - 1,
expectedSize: expectedSize
)
Expand Down
6 changes: 3 additions & 3 deletions Sources/Hub/Hub.swift
Original file line number Diff line number Diff line change
Expand Up @@ -51,13 +51,13 @@ public extension Hub {
}
}

enum RepoType: String {
enum RepoType: String, Codable {
case models
case datasets
case spaces
}

struct Repo {
struct Repo: Codable {
public let id: String
public let type: RepoType

Expand Down
38 changes: 34 additions & 4 deletions Sources/Hub/HubApi.swift
Original file line number Diff line number Diff line change
Expand Up @@ -426,14 +426,44 @@ public extension HubApi {
try prepareDestination()
try prepareMetadataDestination()

let downloader = Downloader(from: source, to: destination, using: hfToken, inBackground: backgroundSession, expectedSize: remoteSize)
// Check for an existing incomplete file
let incompleteFile = Downloader.incompletePath(for: destination)
var resumeSize = 0

if FileManager.default.fileExists(atPath: incompleteFile.path) {
if let fileAttributes = try? FileManager.default.attributesOfItem(atPath: incompleteFile.path) {
resumeSize = (fileAttributes[FileAttributeKey.size] as? Int) ?? 0
}
}

let downloader = Downloader(
from: source,
to: destination,
using: hfToken,
inBackground: backgroundSession,
resumeSize: resumeSize,
expectedSize: remoteSize
)

let downloadSubscriber = downloader.downloadState.sink { state in
if case let .downloading(progress) = state {
switch state {
case let .downloading(progress):
progressHandler(progress)
case .completed, .failed, .notStarted:
break
}
}
_ = try withExtendedLifetime(downloadSubscriber) {
try downloader.waitUntilDone()
do {
_ = try withExtendedLifetime(downloadSubscriber) {
try downloader.waitUntilDone()
}

try HubApi.shared.writeDownloadMetadata(commitHash: remoteCommitHash, etag: remoteEtag, metadataPath: metadataDestination)

return destination
} catch {
// If download fails, leave the incomplete file in place for future resume
throw error
}

try hub.writeDownloadMetadata(commitHash: remoteCommitHash, etag: remoteEtag, metadataPath: metadataDestination)
Expand Down
24 changes: 24 additions & 0 deletions Tests/HubTests/DownloaderTests.swift
Original file line number Diff line number Diff line change
Expand Up @@ -168,4 +168,28 @@ final class DownloaderTests: XCTestCase {
throw error
}
}

/// Verifies that a `Downloader` created without an explicit `resumeSize` detects an existing
/// `.incomplete` file for its destination and reports that file's size as `downloadedSize`.
func testAutomaticIncompleteFileDetection() throws {
    let url = try XCTUnwrap(URL(string: "https://huggingface.co/coreml-projects/sam-2-studio/resolve/main/SAM%202%20Studio%201.1.zip"))
    let destination = tempDir.appendingPathComponent("SAM%202%20Studio%201.1.zip")

    // Create a sample incomplete file with test content
    let incompletePath = Downloader.incompletePath(for: destination)
    try FileManager.default.createDirectory(at: incompletePath.deletingLastPathComponent(), withIntermediateDirectories: true)
    let testContent = Data(repeating: 65, count: 1024) // 1KB of data
    let fixtureCreated = FileManager.default.createFile(atPath: incompletePath.path, contents: testContent)

    // Clean up on every exit path once the fixture may exist
    defer { try? FileManager.default.removeItem(at: incompletePath) }

    // Fail with a clear message if the fixture could not be written, rather than with a size mismatch below
    XCTAssertTrue(fixtureCreated, "Could not create the incomplete file fixture")

    // Create a downloader for the same destination
    // It should automatically detect and use the incomplete file
    // NOTE(review): the initializer also kicks off the download itself; only the
    // synchronously-set `downloadedSize` is checked here.
    let downloader = Downloader(
        from: url,
        to: destination
    )

    // Verify the downloader found and is using the incomplete file
    XCTAssertEqual(downloader.downloadedSize, testContent.count, "Should have detected the incomplete file and set resumeSize")
}
}