Skip to content

Do not fetch /preupload if already done in upload-large-folder #3100

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 6 commits into from
May 22, 2025
Merged
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
26 changes: 14 additions & 12 deletions src/huggingface_hub/_upload_large_folder.py
Original file line number Diff line number Diff line change
Expand Up @@ -354,16 +354,17 @@ def _worker_job(
status.nb_workers_get_upload_mode -= 1

elif job == WorkerJob.PREUPLOAD_LFS:
item = items[0] # single item
try:
_preupload_lfs(item, api=api, repo_id=repo_id, repo_type=repo_type, revision=revision)
status.queue_commit.put(item)
_preupload_lfs(items, api=api, repo_id=repo_id, repo_type=repo_type, revision=revision)
for item in items:
status.queue_commit.put(item)
except KeyboardInterrupt:
raise
except Exception as e:
logger.error(f"Failed to preupload LFS: {e}")
traceback.format_exc()
status.queue_preupload_lfs.put(item)
for item in items:
status.queue_preupload_lfs.put(item)

with status.lock:
status.nb_workers_preupload_lfs -= 1
Expand Down Expand Up @@ -422,7 +423,7 @@ def _determine_next_job(status: LargeUploadStatus) -> Optional[Tuple[WorkerJob,
elif status.queue_preupload_lfs.qsize() > 0 and status.nb_workers_preupload_lfs == 0:
status.nb_workers_preupload_lfs += 1
logger.debug("Job: preupload LFS (no other worker preuploading LFS)")
return (WorkerJob.PREUPLOAD_LFS, _get_one(status.queue_preupload_lfs))
return (WorkerJob.PREUPLOAD_LFS, _get_n(status.queue_preupload_lfs, 100))

# 5. Compute sha256 if at least 1 file and no worker is computing sha256
elif status.queue_sha256.qsize() > 0 and status.nb_workers_sha256 == 0:
Expand All @@ -443,7 +444,7 @@ def _determine_next_job(status: LargeUploadStatus) -> Optional[Tuple[WorkerJob,
):
status.nb_workers_preupload_lfs += 1
logger.debug("Job: preupload LFS")
return (WorkerJob.PREUPLOAD_LFS, _get_one(status.queue_preupload_lfs))
return (WorkerJob.PREUPLOAD_LFS, _get_n(status.queue_preupload_lfs, 100))

# 8. Compute sha256 if at least 1 file
elif status.queue_sha256.qsize() > 0:
Expand Down Expand Up @@ -531,19 +532,20 @@ def _get_upload_mode(items: List[JOB_ITEM_T], api: "HfApi", repo_id: str, repo_t
metadata.save(paths)


def _preupload_lfs(items: List[JOB_ITEM_T], api: "HfApi", repo_id: str, repo_type: str, revision: str) -> None:
    """Preupload a batch of LFS files and mark each one as uploaded in its metadata.

    Args:
        items: batch of ``(paths, metadata)`` job items to preupload together.
        api: Hub client used to perform the preupload call.
        repo_id: id of the target repository.
        repo_type: type of the target repository (e.g. model, dataset).
        revision: revision the files are being uploaded to.
    """
    # Build one upload operation per item and send them in a single call,
    # one network round-trip for the whole batch instead of one per file.
    additions = [_build_hacky_operation(item) for item in items]
    api.preupload_lfs_files(
        repo_id=repo_id,
        repo_type=repo_type,
        revision=revision,
        additions=additions,
    )

    # Only persist the "uploaded" flag once the whole batch has succeeded;
    # on failure the caller re-queues every item, none falsely marked done.
    for paths, metadata in items:
        metadata.is_uploaded = True
        metadata.save(paths)


def _commit(items: List[JOB_ITEM_T], api: "HfApi", repo_id: str, repo_type: str, revision: str) -> None:
Expand Down