Skip to content

Commit 73754c4

Browse files
committed
Batch preupload calls in upload-large-folder
1 parent 5c3efe3 commit 73754c4

File tree

1 file changed

+14
-12
lines changed

1 file changed

+14
-12
lines changed

src/huggingface_hub/_upload_large_folder.py

Lines changed: 14 additions & 12 deletions
Original file line number | Diff line number | Diff line change
@@ -354,16 +354,17 @@ def _worker_job(
354354
status.nb_workers_get_upload_mode -= 1
355355

356356
elif job == WorkerJob.PREUPLOAD_LFS:
357-
item = items[0] # single item
358357
try:
359-
_preupload_lfs(item, api=api, repo_id=repo_id, repo_type=repo_type, revision=revision)
360-
status.queue_commit.put(item)
358+
_preupload_lfs(items, api=api, repo_id=repo_id, repo_type=repo_type, revision=revision)
359+
for item in items:
360+
status.queue_commit.put(item)
361361
except KeyboardInterrupt:
362362
raise
363363
except Exception as e:
364364
logger.error(f"Failed to preupload LFS: {e}")
365365
traceback.format_exc()
366-
status.queue_preupload_lfs.put(item)
366+
for item in items:
367+
status.queue_preupload_lfs.put(item)
367368

368369
with status.lock:
369370
status.nb_workers_preupload_lfs -= 1
@@ -422,7 +423,7 @@ def _determine_next_job(status: LargeUploadStatus) -> Optional[Tuple[WorkerJob,
422423
elif status.queue_preupload_lfs.qsize() > 0 and status.nb_workers_preupload_lfs == 0:
423424
status.nb_workers_preupload_lfs += 1
424425
logger.debug("Job: preupload LFS (no other worker preuploading LFS)")
425-
return (WorkerJob.PREUPLOAD_LFS, _get_one(status.queue_preupload_lfs))
426+
return (WorkerJob.PREUPLOAD_LFS, _get_n(status.queue_preupload_lfs, 100))
426427

427428
# 5. Compute sha256 if at least 1 file and no worker is computing sha256
428429
elif status.queue_sha256.qsize() > 0 and status.nb_workers_sha256 == 0:
@@ -443,7 +444,7 @@ def _determine_next_job(status: LargeUploadStatus) -> Optional[Tuple[WorkerJob,
443444
):
444445
status.nb_workers_preupload_lfs += 1
445446
logger.debug("Job: preupload LFS")
446-
return (WorkerJob.PREUPLOAD_LFS, _get_one(status.queue_preupload_lfs))
447+
return (WorkerJob.PREUPLOAD_LFS, _get_n(status.queue_preupload_lfs, 100))
447448

448449
# 8. Compute sha256 if at least 1 file
449450
elif status.queue_sha256.qsize() > 0:
@@ -531,19 +532,20 @@ def _get_upload_mode(items: List[JOB_ITEM_T], api: "HfApi", repo_id: str, repo_t
531532
metadata.save(paths)
532533

533534

534-
def _preupload_lfs(item: JOB_ITEM_T, api: "HfApi", repo_id: str, repo_type: str, revision: str) -> None:
535+
def _preupload_lfs(items: List[JOB_ITEM_T], api: "HfApi", repo_id: str, repo_type: str, revision: str) -> None:
535536
"""Preupload LFS file and update metadata."""
536-
paths, metadata = item
537-
addition = _build_hacky_operation(item)
537+
additions = [_build_hacky_operation(item) for item in items]
538538
api.preupload_lfs_files(
539539
repo_id=repo_id,
540540
repo_type=repo_type,
541541
revision=revision,
542-
additions=[addition],
542+
additions=additions,
543543
)
544544

545-
metadata.is_uploaded = True
546-
metadata.save(paths)
545+
for item in items:
546+
paths, metadata = item
547+
metadata.is_uploaded = True
548+
metadata.save(paths)
547549

548550

549551
def _commit(items: List[JOB_ITEM_T], api: "HfApi", repo_id: str, repo_type: str, revision: str) -> None:

0 commit comments

Comments (0)