Skip to content

Commit ce1e35d

Browse files
committed
Let's fix #3016 as well
1 parent 87734e4 commit ce1e35d

File tree

1 file changed

+4
-30
lines changed

1 file changed

+4
-30
lines changed

src/huggingface_hub/_upload_large_folder.py

Lines changed: 4 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -43,8 +43,6 @@
4343

4444
WAITING_TIME_IF_NO_TASKS = 10 # seconds
4545
MAX_NB_FILES_FETCH_UPLOAD_MODE = 100
46-
MAX_NB_REGULAR_FILES_PER_COMMIT = 75
47-
MAX_NB_LFS_FILES_PER_COMMIT = 150
4846
COMMIT_SIZE_SCALE: List[int] = [20, 50, 75, 100, 125, 200, 250, 400, 600, 1000]
4947

5048

@@ -405,13 +403,13 @@ def _determine_next_job(status: LargeUploadStatus) -> Optional[Tuple[WorkerJob,
405403
):
406404
status.nb_workers_commit += 1
407405
logger.debug("Job: commit (more than 5 minutes since last commit attempt)")
408-
return (WorkerJob.COMMIT, _get_items_to_commit(status.queue_commit))
406+
return (WorkerJob.COMMIT, _get_n(status.queue_commit, status.target_chunk()))
409407

410408
# 2. Commit if at least 150 files are ready to commit
411409
elif status.nb_workers_commit == 0 and status.queue_commit.qsize() >= 150:
412410
status.nb_workers_commit += 1
413411
logger.debug("Job: commit (>100 files ready)")
414-
return (WorkerJob.COMMIT, _get_items_to_commit(status.queue_commit))
412+
return (WorkerJob.COMMIT, _get_n(status.queue_commit, status.target_chunk()))
415413

416414
# 3. Get upload mode if at least 100 files
417415
elif status.queue_get_upload_mode.qsize() >= MAX_NB_FILES_FETCH_UPLOAD_MODE:
@@ -467,7 +465,7 @@ def _determine_next_job(status: LargeUploadStatus) -> Optional[Tuple[WorkerJob,
467465
):
468466
status.nb_workers_commit += 1
469467
logger.debug("Job: commit (1 min since last commit attempt)")
470-
return (WorkerJob.COMMIT, _get_items_to_commit(status.queue_commit))
468+
return (WorkerJob.COMMIT, _get_n(status.queue_commit, status.target_chunk()))
471469

472470
# 11. Commit if at least 1 file all other queues are empty and all workers are waiting
473471
# e.g. when it's the last commit
@@ -483,7 +481,7 @@ def _determine_next_job(status: LargeUploadStatus) -> Optional[Tuple[WorkerJob,
483481
):
484482
status.nb_workers_commit += 1
485483
logger.debug("Job: commit")
486-
return (WorkerJob.COMMIT, _get_items_to_commit(status.queue_commit))
484+
return (WorkerJob.COMMIT, _get_n(status.queue_commit, status.target_chunk()))
487485

488486
# 12. If all queues are empty, exit
489487
elif all(metadata.is_committed or metadata.should_ignore for _, metadata in status.items):
@@ -601,30 +599,6 @@ def _get_n(queue: "queue.Queue[JOB_ITEM_T]", n: int) -> List[JOB_ITEM_T]:
601599
return [queue.get() for _ in range(min(queue.qsize(), n))]
602600

603601

604-
def _get_items_to_commit(queue: "queue.Queue[JOB_ITEM_T]") -> List[JOB_ITEM_T]:
605-
"""Special case for commit job: the number of items to commit depends on the type of files."""
606-
# Can take at most 50 regular files and/or 100 LFS files in a single commit
607-
items: List[JOB_ITEM_T] = []
608-
nb_lfs, nb_regular = 0, 0
609-
while True:
610-
# If empty queue => commit everything
611-
if queue.qsize() == 0:
612-
return items
613-
614-
# If we have enough items => commit them
615-
if nb_lfs >= MAX_NB_LFS_FILES_PER_COMMIT or nb_regular >= MAX_NB_REGULAR_FILES_PER_COMMIT:
616-
return items
617-
618-
# Else, get a new item and increase counter
619-
item = queue.get()
620-
items.append(item)
621-
_, metadata = item
622-
if metadata.upload_mode == "lfs":
623-
nb_lfs += 1
624-
else:
625-
nb_regular += 1
626-
627-
628602
def _print_overwrite(report: str) -> None:
629603
"""Print a report, overwriting the previous lines.
630604

0 commit comments

Comments
 (0)