44
44
# Idle wait, in seconds. NOTE(review): presumably how long a worker sleeps when
# no job is available; the usage is not visible in this section.
WAITING_TIME_IF_NO_TASKS = 10  # seconds

# NOTE(review): by their names, per-commit caps on regular and LFS files;
# confirm against the code that consumes them.
MAX_NB_REGULAR_FILES_PER_COMMIT = 75
MAX_NB_LFS_FILES_PER_COMMIT = 150

# Ascending ladder of batch sizes. The upload status object keeps an index into
# this list (`_chunk_idx`) that moves down one step after a failed commit and up
# one step after a full-size commit that finished quickly.
COMMIT_SIZE_SCALE: List[int] = [20, 50, 75, 100, 125, 200, 250, 400, 600, 1000]
47
48
48
49
49
50
def upload_large_folder_internal (
@@ -184,6 +185,8 @@ def __init__(self, items: List[JOB_ITEM_T]):
184
185
self .last_commit_attempt : Optional [float ] = None
185
186
186
187
self ._started_at = datetime .now ()
188
+ self ._chunk_idx : int = 1
189
+ self ._chunk_lock : Lock = Lock ()
187
190
188
191
# Setup queues
189
192
for item in self .items :
@@ -199,6 +202,21 @@ def __init__(self, items: List[JOB_ITEM_T]):
199
202
else :
200
203
logger .debug (f"Skipping file { paths .path_in_repo } (already uploaded and committed)" )
201
204
205
def target_chunk(self) -> int:
    """Return the batch size currently selected on the `COMMIT_SIZE_SCALE` ladder.

    The index is read while holding `_chunk_lock` so that a concurrent
    `update_chunk` call cannot change it mid-read.
    """
    with self._chunk_lock:
        current_step = self._chunk_idx
        return COMMIT_SIZE_SCALE[current_step]
208
+
209
def update_chunk(self, success: bool, nb_items: int, duration: float) -> None:
    """Adapt the target batch size after a commit attempt.

    Args:
        success: Whether the commit attempt succeeded.
        nb_items: Number of items that were part of the attempt.
        duration: Elapsed time of the attempt, in seconds.

    The index into `COMMIT_SIZE_SCALE` moves down one step on failure and up
    one step when a full-size batch was committed in under 40 seconds. In
    every other case it is left unchanged.
    """
    with self._chunk_lock:
        if not success:
            # `Logger.warn` is a deprecated alias; use `warning`.
            logger.warning(f"Failed to commit {nb_items} files at once. Will retry with less files in next batch.")
            self._chunk_idx -= 1
        elif nb_items >= COMMIT_SIZE_SCALE[self._chunk_idx] and duration < 40:
            # Only scale up when the batch actually reached the current target
            # size; a small batch that was fast tells us nothing about the limit.
            logger.info(f"Successfully committed {nb_items} at once. Increasing the limit for next batch.")
            self._chunk_idx += 1

        # Clamp so the index always stays a valid position in COMMIT_SIZE_SCALE.
        self._chunk_idx = max(0, min(self._chunk_idx, len(COMMIT_SIZE_SCALE) - 1))
219
+
202
220
def current_report (self ) -> str :
203
221
"""Generate a report of the current status of the large upload."""
204
222
nb_hashed = 0
@@ -351,6 +369,8 @@ def _worker_job(
351
369
status .nb_workers_preupload_lfs -= 1
352
370
353
371
elif job == WorkerJob .COMMIT :
372
+ start_ts = time .time ()
373
+ success = True
354
374
try :
355
375
_commit (items , api = api , repo_id = repo_id , repo_type = repo_type , revision = revision )
356
376
except KeyboardInterrupt :
@@ -360,6 +380,9 @@ def _worker_job(
360
380
traceback .format_exc ()
361
381
for item in items :
362
382
status .queue_commit .put (item )
383
+ success = False
384
+ duration = time .time () - start_ts
385
+ status .update_chunk (success , len (items ), duration )
363
386
with status .lock :
364
387
status .last_commit_attempt = time .time ()
365
388
status .nb_workers_commit -= 1
@@ -393,7 +416,7 @@ def _determine_next_job(status: LargeUploadStatus) -> Optional[Tuple[WorkerJob,
393
416
elif status .queue_get_upload_mode .qsize () >= 10 :
394
417
status .nb_workers_get_upload_mode += 1
395
418
logger .debug ("Job: get upload mode (>10 files ready)" )
396
- return (WorkerJob .GET_UPLOAD_MODE , _get_n (status .queue_get_upload_mode , 50 ))
419
+ return (WorkerJob .GET_UPLOAD_MODE , _get_n (status .queue_get_upload_mode , status . target_chunk () ))
397
420
398
421
# 4. Preupload LFS file if at least 1 file and no worker is preuploading LFS
399
422
elif status .queue_preupload_lfs .qsize () > 0 and status .nb_workers_preupload_lfs == 0 :
@@ -411,7 +434,7 @@ def _determine_next_job(status: LargeUploadStatus) -> Optional[Tuple[WorkerJob,
411
434
elif status .queue_get_upload_mode .qsize () > 0 and status .nb_workers_get_upload_mode == 0 :
412
435
status .nb_workers_get_upload_mode += 1
413
436
logger .debug ("Job: get upload mode (no other worker getting upload mode)" )
414
- return (WorkerJob .GET_UPLOAD_MODE , _get_n (status .queue_get_upload_mode , 50 ))
437
+ return (WorkerJob .GET_UPLOAD_MODE , _get_n (status .queue_get_upload_mode , status . target_chunk () ))
415
438
416
439
# 7. Preupload LFS file if at least 1 file
417
440
# Skip if hf_transfer is enabled and there is already a worker preuploading LFS
@@ -432,7 +455,7 @@ def _determine_next_job(status: LargeUploadStatus) -> Optional[Tuple[WorkerJob,
432
455
elif status .queue_get_upload_mode .qsize () > 0 :
433
456
status .nb_workers_get_upload_mode += 1
434
457
logger .debug ("Job: get upload mode" )
435
- return (WorkerJob .GET_UPLOAD_MODE , _get_n (status .queue_get_upload_mode , 50 ))
458
+ return (WorkerJob .GET_UPLOAD_MODE , _get_n (status .queue_get_upload_mode , status . target_chunk () ))
436
459
437
460
# 10. Commit if at least 1 file and 1 min since last commit attempt
438
461
elif (
0 commit comments