Skip to content

Commit bda6540

Browse files
committed
fix
1 parent f0cd267 commit bda6540

File tree

2 files changed

+29
-7
lines changed

2 files changed

+29
-7
lines changed

services/autoscaling/src/simcore_service_autoscaling/modules/auto_scaling_core.py

Lines changed: 10 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -449,9 +449,19 @@ async def _start_warm_buffer_instances(
449449
if (warm_buffer.ec2_instance.type == hot_buffer_instance_type)
450450
and not warm_buffer.assigned_tasks
451451
]
452+
# check there are no empty pending ec2s/nodes that are not assigned to any task
453+
unnassigned_pending_ec2s = [
454+
i.ec2_instance for i in cluster.pending_ec2s if not i.assigned_tasks
455+
]
456+
unnassigned_pending_nodes = [
457+
i.ec2_instance for i in cluster.pending_nodes if not i.assigned_tasks
458+
]
459+
452460
instances_to_start += free_startable_warm_buffers_to_replace_hot_buffers[
453461
: app_settings.AUTOSCALING_EC2_INSTANCES.EC2_INSTANCES_MACHINES_BUFFER
454462
- len(cluster.buffer_drained_nodes)
463+
- len(unnassigned_pending_ec2s)
464+
- len(unnassigned_pending_nodes)
455465
]
456466

457467
if not instances_to_start:

services/autoscaling/tests/unit/test_modules_auto_scaling_dynamic.py

Lines changed: 19 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -2317,11 +2317,23 @@ async def test_warm_buffers_only_replace_hot_buffer_if_service_is_started_issue7
23172317
assert len(spied_cluster.pending_ec2s) == 1
23182318

23192319
# running it again shall do nothing
2320-
await auto_scale_cluster(app=initialized_app, auto_scaling_mode=auto_scaling_mode)
2321-
spied_cluster = assert_cluster_state(
2322-
spied_cluster_analysis, expected_calls=1, expected_num_machines=6
2320+
@tenacity.retry(
2321+
retry=tenacity.retry_always,
2322+
reraise=True,
2323+
wait=tenacity.wait_fixed(0.1),
2324+
stop=tenacity.stop_after_attempt(10),
23232325
)
2324-
assert len(spied_cluster.buffer_drained_nodes) == num_hot_buffer - 1
2325-
assert len(spied_cluster.buffer_ec2s) == buffer_count - 1
2326-
assert len(spied_cluster.active_nodes) == 1
2327-
assert len(spied_cluster.pending_ec2s) == 1
2326+
async def _check_autoscaling_is_stable() -> None:
2327+
await auto_scale_cluster(
2328+
app=initialized_app, auto_scaling_mode=auto_scaling_mode
2329+
)
2330+
spied_cluster = assert_cluster_state(
2331+
spied_cluster_analysis, expected_calls=1, expected_num_machines=6
2332+
)
2333+
assert len(spied_cluster.buffer_drained_nodes) == num_hot_buffer - 1
2334+
assert len(spied_cluster.buffer_ec2s) == buffer_count - 1
2335+
assert len(spied_cluster.active_nodes) == 1
2336+
assert len(spied_cluster.pending_ec2s) == 1
2337+
2338+
with pytest.raises(tenacity.RetryError):
2339+
await _check_autoscaling_is_stable()

0 commit comments

Comments
 (0)