Commit 61ce4fe

Merge pull request raspberrypi#91 from sched-ext/scx-sync

scx_sync: Sync scheduler changes from https://github.com/sched-ext/scx

2 parents f0566ba + 234eb2c

2 files changed: 32 additions, 6 deletions

tools/sched_ext/scx_layered/src/bpf/main.bpf.c (16 additions, 4 deletions)

@@ -250,10 +250,20 @@ struct layer *lookup_layer(int idx)
 	return &layers[idx];
 }
 
+/*
+ * Because the layer membership is by the default hierarchy cgroups rather than
+ * the CPU controller membership, we can't use ops.cgroup_move(). Let's iterate
+ * the tasks manually and set refresh_layer.
+ *
+ * The iteration isn't synchronized and may fail spuriously. It's not a big
+ * practical problem as process migrations are very rare in most modern systems.
+ * That said, we eventually want this to be based on CPU controller membership.
+ */
 SEC("tp_btf/cgroup_attach_task")
 int BPF_PROG(tp_cgroup_attach_task, struct cgroup *cgrp, const char *cgrp_path,
 	     struct task_struct *leader, bool threadgroup)
 {
+	struct list_head *thread_head;
 	struct task_struct *next;
 	struct task_ctx *tctx;
 	int leader_pid = leader->pid;
@@ -265,6 +275,8 @@ int BPF_PROG(tp_cgroup_attach_task, struct cgroup *cgrp, const char *cgrp_path,
 	if (!threadgroup)
 		return 0;
 
+	thread_head = &leader->signal->thread_head;
+
 	if (!(next = bpf_task_acquire(leader))) {
 		scx_bpf_error("failed to acquire leader");
 		return 0;
@@ -274,18 +286,18 @@ int BPF_PROG(tp_cgroup_attach_task, struct cgroup *cgrp, const char *cgrp_path,
 		struct task_struct *p;
 		int pid;
 
-		p = container_of(next->thread_group.next, struct task_struct, thread_group);
+		p = container_of(next->thread_node.next, struct task_struct, thread_node);
 		bpf_task_release(next);
 
-		pid = BPF_CORE_READ(p, pid);
-		if (pid == leader_pid) {
+		if (&p->thread_node == thread_head) {
 			next = NULL;
 			break;
 		}
 
+		pid = BPF_CORE_READ(p, pid);
 		next = bpf_task_from_pid(pid);
 		if (!next) {
-			scx_bpf_error("thread iteration failed");
+			bpf_printk("scx_layered: tp_cgroup_attach_task: thread iteration failed");
 			break;
 		}
 
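The behavioral core of this hunk is the loop's termination test: the old code walked the thread_group list and stopped when iteration wrapped back around to the leader's pid, while the new code walks the thread_node list anchored at signal->thread_head and stops when the cursor reaches the list-head sentinel itself. The sketch below is a minimal userspace illustration of that sentinel-terminated walk, not code from this commit; demo_task, demo_signal, and list_add_tail are illustrative stand-ins for the kernel types and helpers.

/* demo_threadwalk.c - sentinel-terminated circular list walk (illustrative) */
#include <stddef.h>
#include <stdio.h>

struct list_head { struct list_head *next, *prev; };

#define container_of(ptr, type, member) \
	((type *)((char *)(ptr) - offsetof(type, member)))

/* Illustrative stand-ins for task_struct / signal_struct. */
struct demo_task {
	int pid;
	struct list_head thread_node;	/* linked into demo_signal.thread_head */
};

struct demo_signal { struct list_head thread_head; };

static void list_add_tail(struct list_head *node, struct list_head *head)
{
	node->prev = head->prev;
	node->next = head;
	head->prev->next = node;
	head->prev = node;
}

int main(void)
{
	struct demo_signal sig = {
		.thread_head = { .next = &sig.thread_head, .prev = &sig.thread_head },
	};
	struct demo_task threads[] = { { .pid = 100 }, { .pid = 101 }, { .pid = 102 } };
	struct list_head *pos;

	for (size_t i = 0; i < sizeof(threads) / sizeof(threads[0]); i++)
		list_add_tail(&threads[i].thread_node, &sig.thread_head);

	/*
	 * Mirror the fixed loop: advance through thread_node and stop when
	 * the cursor equals the head sentinel, rather than comparing pids.
	 */
	for (pos = sig.thread_head.next; pos != &sig.thread_head; pos = pos->next) {
		struct demo_task *t = container_of(pos, struct demo_task, thread_node);
		printf("pid %d\n", t->pid);
	}
	return 0;
}

One design point the diff reflects: the sentinel comparison terminates reliably because the head node lives in the shared signal struct rather than in any task, whereas a pid comparison can misfire when the unsynchronized walk races with thread exit.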

tools/sched_ext/scx_rusty/src/bpf/main.bpf.c (16 additions, 2 deletions)

@@ -966,7 +966,13 @@ s32 BPF_STRUCT_OPS(rusty_prep_enable, struct task_struct *p,
 	pid_t pid;
 
 	pid = p->pid;
-	ret = bpf_map_update_elem(&task_data, &pid, &taskc, BPF_NOEXIST);
+
+	/*
+	 * XXX - We want BPF_NOEXIST but bpf_map_delete_elem() in .disable() may
+	 * fail spuriously due to BPF recursion protection triggering
+	 * unnecessarily.
+	 */
+	ret = bpf_map_update_elem(&task_data, &pid, &taskc, 0 /*BPF_NOEXIST*/);
 	if (ret) {
 		stat_add(RUSTY_STAT_TASK_GET_ERR, 1);
 		return ret;
@@ -1003,7 +1009,15 @@ s32 BPF_STRUCT_OPS(rusty_prep_enable, struct task_struct *p,
 void BPF_STRUCT_OPS(rusty_disable, struct task_struct *p)
 {
 	pid_t pid = p->pid;
-	long ret = bpf_map_delete_elem(&task_data, &pid);
+	long ret;
+
+	/*
+	 * XXX - There's no reason delete should fail here but BPF's recursion
+	 * protection can unnecessarily fail the operation. The fact that
+	 * deletions aren't reliable means that we sometimes leak task_ctx and
+	 * can't use BPF_NOEXIST on allocation in .prep_enable().
+	 */
+	ret = bpf_map_delete_elem(&task_data, &pid);
 	if (ret) {
 		stat_add(RUSTY_STAT_TASK_GET_ERR, 1);
 		return;
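The two XXX comments describe a single coupled constraint: because bpf_map_delete_elem() in .disable() can be spuriously rejected by BPF's recursion protection, a stale entry may survive for a pid that is later recycled, so .prep_enable() must allocate with flags == 0 (upsert) instead of BPF_NOEXIST. The compile-oriented BPF sketch below illustrates that pattern in isolation; the demo_task_ctx, demo_task_data, demo_prep, and demo_disable names are invented for illustration and are not from this commit.

/* demo_taskmap.bpf.c - illustrative sketch of the upsert-over-stale pattern */
#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>

struct demo_task_ctx {
	__u64 enq_cnt;
};

struct {
	__uint(type, BPF_MAP_TYPE_HASH);
	__uint(max_entries, 8192);
	__type(key, __s32);
	__type(value, struct demo_task_ctx);
} demo_task_data SEC(".maps");

/* .prep_enable counterpart: allocate per-task state keyed by pid. */
static __always_inline int demo_prep(__s32 pid)
{
	struct demo_task_ctx init = {};

	/*
	 * BPF_NOEXIST would return -EEXIST if a prior delete was spuriously
	 * rejected and left a stale entry behind for a recycled pid, so use
	 * flags == 0 and overwrite whatever is there.
	 */
	return bpf_map_update_elem(&demo_task_data, &pid, &init, 0);
}

/* .disable counterpart: deletion may fail spuriously and leak the entry. */
static __always_inline void demo_disable(__s32 pid)
{
	if (bpf_map_delete_elem(&demo_task_data, &pid))
		bpf_printk("demo: leaked ctx for pid %d", pid);
}

char _license[] SEC("license") = "GPL";

The tradeoff, as the commit's own comments note, is that a leaked entry is tolerated and silently overwritten on pid reuse, at the cost of losing the duplicate-allocation check that BPF_NOEXIST would otherwise provide.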
