Skip to content

Commit fe76421

Browse files
committed
io_uring: allow user configurable IO thread CPU affinity
io-wq defaults to per-node masks for IO workers. This works fine by default, but isn't particularly handy for workloads that prefer more specific affinities, for either performance or isolation reasons. This adds IORING_REGISTER_IOWQ_AFF that allows the user to pass in a CPU mask that is then applied to IO thread workers, and an IORING_UNREGISTER_IOWQ_AFF that simply resets the masks back to the default of per-node. Note that no care is given to existing IO threads, they will need to go through a reschedule before the affinity is correct if they are already running or sleeping. Signed-off-by: Jens Axboe <axboe@kernel.dk>
1 parent 0e03496 commit fe76421

File tree

4 files changed

+74
-0
lines changed

4 files changed

+74
-0
lines changed

fs/io-wq.c

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1087,6 +1087,23 @@ static int io_wq_cpu_offline(unsigned int cpu, struct hlist_node *node)
10871087
return __io_wq_cpu_online(wq, cpu, false);
10881088
}
10891089

1090+
/*
 * Rewrite the CPU mask stored in every per-node io_wqe of @wq.
 *
 * @wq:   io-wq instance whose per-node wqe masks are updated.
 * @mask: mask to copy into each node's wqe. When NULL, each wqe instead
 *        receives cpumask_of_node() for its own node.
 *
 * Only the stored masks are touched here; the walk over the nodes runs
 * under rcu_read_lock(). Always returns 0.
 */
int io_wq_cpu_affinity(struct io_wq *wq, cpumask_var_t mask)
{
	int node;

	rcu_read_lock();
	for_each_node(node) {
		struct io_wqe *wqe = wq->wqes[node];
		const struct cpumask *src;

		/* A NULL mask selects the per-node default for this wqe. */
		src = mask ? mask : cpumask_of_node(node);
		cpumask_copy(wqe->cpu_mask, src);
	}
	rcu_read_unlock();

	return 0;
}
1106+
10901107
static __init int io_wq_init(void)
10911108
{
10921109
int ret;

fs/io-wq.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -128,6 +128,8 @@ void io_wq_put_and_exit(struct io_wq *wq);
128128
void io_wq_enqueue(struct io_wq *wq, struct io_wq_work *work);
129129
void io_wq_hash_work(struct io_wq_work *work, void *val);
130130

131+
/*
 * Copy @mask into the CPU mask of every per-node wqe of @wq; a NULL @mask
 * resets each wqe to cpumask_of_node() for its node. Returns 0.
 */
int io_wq_cpu_affinity(struct io_wq *wq, cpumask_var_t mask);
132+
131133
static inline bool io_wq_is_hashed(struct io_wq_work *work)
132134
{
133135
return work->flags & IO_WQ_WORK_HASHED;

fs/io_uring.c

Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9983,6 +9983,43 @@ static int io_register_rsrc(struct io_ring_ctx *ctx, void __user *arg,
99839983
return -EINVAL;
99849984
}
99859985

9986+
/*
 * IORING_REGISTER_IOWQ_AFF: read a CPU mask from user space and hand it to
 * io_wq_cpu_affinity() for the current task's io-wq.
 *
 * @ctx: ring the registration was issued on (not used by this function).
 * @arg: user pointer to the mask bytes.
 * @len: number of bytes available at @arg; clamped to cpumask_size().
 *
 * Returns -EINVAL if the current task has no io_uring task context or no
 * io-wq, -ENOMEM if the temporary mask cannot be allocated, -EFAULT if the
 * user copy fails, otherwise the result of io_wq_cpu_affinity().
 *
 * NOTE(review): the mask is copied as raw bytes; whether 32-bit compat
 * callers need separate bitmap handling is not addressed here - confirm.
 */
static int io_register_iowq_aff(struct io_ring_ctx *ctx, void __user *arg,
				unsigned len)
{
	struct io_uring_task *task_ctx = current->io_uring;
	unsigned copy_len = len;
	cpumask_var_t user_mask;
	int ret;

	if (!(task_ctx && task_ctx->io_wq))
		return -EINVAL;

	if (!alloc_cpumask_var(&user_mask, GFP_KERNEL))
		return -ENOMEM;

	/* Start from an empty mask so a short user buffer leaves the rest clear. */
	cpumask_clear(user_mask);

	/* Never copy more than the kernel mask can hold. */
	if (copy_len > cpumask_size())
		copy_len = cpumask_size();

	if (copy_from_user(user_mask, arg, copy_len))
		ret = -EFAULT;
	else
		ret = io_wq_cpu_affinity(task_ctx->io_wq, user_mask);

	/* The temporary mask is released on both outcomes. */
	free_cpumask_var(user_mask);
	return ret;
}
10012+
10013+
static int io_unregister_iowq_aff(struct io_ring_ctx *ctx)
10014+
{
10015+
struct io_uring_task *tctx = current->io_uring;
10016+
10017+
if (!tctx || !tctx->io_wq)
10018+
return -EINVAL;
10019+
10020+
return io_wq_cpu_affinity(tctx->io_wq, NULL);
10021+
}
10022+
998610023
static bool io_register_op_must_quiesce(int op)
998710024
{
998810025
switch (op) {
@@ -9998,6 +10035,8 @@ static bool io_register_op_must_quiesce(int op)
999810035
case IORING_REGISTER_FILES_UPDATE2:
999910036
case IORING_REGISTER_BUFFERS2:
1000010037
case IORING_REGISTER_BUFFERS_UPDATE:
10038+
case IORING_REGISTER_IOWQ_AFF:
10039+
case IORING_UNREGISTER_IOWQ_AFF:
1000110040
return false;
1000210041
default:
1000310042
return true;
@@ -10137,6 +10176,18 @@ static int __io_uring_register(struct io_ring_ctx *ctx, unsigned opcode,
1013710176
ret = io_register_rsrc_update(ctx, arg, nr_args,
1013810177
IORING_RSRC_BUFFER);
1013910178
break;
10179+
case IORING_REGISTER_IOWQ_AFF:
10180+
ret = -EINVAL;
10181+
if (!arg || !nr_args)
10182+
break;
10183+
ret = io_register_iowq_aff(ctx, arg, nr_args);
10184+
break;
10185+
case IORING_UNREGISTER_IOWQ_AFF:
10186+
ret = -EINVAL;
10187+
if (arg || nr_args)
10188+
break;
10189+
ret = io_unregister_iowq_aff(ctx);
10190+
break;
1014010191
default:
1014110192
ret = -EINVAL;
1014210193
break;

include/uapi/linux/io_uring.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -306,6 +306,10 @@ enum {
306306
IORING_REGISTER_BUFFERS2 = 15,
307307
IORING_REGISTER_BUFFERS_UPDATE = 16,
308308

309+
/* set/clear io-wq thread affinities */
310+
IORING_REGISTER_IOWQ_AFF = 17,
311+
IORING_UNREGISTER_IOWQ_AFF = 18,
312+
309313
/* this goes last */
310314
IORING_REGISTER_LAST
311315
};

0 commit comments

Comments
 (0)