 #include <linux/errno.h>
 #include <linux/file.h>
 #include <linux/slab.h>
+#include <linux/nospec.h>
 #include <linux/io_uring.h>
 
 #include <uapi/linux/io_uring.h>
 
 #include "io_uring.h"
+#include "rsrc.h"
+#include "filetable.h"
 #include "msg_ring.h"
 
 struct io_msg {
         struct file *file;
         u64 user_data;
         u32 len;
+        u32 cmd;
+        u32 src_fd;
+        u32 dst_fd;
+        u32 flags;
 };
 
+static int io_msg_ring_data(struct io_kiocb *req)
+{
+        struct io_ring_ctx *target_ctx = req->file->private_data;
+        struct io_msg *msg = io_kiocb_to_cmd(req);
+
+        if (msg->src_fd || msg->dst_fd || msg->flags)
+                return -EINVAL;
+
+        if (io_post_aux_cqe(target_ctx, msg->user_data, msg->len, 0))
+                return 0;
+
+        return -EOVERFLOW;
+}
+
+static void io_double_unlock_ctx(struct io_ring_ctx *ctx,
+                                 struct io_ring_ctx *octx,
+                                 unsigned int issue_flags)
+{
+        if (issue_flags & IO_URING_F_UNLOCKED)
+                mutex_unlock(&ctx->uring_lock);
+        mutex_unlock(&octx->uring_lock);
+}
+
+static int io_double_lock_ctx(struct io_ring_ctx *ctx,
+                              struct io_ring_ctx *octx,
+                              unsigned int issue_flags)
+{
+        /*
+         * To ensure proper ordering between the two ctxs, we can only
+         * attempt a trylock on the target. If that fails and we already have
+         * the source ctx lock, punt to io-wq.
+         */
+        if (!(issue_flags & IO_URING_F_UNLOCKED)) {
+                if (!mutex_trylock(&octx->uring_lock))
+                        return -EAGAIN;
+                return 0;
+        }
+
+        /* Always grab smallest value ctx first. We know ctx != octx. */
+        if (ctx < octx) {
+                mutex_lock(&ctx->uring_lock);
+                mutex_lock(&octx->uring_lock);
+        } else {
+                mutex_lock(&octx->uring_lock);
+                mutex_lock(&ctx->uring_lock);
+        }
+
+        return 0;
+}
+
+static int io_msg_send_fd(struct io_kiocb *req, unsigned int issue_flags)
+{
+        struct io_ring_ctx *target_ctx = req->file->private_data;
+        struct io_msg *msg = io_kiocb_to_cmd(req);
+        struct io_ring_ctx *ctx = req->ctx;
+        unsigned long file_ptr;
+        struct file *src_file;
+        int ret;
+
+        if (target_ctx == ctx)
+                return -EINVAL;
+
+        ret = io_double_lock_ctx(ctx, target_ctx, issue_flags);
+        if (unlikely(ret))
+                return ret;
+
+        ret = -EBADF;
+        if (unlikely(msg->src_fd >= ctx->nr_user_files))
+                goto out_unlock;
+
+        msg->src_fd = array_index_nospec(msg->src_fd, ctx->nr_user_files);
+        file_ptr = io_fixed_file_slot(&ctx->file_table, msg->src_fd)->file_ptr;
+        src_file = (struct file *) (file_ptr & FFS_MASK);
+        get_file(src_file);
+
+        ret = __io_fixed_fd_install(target_ctx, src_file, msg->dst_fd);
+        if (ret < 0) {
+                fput(src_file);
+                goto out_unlock;
+        }
+
+        if (msg->flags & IORING_MSG_RING_CQE_SKIP)
+                goto out_unlock;
+
+        /*
+         * If this fails, the target still received the file descriptor but
+         * wasn't notified of the fact. This means that if this request
+         * completes with -EOVERFLOW, then the sender must ensure that a
+         * later IORING_OP_MSG_RING delivers the message.
+         */
+        if (!io_post_aux_cqe(target_ctx, msg->user_data, msg->len, 0))
+                ret = -EOVERFLOW;
+out_unlock:
+        io_double_unlock_ctx(ctx, target_ctx, issue_flags);
+        return ret;
+}
+
 int io_msg_ring_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
 {
         struct io_msg *msg = io_kiocb_to_cmd(req);
 
-        if (unlikely(sqe->addr || sqe->rw_flags || sqe->splice_fd_in ||
-                     sqe->buf_index || sqe->personality))
+        if (unlikely(sqe->buf_index || sqe->personality))
                 return -EINVAL;
 
         msg->user_data = READ_ONCE(sqe->off);
         msg->len = READ_ONCE(sqe->len);
+        msg->cmd = READ_ONCE(sqe->addr);
+        msg->src_fd = READ_ONCE(sqe->addr3);
+        msg->dst_fd = READ_ONCE(sqe->file_index);
+        msg->flags = READ_ONCE(sqe->msg_ring_flags);
+        if (msg->flags & ~IORING_MSG_RING_CQE_SKIP)
+                return -EINVAL;
+
         return 0;
 }
 
 int io_msg_ring(struct io_kiocb *req, unsigned int issue_flags)
 {
         struct io_msg *msg = io_kiocb_to_cmd(req);
-        struct io_ring_ctx *target_ctx;
         int ret;
 
         ret = -EBADFD;
         if (!io_is_uring_fops(req->file))
                 goto done;
 
-        ret = -EOVERFLOW;
-        target_ctx = req->file->private_data;
-        if (io_post_aux_cqe(target_ctx, msg->user_data, msg->len, 0))
-                ret = 0;
+        switch (msg->cmd) {
+        case IORING_MSG_DATA:
+                ret = io_msg_ring_data(req);
+                break;
+        case IORING_MSG_SEND_FD:
+                ret = io_msg_send_fd(req, issue_flags);
+                break;
+        default:
+                ret = -EINVAL;
+                break;
+        }
 
 done:
         if (ret < 0)
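
Note on the userspace encoding: io_msg_ring_prep() above reads the new fields from existing SQE members: sqe->addr carries the sub-command (IORING_MSG_DATA or IORING_MSG_SEND_FD), sqe->addr3 the source fixed-file index, sqe->file_index the destination slot, sqe->off the user_data echoed in the target's CQE, sqe->len the CQE res value, and sqe->msg_ring_flags the flags. The sketch below shows how a sender might fill a raw SQE for IORING_MSG_SEND_FD; it is a minimal illustration, not part of this patch. The helper name prep_msg_send_fd() is made up for the example, liburing is assumed only for the SQE and constant definitions, and src_slot is assumed to be a file already registered in the sender's fixed-file table.

/*
 * Illustrative userspace sketch (not part of this patch): prepare an
 * IORING_OP_MSG_RING SQE that passes a fixed file to another ring,
 * mirroring the fields io_msg_ring_prep() reads on the kernel side.
 */
#include <string.h>
#include <liburing.h>

static void prep_msg_send_fd(struct io_uring_sqe *sqe, int target_ring_fd,
                             unsigned int src_slot, unsigned int dst_slot,
                             __u64 user_data)
{
        memset(sqe, 0, sizeof(*sqe));
        sqe->opcode = IORING_OP_MSG_RING;
        sqe->fd = target_ring_fd;        /* fd of the receiving io_uring */
        sqe->addr = IORING_MSG_SEND_FD;  /* msg->cmd */
        sqe->addr3 = src_slot;           /* msg->src_fd: fixed-file index in the sender's ring */
        sqe->file_index = dst_slot;      /* msg->dst_fd: slot in the target's file table */
        sqe->off = user_data;            /* msg->user_data in the target's CQE */
        sqe->msg_ring_flags = 0;         /* or IORING_MSG_RING_CQE_SKIP to suppress the CQE */
}

If the install succeeds and IORING_MSG_RING_CQE_SKIP is not set, the target ring receives a CQE whose user_data and res fields are the user_data and len values posted by io_post_aux_cqe() above; the patch itself attaches no meaning to len beyond copying it into res, so the two rings are free to agree on their own convention for it.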