Skip to content

Commit 4e63acd

Browse files
rdnaAlexei Starovoitov
authored and
Alexei Starovoitov
committed
bpf: Introduce bpf_sysctl_{get,set}_new_value helpers
Add helpers to work with the new value being written to a sysctl by user space. bpf_sysctl_get_new_value() copies the value being written to the sysctl into the provided buffer. bpf_sysctl_set_new_value() overrides the new value being written by user space with the one from the provided buffer. The buffer should contain a string representation of the value, similar to what can be seen in /proc/sys/. Both helpers can be used only on sysctl write. File position matters and can be managed by an interface that will be introduced separately. E.g. if user space calls sys_write to a file in /proc/sys/ at file position = X, where X > 0, then the value set by bpf_sysctl_set_new_value() will be written starting from X. If the program wants to override the whole value with the specified buffer, the file position has to be set to zero. Documentation for the new helpers is provided in the bpf.h UAPI header. Signed-off-by: Andrey Ignatov <[email protected]> Signed-off-by: Alexei Starovoitov <[email protected]>
1 parent 1d11b30 commit 4e63acd

File tree

5 files changed

+142
-10
lines changed

5 files changed

+142
-10
lines changed

fs/proc/proc_sysctl.c

+17-5
Original file line numberDiff line numberDiff line change
@@ -570,8 +570,8 @@ static ssize_t proc_sys_call_handler(struct file *filp, void __user *buf,
570570
struct inode *inode = file_inode(filp);
571571
struct ctl_table_header *head = grab_header(inode);
572572
struct ctl_table *table = PROC_I(inode)->sysctl_entry;
573+
void *new_buf = NULL;
573574
ssize_t error;
574-
size_t res;
575575

576576
if (IS_ERR(head))
577577
return PTR_ERR(head);
@@ -589,15 +589,27 @@ static ssize_t proc_sys_call_handler(struct file *filp, void __user *buf,
589589
if (!table->proc_handler)
590590
goto out;
591591

592-
error = BPF_CGROUP_RUN_PROG_SYSCTL(head, table, write);
592+
error = BPF_CGROUP_RUN_PROG_SYSCTL(head, table, write, buf, &count,
593+
&new_buf);
593594
if (error)
594595
goto out;
595596

596597
/* careful: calling conventions are nasty here */
597-
res = count;
598-
error = table->proc_handler(table, write, buf, &res, ppos);
598+
if (new_buf) {
599+
mm_segment_t old_fs;
600+
601+
old_fs = get_fs();
602+
set_fs(KERNEL_DS);
603+
error = table->proc_handler(table, write, (void __user *)new_buf,
604+
&count, ppos);
605+
set_fs(old_fs);
606+
kfree(new_buf);
607+
} else {
608+
error = table->proc_handler(table, write, buf, &count, ppos);
609+
}
610+
599611
if (!error)
600-
error = res;
612+
error = count;
601613
out:
602614
sysctl_head_finish(head);
603615

include/linux/bpf-cgroup.h

+5-3
Original file line numberDiff line numberDiff line change
@@ -113,7 +113,8 @@ int __cgroup_bpf_check_dev_permission(short dev_type, u32 major, u32 minor,
113113

114114
int __cgroup_bpf_run_filter_sysctl(struct ctl_table_header *head,
115115
struct ctl_table *table, int write,
116-
enum bpf_attach_type type);
116+
void __user *buf, size_t *pcount,
117+
void **new_buf, enum bpf_attach_type type);
117118

118119
static inline enum bpf_cgroup_storage_type cgroup_storage_type(
119120
struct bpf_map *map)
@@ -261,11 +262,12 @@ int bpf_percpu_cgroup_storage_update(struct bpf_map *map, void *key,
261262
})
262263

263264

264-
#define BPF_CGROUP_RUN_PROG_SYSCTL(head, table, write) \
265+
#define BPF_CGROUP_RUN_PROG_SYSCTL(head, table, write, buf, count, nbuf) \
265266
({ \
266267
int __ret = 0; \
267268
if (cgroup_bpf_enabled) \
268269
__ret = __cgroup_bpf_run_filter_sysctl(head, table, write, \
270+
buf, count, nbuf, \
269271
BPF_CGROUP_SYSCTL); \
270272
__ret; \
271273
})
@@ -338,7 +340,7 @@ static inline int bpf_percpu_cgroup_storage_update(struct bpf_map *map,
338340
#define BPF_CGROUP_RUN_PROG_UDP6_SENDMSG_LOCK(sk, uaddr, t_ctx) ({ 0; })
339341
#define BPF_CGROUP_RUN_PROG_SOCK_OPS(sock_ops) ({ 0; })
340342
#define BPF_CGROUP_RUN_PROG_DEVICE_CGROUP(type,major,minor,access) ({ 0; })
341-
#define BPF_CGROUP_RUN_PROG_SYSCTL(head, table, write) ({ 0; })
343+
#define BPF_CGROUP_RUN_PROG_SYSCTL(head,table,write,buf,count,nbuf) ({ 0; })
342344

343345
#define for_each_cgroup_storage_type(stype) for (; false; )
344346

include/linux/filter.h

+3
Original file line numberDiff line numberDiff line change
@@ -1184,6 +1184,9 @@ struct bpf_sysctl_kern {
11841184
struct ctl_table *table;
11851185
void *cur_val;
11861186
size_t cur_len;
1187+
void *new_val;
1188+
size_t new_len;
1189+
int new_updated;
11871190
int write;
11881191
};
11891192

include/uapi/linux/bpf.h

+37-1
Original file line numberDiff line numberDiff line change
@@ -2541,6 +2541,40 @@ union bpf_attr {
25412541
*
25422542
* **-EINVAL** if current value was unavailable, e.g. because
25432543
* sysctl is uninitialized and read returns -EIO for it.
2544+
*
2545+
* int bpf_sysctl_get_new_value(struct bpf_sysctl *ctx, char *buf, size_t buf_len)
2546+
* Description
2547+
* Get new value being written by user space to sysctl (before
2548+
* the actual write happens) and copy it as a string into
2549+
* the buffer *buf* of size *buf_len* provided by the program.
2550+
*
2551+
* User space may write new value at file position > 0.
2552+
*
2553+
* The buffer is always NUL terminated, unless it's zero-sized.
2554+
* Return
2555+
* Number of characters copied (not including the trailing NUL).
2556+
*
2557+
* **-E2BIG** if the buffer wasn't big enough (*buf* will contain
2558+
* truncated value in this case).
2559+
*
2560+
* **-EINVAL** if sysctl is being read.
2561+
*
2562+
* int bpf_sysctl_set_new_value(struct bpf_sysctl *ctx, const char *buf, size_t buf_len)
2563+
* Description
2564+
* Override new value being written by user space to sysctl with
2565+
* value provided by program in buffer *buf* of size *buf_len*.
2566+
*
2567+
* *buf* should contain a string in the same form as provided by user
2568+
* space on sysctl write.
2569+
*
2570+
* User space may write new value at file position > 0. To override
2571+
* the whole sysctl value file position should be set to zero.
2572+
* Return
2573+
* 0 on success.
2574+
*
2575+
* **-E2BIG** if the *buf_len* is too big.
2576+
*
2577+
* **-EINVAL** if sysctl is being read.
25442578
*/
25452579
#define __BPF_FUNC_MAPPER(FN) \
25462580
FN(unspec), \
@@ -2645,7 +2679,9 @@ union bpf_attr {
26452679
FN(skc_lookup_tcp), \
26462680
FN(tcp_check_syncookie), \
26472681
FN(sysctl_get_name), \
2648-
FN(sysctl_get_current_value),
2682+
FN(sysctl_get_current_value), \
2683+
FN(sysctl_get_new_value), \
2684+
FN(sysctl_set_new_value),
26492685

26502686
/* integer value in 'imm' field of BPF_CALL instruction selects which helper
26512687
* function eBPF program intends to call

kernel/bpf/cgroup.c

+80-1
Original file line numberDiff line numberDiff line change
@@ -778,6 +778,13 @@ const struct bpf_verifier_ops cg_dev_verifier_ops = {
778778
* @head: sysctl table header
779779
* @table: sysctl table
780780
* @write: sysctl is being read (= 0) or written (= 1)
781+
* @buf: pointer to buffer passed by user space
782+
* @pcount: value-result argument: value is size of buffer pointed to by @buf,
783+
* result is size of @new_buf if program set new value, initial value
784+
* otherwise
785+
* @new_buf: pointer to pointer to new buffer that will be allocated if program
786+
* overrides new value provided by user space on sysctl write
787+
* NOTE: it's the caller's responsibility to free *new_buf if it was set
781788
* @type: type of program to be executed
782789
*
783790
* Program is run when sysctl is being accessed, either read or written, and
@@ -788,14 +795,18 @@ const struct bpf_verifier_ops cg_dev_verifier_ops = {
788795
*/
789796
int __cgroup_bpf_run_filter_sysctl(struct ctl_table_header *head,
790797
struct ctl_table *table, int write,
791-
enum bpf_attach_type type)
798+
void __user *buf, size_t *pcount,
799+
void **new_buf, enum bpf_attach_type type)
792800
{
793801
struct bpf_sysctl_kern ctx = {
794802
.head = head,
795803
.table = table,
796804
.write = write,
797805
.cur_val = NULL,
798806
.cur_len = PAGE_SIZE,
807+
.new_val = NULL,
808+
.new_len = 0,
809+
.new_updated = 0,
799810
};
800811
struct cgroup *cgrp;
801812
int ret;
@@ -818,13 +829,32 @@ int __cgroup_bpf_run_filter_sysctl(struct ctl_table_header *head,
818829
ctx.cur_len = 0;
819830
}
820831

832+
if (write && buf && *pcount) {
833+
/* BPF program should be able to override new value with a
834+
* buffer bigger than provided by user.
835+
*/
836+
ctx.new_val = kmalloc_track_caller(PAGE_SIZE, GFP_KERNEL);
837+
ctx.new_len = min(PAGE_SIZE, *pcount);
838+
if (!ctx.new_val ||
839+
copy_from_user(ctx.new_val, buf, ctx.new_len))
840+
/* Let BPF program decide how to proceed. */
841+
ctx.new_len = 0;
842+
}
843+
821844
rcu_read_lock();
822845
cgrp = task_dfl_cgroup(current);
823846
ret = BPF_PROG_RUN_ARRAY(cgrp->bpf.effective[type], &ctx, BPF_PROG_RUN);
824847
rcu_read_unlock();
825848

826849
kfree(ctx.cur_val);
827850

851+
if (ret == 1 && ctx.new_updated) {
852+
*new_buf = ctx.new_val;
853+
*pcount = ctx.new_len;
854+
} else {
855+
kfree(ctx.new_val);
856+
}
857+
828858
return ret == 1 ? 0 : -EPERM;
829859
}
830860
EXPORT_SYMBOL(__cgroup_bpf_run_filter_sysctl);
@@ -932,6 +962,51 @@ static const struct bpf_func_proto bpf_sysctl_get_current_value_proto = {
932962
.arg3_type = ARG_CONST_SIZE,
933963
};
934964

965+
/* BPF helper bpf_sysctl_get_new_value(ctx, buf, buf_len).
 *
 * On a sysctl read (ctx->write == 0) there is no new value: a non-NULL,
 * non-empty buf is zero-filled and -EINVAL is returned.
 * On a sysctl write the helper returns whatever copy_sysctl_value() returns
 * for (buf, buf_len, ctx->new_val, ctx->new_len); copy_sysctl_value() is
 * defined outside this excerpt.
 */
BPF_CALL_3(bpf_sysctl_get_new_value, struct bpf_sysctl_kern *, ctx, char *, buf,
966+
size_t, buf_len)
967+
{
968+
if (!ctx->write) {
969+
if (buf && buf_len)
970+
/* Zero the whole output buffer before reporting the error. */
memset(buf, '\0', buf_len);
971+
return -EINVAL;
972+
}
973+
return copy_sysctl_value(buf, buf_len, ctx->new_val, ctx->new_len);
974+
}
975+
976+
/* Prototype record for bpf_sysctl_get_new_value: not GPL-only, integer return,
 * arguments typed as (context pointer, ARG_PTR_TO_UNINIT_MEM buffer,
 * ARG_CONST_SIZE length). sysctl_func_proto() returns it for
 * BPF_FUNC_sysctl_get_new_value.
 */
static const struct bpf_func_proto bpf_sysctl_get_new_value_proto = {
977+
.func = bpf_sysctl_get_new_value,
978+
.gpl_only = false,
979+
.ret_type = RET_INTEGER,
980+
.arg1_type = ARG_PTR_TO_CTX,
981+
.arg2_type = ARG_PTR_TO_UNINIT_MEM,
982+
.arg3_type = ARG_CONST_SIZE,
983+
};
984+
985+
/* BPF helper bpf_sysctl_set_new_value(ctx, buf, buf_len).
 *
 * Replaces the pending new sysctl value with buf_len bytes taken from buf.
 *
 * Returns 0 on success, -EINVAL if the sysctl is not being written, if
 * ctx->new_val is NULL or ctx->new_len is 0, or if buf/buf_len is NULL/0,
 * and -E2BIG if buf_len exceeds PAGE_SIZE - 1.
 */
BPF_CALL_3(bpf_sysctl_set_new_value, struct bpf_sysctl_kern *, ctx,
986+
const char *, buf, size_t, buf_len)
987+
{
988+
if (!ctx->write || !ctx->new_val || !ctx->new_len || !buf || !buf_len)
989+
return -EINVAL;
990+
991+
/* ctx->new_val is allocated with PAGE_SIZE bytes by
 * __cgroup_bpf_run_filter_sysctl(); anything above PAGE_SIZE - 1 is
 * rejected here.
 */
if (buf_len > PAGE_SIZE - 1)
992+
return -E2BIG;
993+
994+
/* Raw byte copy: no NUL terminator is appended by this helper. */
memcpy(ctx->new_val, buf, buf_len);
995+
ctx->new_len = buf_len;
996+
/* __cgroup_bpf_run_filter_sysctl() checks this flag to decide whether
 * to hand ctx->new_val back to its caller or free it.
 */
ctx->new_updated = 1;
997+
998+
return 0;
999+
}
1000+
1001+
/* Prototype record for bpf_sysctl_set_new_value: not GPL-only, integer return,
 * arguments typed as (context pointer, ARG_PTR_TO_MEM buffer, ARG_CONST_SIZE
 * length). sysctl_func_proto() returns it for BPF_FUNC_sysctl_set_new_value.
 */
static const struct bpf_func_proto bpf_sysctl_set_new_value_proto = {
1002+
.func = bpf_sysctl_set_new_value,
1003+
.gpl_only = false,
1004+
.ret_type = RET_INTEGER,
1005+
.arg1_type = ARG_PTR_TO_CTX,
1006+
.arg2_type = ARG_PTR_TO_MEM,
1007+
.arg3_type = ARG_CONST_SIZE,
1008+
};
1009+
9351010
static const struct bpf_func_proto *
9361011
sysctl_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
9371012
{
@@ -940,6 +1015,10 @@ sysctl_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
9401015
return &bpf_sysctl_get_name_proto;
9411016
case BPF_FUNC_sysctl_get_current_value:
9421017
return &bpf_sysctl_get_current_value_proto;
1018+
case BPF_FUNC_sysctl_get_new_value:
1019+
return &bpf_sysctl_get_new_value_proto;
1020+
case BPF_FUNC_sysctl_set_new_value:
1021+
return &bpf_sysctl_set_new_value_proto;
9431022
default:
9441023
return cgroup_base_func_proto(func_id, prog);
9451024
}

0 commit comments

Comments
 (0)