Skip to content

Commit 7b146ce

Browse files
rdnaAlexei Starovoitov
authored and
Alexei Starovoitov
committed
bpf: Sysctl hook
Containerized applications may run as root, and this may create problems for the whole host. Specifically, such applications may change a sysctl and affect applications in other containers. Furthermore, in existing infrastructure it may not be possible to just completely disable writing to sysctls; instead such a process should be gradual, with the ability to log which sysctls are being changed by a container, investigate, limit the set of writable sysctls to the currently used ones (so that new ones cannot be changed) and eventually reduce this set to zero. The patch introduces a new program type BPF_PROG_TYPE_CGROUP_SYSCTL and attach type BPF_CGROUP_SYSCTL to solve these problems on a per-cgroup basis. The new program type has access to the following minimal context: struct bpf_sysctl { __u32 write; }; where @write indicates whether the sysctl is being read (= 0) or written (= 1). Helpers to access the sysctl name and value will be introduced separately. The BPF_CGROUP_SYSCTL attach point is added to the sysctl code right before passing control to ctl_table->proc_handler so that a BPF program can either allow or deny access to the sysctl. Suggested-by: Roman Gushchin <[email protected]> Signed-off-by: Andrey Ignatov <[email protected]> Signed-off-by: Alexei Starovoitov <[email protected]>
1 parent b1cd609 commit 7b146ce

File tree

8 files changed

+141
-0
lines changed

8 files changed

+141
-0
lines changed

fs/proc/proc_sysctl.c

+5
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313
#include <linux/namei.h>
1414
#include <linux/mm.h>
1515
#include <linux/module.h>
16+
#include <linux/bpf-cgroup.h>
1617
#include "internal.h"
1718

1819
static const struct dentry_operations proc_sys_dentry_operations;
@@ -588,6 +589,10 @@ static ssize_t proc_sys_call_handler(struct file *filp, void __user *buf,
588589
if (!table->proc_handler)
589590
goto out;
590591

592+
error = BPF_CGROUP_RUN_PROG_SYSCTL(head, table, write);
593+
if (error)
594+
goto out;
595+
591596
/* careful: calling conventions are nasty here */
592597
res = count;
593598
error = table->proc_handler(table, write, buf, &res, ppos);

include/linux/bpf-cgroup.h

+18
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,8 @@ struct bpf_map;
1717
struct bpf_prog;
1818
struct bpf_sock_ops_kern;
1919
struct bpf_cgroup_storage;
20+
struct ctl_table;
21+
struct ctl_table_header;
2022

2123
#ifdef CONFIG_CGROUP_BPF
2224

@@ -109,6 +111,10 @@ int __cgroup_bpf_run_filter_sock_ops(struct sock *sk,
109111
int __cgroup_bpf_check_dev_permission(short dev_type, u32 major, u32 minor,
110112
short access, enum bpf_attach_type type);
111113

114+
int __cgroup_bpf_run_filter_sysctl(struct ctl_table_header *head,
115+
struct ctl_table *table, int write,
116+
enum bpf_attach_type type);
117+
112118
static inline enum bpf_cgroup_storage_type cgroup_storage_type(
113119
struct bpf_map *map)
114120
{
@@ -253,6 +259,17 @@ int bpf_percpu_cgroup_storage_update(struct bpf_map *map, void *key,
253259
\
254260
__ret; \
255261
})
262+
263+
264+
/*
 * Run the BPF_CGROUP_SYSCTL programs for a sysctl access.
 *
 * Evaluates to 0 without calling anything when cgroup_bpf_enabled is false;
 * otherwise evaluates to the return value of
 * __cgroup_bpf_run_filter_sysctl(head, table, write, BPF_CGROUP_SYSCTL).
 */
#define BPF_CGROUP_RUN_PROG_SYSCTL(head, table, write) \
265+
({ \
266+
int __ret = 0; \
267+
if (cgroup_bpf_enabled) \
268+
__ret = __cgroup_bpf_run_filter_sysctl(head, table, write, \
269+
BPF_CGROUP_SYSCTL); \
270+
__ret; \
271+
})
272+
256273
int cgroup_bpf_prog_attach(const union bpf_attr *attr,
257274
enum bpf_prog_type ptype, struct bpf_prog *prog);
258275
int cgroup_bpf_prog_detach(const union bpf_attr *attr,
@@ -321,6 +338,7 @@ static inline int bpf_percpu_cgroup_storage_update(struct bpf_map *map,
321338
#define BPF_CGROUP_RUN_PROG_UDP6_SENDMSG_LOCK(sk, uaddr, t_ctx) ({ 0; })
322339
#define BPF_CGROUP_RUN_PROG_SOCK_OPS(sock_ops) ({ 0; })
323340
#define BPF_CGROUP_RUN_PROG_DEVICE_CGROUP(type,major,minor,access) ({ 0; })
341+
#define BPF_CGROUP_RUN_PROG_SYSCTL(head, table, write) ({ 0; })
324342

325343
#define for_each_cgroup_storage_type(stype) for (; false; )
326344

include/linux/bpf_types.h

+1
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@ BPF_PROG_TYPE(BPF_PROG_TYPE_RAW_TRACEPOINT, raw_tracepoint)
2828
#endif
2929
#ifdef CONFIG_CGROUP_BPF
3030
BPF_PROG_TYPE(BPF_PROG_TYPE_CGROUP_DEVICE, cg_dev)
31+
BPF_PROG_TYPE(BPF_PROG_TYPE_CGROUP_SYSCTL, cg_sysctl)
3132
#endif
3233
#ifdef CONFIG_BPF_LIRC_MODE2
3334
BPF_PROG_TYPE(BPF_PROG_TYPE_LIRC_MODE2, lirc_mode2)

include/linux/filter.h

+8
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,8 @@ struct bpf_prog_aux;
3333
struct xdp_rxq_info;
3434
struct xdp_buff;
3535
struct sock_reuseport;
36+
struct ctl_table;
37+
struct ctl_table_header;
3638

3739
/* ArgX, context and stack frame pointer register positions. Note,
3840
* Arg1, Arg2, Arg3, etc are used as argument mappings of function
@@ -1177,4 +1179,10 @@ struct bpf_sock_ops_kern {
11771179
*/
11781180
};
11791181

1182+
/*
 * Kernel-side context handed to cgroup sysctl BPF programs; it is filled in
 * by __cgroup_bpf_run_filter_sysctl() from its arguments.
 */
struct bpf_sysctl_kern {
1183+
struct ctl_table_header *head; /* sysctl table header */
1184+
struct ctl_table *table; /* sysctl table */
1185+
int write; /* sysctl is being read (= 0) or written (= 1) */
1186+
};
1187+
11801188
#endif /* __LINUX_FILTER_H__ */

include/uapi/linux/bpf.h

+9
Original file line numberDiff line numberDiff line change
@@ -167,6 +167,7 @@ enum bpf_prog_type {
167167
BPF_PROG_TYPE_LIRC_MODE2,
168168
BPF_PROG_TYPE_SK_REUSEPORT,
169169
BPF_PROG_TYPE_FLOW_DISSECTOR,
170+
BPF_PROG_TYPE_CGROUP_SYSCTL,
170171
};
171172

172173
enum bpf_attach_type {
@@ -188,6 +189,7 @@ enum bpf_attach_type {
188189
BPF_CGROUP_UDP6_SENDMSG,
189190
BPF_LIRC_MODE2,
190191
BPF_FLOW_DISSECTOR,
192+
BPF_CGROUP_SYSCTL,
191193
__MAX_BPF_ATTACH_TYPE
192194
};
193195

@@ -3308,4 +3310,11 @@ struct bpf_line_info {
33083310
struct bpf_spin_lock {
33093311
__u32 val;
33103312
};
3313+
3314+
/* Context visible to BPF_PROG_TYPE_CGROUP_SYSCTL programs. */
struct bpf_sysctl {
3315+
__u32 write; /* Sysctl is being read (= 0) or written (= 1).
3316+
* Allows 1,2,4-byte read, but no write.
3317+
*/
3318+
};
3319+
33113320
#endif /* _UAPI__LINUX_BPF_H__ */

kernel/bpf/cgroup.c

+92
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,9 @@
1111
#include <linux/kernel.h>
1212
#include <linux/atomic.h>
1313
#include <linux/cgroup.h>
14+
#include <linux/filter.h>
1415
#include <linux/slab.h>
16+
#include <linux/sysctl.h>
1517
#include <linux/bpf.h>
1618
#include <linux/bpf-cgroup.h>
1719
#include <net/sock.h>
@@ -768,3 +770,93 @@ const struct bpf_verifier_ops cg_dev_verifier_ops = {
768770
.get_func_proto = cgroup_dev_func_proto,
769771
.is_valid_access = cgroup_dev_is_valid_access,
770772
};
773+
774+
/**
775+
* __cgroup_bpf_run_filter_sysctl - Run a program on sysctl
776+
*
777+
* @head: sysctl table header
778+
* @table: sysctl table
779+
* @write: sysctl is being read (= 0) or written (= 1)
780+
* @type: type of program to be executed
781+
*
782+
* Program is run when sysctl is being accessed, either read or written, and
783+
* can allow or deny such access.
784+
*
785+
* This function will return %-EPERM if an attached program is found and
786+
* returned value != 1 during execution. In all other cases 0 is returned.
787+
*/
788+
int __cgroup_bpf_run_filter_sysctl(struct ctl_table_header *head,
789+
struct ctl_table *table, int write,
790+
enum bpf_attach_type type)
791+
{
792+
/* Context handed to each program; a plain copy of the arguments. */
struct bpf_sysctl_kern ctx = {
793+
.head = head,
794+
.table = table,
795+
.write = write,
796+
};
797+
struct cgroup *cgrp;
798+
int ret;
799+
800+
/*
 * The current task's cgroup and its effective program array for @type
 * are only used between rcu_read_lock() and rcu_read_unlock().
 */
rcu_read_lock();
801+
cgrp = task_dfl_cgroup(current);
802+
ret = BPF_PROG_RUN_ARRAY(cgrp->bpf.effective[type], &ctx, BPF_PROG_RUN);
803+
rcu_read_unlock();
804+
805+
/* Only a result of exactly 1 allows the access; anything else is -EPERM. */
return ret == 1 ? 0 : -EPERM;
806+
}
807+
EXPORT_SYMBOL(__cgroup_bpf_run_filter_sysctl);
808+
809+
/*
 * Helper lookup for cgroup sysctl programs. No sysctl-specific helpers are
 * handled here: the lookup is delegated entirely to cgroup_base_func_proto().
 */
static const struct bpf_func_proto *
810+
sysctl_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
811+
{
812+
return cgroup_base_func_proto(func_id, prog);
813+
}
814+
815+
/*
 * Decide whether a context access at offset @off of @size bytes is allowed
 * for a cgroup sysctl program.
 *
 * Only reads are accepted, and only of the `write` field of struct
 * bpf_sysctl. Returns true if the access is permitted, false otherwise.
 */
static bool sysctl_is_valid_access(int off, int size, enum bpf_access_type type,
816+
const struct bpf_prog *prog,
817+
struct bpf_insn_access_aux *info)
818+
{
819+
const int size_default = sizeof(__u32);
820+
821+
/*
 * Reject negative offsets, accesses running past the end of
 * struct bpf_sysctl, offsets not aligned to the access size, and
 * anything that is not a read.
 */
if (off < 0 || off + size > sizeof(struct bpf_sysctl) ||
822+
off % size || type != BPF_READ)
823+
return false;
824+
825+
switch (off) {
826+
case offsetof(struct bpf_sysctl, write):
827+
/* Record the full field size, then check the (possibly narrower) access. */
bpf_ctx_record_field_size(info, size_default);
828+
return bpf_ctx_narrow_access_ok(off, size, size_default);
829+
default:
830+
return false;
831+
}
832+
}
833+
834+
/*
 * Rewrite an access to struct bpf_sysctl into instructions operating on
 * struct bpf_sysctl_kern.
 *
 * Instructions are written to @insn_buf; the return value is the number of
 * instructions emitted (0 when @si->off matches no known field).
 */
static u32 sysctl_convert_ctx_access(enum bpf_access_type type,
835+
const struct bpf_insn *si,
836+
struct bpf_insn *insn_buf,
837+
struct bpf_prog *prog, u32 *target_size)
838+
{
839+
struct bpf_insn *insn = insn_buf;
840+
841+
switch (si->off) {
842+
case offsetof(struct bpf_sysctl, write):
843+
/* Load bpf_sysctl_kern.write using the original access size and registers. */
*insn++ = BPF_LDX_MEM(
844+
BPF_SIZE(si->code), si->dst_reg, si->src_reg,
845+
bpf_target_off(struct bpf_sysctl_kern, write,
846+
FIELD_SIZEOF(struct bpf_sysctl_kern,
847+
write),
848+
target_size));
849+
break;
850+
}
851+
852+
return insn - insn_buf;
853+
}
854+
855+
/*
 * Verifier callbacks for the cg_sysctl program type: helper lookup, context
 * access validation and context access rewriting.
 */
const struct bpf_verifier_ops cg_sysctl_verifier_ops = {
856+
.get_func_proto = sysctl_func_proto,
857+
.is_valid_access = sysctl_is_valid_access,
858+
.convert_ctx_access = sysctl_convert_ctx_access,
859+
};
860+
861+
/* Program ops for the cg_sysctl program type; no callbacks are set. */
const struct bpf_prog_ops cg_sysctl_prog_ops = {
862+
};

kernel/bpf/syscall.c

+7
Original file line numberDiff line numberDiff line change
@@ -1888,6 +1888,9 @@ static int bpf_prog_attach(const union bpf_attr *attr)
18881888
case BPF_FLOW_DISSECTOR:
18891889
ptype = BPF_PROG_TYPE_FLOW_DISSECTOR;
18901890
break;
1891+
case BPF_CGROUP_SYSCTL:
1892+
ptype = BPF_PROG_TYPE_CGROUP_SYSCTL;
1893+
break;
18911894
default:
18921895
return -EINVAL;
18931896
}
@@ -1966,6 +1969,9 @@ static int bpf_prog_detach(const union bpf_attr *attr)
19661969
return lirc_prog_detach(attr);
19671970
case BPF_FLOW_DISSECTOR:
19681971
return skb_flow_dissector_bpf_prog_detach(attr);
1972+
case BPF_CGROUP_SYSCTL:
1973+
ptype = BPF_PROG_TYPE_CGROUP_SYSCTL;
1974+
break;
19691975
default:
19701976
return -EINVAL;
19711977
}
@@ -1999,6 +2005,7 @@ static int bpf_prog_query(const union bpf_attr *attr,
19992005
case BPF_CGROUP_UDP6_SENDMSG:
20002006
case BPF_CGROUP_SOCK_OPS:
20012007
case BPF_CGROUP_DEVICE:
2008+
case BPF_CGROUP_SYSCTL:
20022009
break;
20032010
case BPF_LIRC_MODE2:
20042011
return lirc_prog_query(attr, uattr);

kernel/bpf/verifier.c

+1
Original file line numberDiff line numberDiff line change
@@ -5267,6 +5267,7 @@ static int check_return_code(struct bpf_verifier_env *env)
52675267
case BPF_PROG_TYPE_CGROUP_SOCK_ADDR:
52685268
case BPF_PROG_TYPE_SOCK_OPS:
52695269
case BPF_PROG_TYPE_CGROUP_DEVICE:
5270+
case BPF_PROG_TYPE_CGROUP_SYSCTL:
52705271
break;
52715272
default:
52725273
return 0;

0 commit comments

Comments
 (0)