Skip to content

Commit ada6c1d

Browse files
committed
Merge git://git.kernel.org/pub/scm/linux/kernel/git/pablo/nf-next
Pablo Neira Ayuso says: ==================== Netfilter updates for net-next This is a bit large (and late) patchset that contains Netfilter updates for net-next. The most relevant changes are br_netfilter fixes, ipset RCU support, removal of the x_tables percpu ruleset copy and a rework of the nf_tables netdev support. More specifically, they are: 1) Warn the user when there is a better protocol conntracker available, from Marcelo Ricardo Leitner. 2) Fix forwarding of IPv6 fragmented traffic in br_netfilter, from Bernhard Thaler. This comes with several patches to prepare the change in the first place. 3) Get rid of special MTU handling of PPPoE/VLAN frames for br_netfilter. This is not needed anymore since now we use the largest fragment size to refragment, from Florian Westphal. 4) Restore vlan tag when refragmenting in br_netfilter, also from Florian. 5) Get rid of the percpu ruleset copy in x_tables, from Florian. Plus another follow-up patch to refine it from Eric Dumazet. 6) Several ipset cleanups, fixes and finally RCU support, from Jozsef Kadlecsik. 7) Get rid of parens in Netfilter Kconfig files. 8) Attach the net_device to the basechain as opposed to the initial per-table approach in the nf_tables netdev family. 9) Subscribe to netdev events to detect the removal and registration of a device that is referenced by a basechain. ==================== Signed-off-by: David S. Miller <davem@davemloft.net>
2 parents 758f0d4 + 835b803 commit ada6c1d

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

45 files changed

+1972
-1718
lines changed

include/linux/netfilter/ipset/ip_set.h

+17-12
Original file line numberDiff line numberDiff line change
@@ -108,8 +108,13 @@ struct ip_set_counter {
108108
atomic64_t packets;
109109
};
110110

111+
struct ip_set_comment_rcu {
112+
struct rcu_head rcu;
113+
char str[0];
114+
};
115+
111116
struct ip_set_comment {
112-
char *str;
117+
struct ip_set_comment_rcu __rcu *c;
113118
};
114119

115120
struct ip_set_skbinfo {
@@ -176,6 +181,9 @@ struct ip_set_type_variant {
176181
/* List elements */
177182
int (*list)(const struct ip_set *set, struct sk_buff *skb,
178183
struct netlink_callback *cb);
184+
/* Keep listing private when resizing runs in parallel */
185+
void (*uref)(struct ip_set *set, struct netlink_callback *cb,
186+
bool start);
179187

180188
/* Return true if "b" set is the same as "a"
181189
* according to the create set parameters */
@@ -223,7 +231,7 @@ struct ip_set {
223231
/* The name of the set */
224232
char name[IPSET_MAXNAMELEN];
225233
/* Lock protecting the set data */
226-
rwlock_t lock;
234+
spinlock_t lock;
227235
/* References to the set */
228236
u32 ref;
229237
/* The core set type */
@@ -341,12 +349,11 @@ ip_set_put_skbinfo(struct sk_buff *skb, struct ip_set_skbinfo *skbinfo)
341349
cpu_to_be64((u64)skbinfo->skbmark << 32 |
342350
skbinfo->skbmarkmask))) ||
343351
(skbinfo->skbprio &&
344-
nla_put_net32(skb, IPSET_ATTR_SKBPRIO,
352+
nla_put_net32(skb, IPSET_ATTR_SKBPRIO,
345353
cpu_to_be32(skbinfo->skbprio))) ||
346354
(skbinfo->skbqueue &&
347-
nla_put_net16(skb, IPSET_ATTR_SKBQUEUE,
355+
nla_put_net16(skb, IPSET_ATTR_SKBQUEUE,
348356
cpu_to_be16(skbinfo->skbqueue)));
349-
350357
}
351358

352359
static inline void
@@ -380,12 +387,12 @@ ip_set_init_counter(struct ip_set_counter *counter,
380387

381388
/* Netlink CB args */
382389
enum {
383-
IPSET_CB_NET = 0,
384-
IPSET_CB_DUMP,
385-
IPSET_CB_INDEX,
386-
IPSET_CB_ARG0,
390+
IPSET_CB_NET = 0, /* net namespace */
391+
IPSET_CB_DUMP, /* dump single set/all sets */
392+
IPSET_CB_INDEX, /* set index */
393+
IPSET_CB_PRIVATE, /* set private data */
394+
IPSET_CB_ARG0, /* type specific */
387395
IPSET_CB_ARG1,
388-
IPSET_CB_ARG2,
389396
};
390397

391398
/* register and unregister set references */
@@ -545,8 +552,6 @@ ip_set_put_extensions(struct sk_buff *skb, const struct ip_set *set,
545552
{ .bytes = ULLONG_MAX, .packets = ULLONG_MAX, \
546553
.timeout = (set)->timeout }
547554

548-
#define IP_SET_INIT_CIDR(a, b) ((a) ? (a) : (b))
549-
550555
#define IPSET_CONCAT(a, b) a##b
551556
#define IPSET_TOKEN(a, b) IPSET_CONCAT(a, b)
552557

include/linux/netfilter/ipset/ip_set_comment.h

+27-11
Original file line numberDiff line numberDiff line change
@@ -16,41 +16,57 @@ ip_set_comment_uget(struct nlattr *tb)
1616
return nla_data(tb);
1717
}
1818

19+
/* Called from uadd only, protected by the set spinlock.
20+
* The kadt functions don't use the comment extensions in any way.
21+
*/
1922
static inline void
2023
ip_set_init_comment(struct ip_set_comment *comment,
2124
const struct ip_set_ext *ext)
2225
{
26+
struct ip_set_comment_rcu *c = rcu_dereference_protected(comment->c, 1);
2327
size_t len = ext->comment ? strlen(ext->comment) : 0;
2428

25-
if (unlikely(comment->str)) {
26-
kfree(comment->str);
27-
comment->str = NULL;
29+
if (unlikely(c)) {
30+
kfree_rcu(c, rcu);
31+
rcu_assign_pointer(comment->c, NULL);
2832
}
2933
if (!len)
3034
return;
3135
if (unlikely(len > IPSET_MAX_COMMENT_SIZE))
3236
len = IPSET_MAX_COMMENT_SIZE;
33-
comment->str = kzalloc(len + 1, GFP_ATOMIC);
34-
if (unlikely(!comment->str))
37+
c = kzalloc(sizeof(*c) + len + 1, GFP_ATOMIC);
38+
if (unlikely(!c))
3539
return;
36-
strlcpy(comment->str, ext->comment, len + 1);
40+
strlcpy(c->str, ext->comment, len + 1);
41+
rcu_assign_pointer(comment->c, c);
3742
}
3843

44+
/* Used only when dumping a set, protected by rcu_read_lock_bh() */
3945
static inline int
4046
ip_set_put_comment(struct sk_buff *skb, struct ip_set_comment *comment)
4147
{
42-
if (!comment->str)
48+
struct ip_set_comment_rcu *c = rcu_dereference_bh(comment->c);
49+
50+
if (!c)
4351
return 0;
44-
return nla_put_string(skb, IPSET_ATTR_COMMENT, comment->str);
52+
return nla_put_string(skb, IPSET_ATTR_COMMENT, c->str);
4553
}
4654

55+
/* Called from uadd/udel, flush or the garbage collectors protected
56+
* by the set spinlock.
57+
* Called when the set is destroyed and when there can't be any user
58+
* of the set data anymore.
59+
*/
4760
static inline void
4861
ip_set_comment_free(struct ip_set_comment *comment)
4962
{
50-
if (unlikely(!comment->str))
63+
struct ip_set_comment_rcu *c;
64+
65+
c = rcu_dereference_protected(comment->c, 1);
66+
if (unlikely(!c))
5167
return;
52-
kfree(comment->str);
53-
comment->str = NULL;
68+
kfree_rcu(c, rcu);
69+
rcu_assign_pointer(comment->c, NULL);
5470
}
5571

5672
#endif

include/linux/netfilter/ipset/ip_set_timeout.h

+11-16
Original file line numberDiff line numberDiff line change
@@ -40,38 +40,33 @@ ip_set_timeout_uget(struct nlattr *tb)
4040
}
4141

4242
static inline bool
43-
ip_set_timeout_test(unsigned long timeout)
43+
ip_set_timeout_expired(unsigned long *t)
4444
{
45-
return timeout == IPSET_ELEM_PERMANENT ||
46-
time_is_after_jiffies(timeout);
47-
}
48-
49-
static inline bool
50-
ip_set_timeout_expired(unsigned long *timeout)
51-
{
52-
return *timeout != IPSET_ELEM_PERMANENT &&
53-
time_is_before_jiffies(*timeout);
45+
return *t != IPSET_ELEM_PERMANENT && time_is_before_jiffies(*t);
5446
}
5547

5648
static inline void
57-
ip_set_timeout_set(unsigned long *timeout, u32 t)
49+
ip_set_timeout_set(unsigned long *timeout, u32 value)
5850
{
59-
if (!t) {
51+
unsigned long t;
52+
53+
if (!value) {
6054
*timeout = IPSET_ELEM_PERMANENT;
6155
return;
6256
}
6357

64-
*timeout = msecs_to_jiffies(t * 1000) + jiffies;
65-
if (*timeout == IPSET_ELEM_PERMANENT)
58+
t = msecs_to_jiffies(value * MSEC_PER_SEC) + jiffies;
59+
if (t == IPSET_ELEM_PERMANENT)
6660
/* Bingo! :-) */
67-
(*timeout)--;
61+
t--;
62+
*timeout = t;
6863
}
6964

7065
static inline u32
7166
ip_set_timeout_get(unsigned long *timeout)
7267
{
7368
return *timeout == IPSET_ELEM_PERMANENT ? 0 :
74-
jiffies_to_msecs(*timeout - jiffies)/1000;
69+
jiffies_to_msecs(*timeout - jiffies)/MSEC_PER_SEC;
7570
}
7671

7772
#endif /* __KERNEL__ */

include/linux/netfilter/x_tables.h

+51-5
Original file line numberDiff line numberDiff line change
@@ -224,13 +224,10 @@ struct xt_table_info {
224224
unsigned int stacksize;
225225
unsigned int __percpu *stackptr;
226226
void ***jumpstack;
227-
/* ipt_entry tables: one per CPU */
228-
/* Note : this field MUST be the last one, see XT_TABLE_INFO_SZ */
229-
void *entries[1];
227+
228+
unsigned char entries[0] __aligned(8);
230229
};
231230

232-
#define XT_TABLE_INFO_SZ (offsetof(struct xt_table_info, entries) \
233-
+ nr_cpu_ids * sizeof(char *))
234231
int xt_register_target(struct xt_target *target);
235232
void xt_unregister_target(struct xt_target *target);
236233
int xt_register_targets(struct xt_target *target, unsigned int n);
@@ -353,6 +350,55 @@ static inline unsigned long ifname_compare_aligned(const char *_a,
353350
return ret;
354351
}
355352

353+
354+
/* On SMP, ip(6)t_entry->counters.pcnt holds address of the
355+
* real (percpu) counter. On !SMP, it's just the packet count,
356+
* so nothing needs to be done there.
357+
*
358+
* xt_percpu_counter_alloc returns the address of the percpu
359+
* counter, or 0 on !SMP.
360+
*
361+
* Hence caller must use IS_ERR_VALUE to check for error, this
362+
* allows us to return 0 for single core systems without forcing
363+
* callers to deal with SMP vs. NONSMP issues.
364+
*/
365+
static inline u64 xt_percpu_counter_alloc(void)
366+
{
367+
if (nr_cpu_ids > 1) {
368+
void __percpu *res = alloc_percpu(struct xt_counters);
369+
370+
if (res == NULL)
371+
return (u64) -ENOMEM;
372+
373+
return (__force u64) res;
374+
}
375+
376+
return 0;
377+
}
378+
static inline void xt_percpu_counter_free(u64 pcnt)
379+
{
380+
if (nr_cpu_ids > 1)
381+
free_percpu((void __percpu *) pcnt);
382+
}
383+
384+
static inline struct xt_counters *
385+
xt_get_this_cpu_counter(struct xt_counters *cnt)
386+
{
387+
if (nr_cpu_ids > 1)
388+
return this_cpu_ptr((void __percpu *) cnt->pcnt);
389+
390+
return cnt;
391+
}
392+
393+
static inline struct xt_counters *
394+
xt_get_per_cpu_counter(struct xt_counters *cnt, unsigned int cpu)
395+
{
396+
if (nr_cpu_ids > 1)
397+
return per_cpu_ptr((void __percpu *) cnt->pcnt, cpu);
398+
399+
return cnt;
400+
}
401+
356402
struct nf_hook_ops *xt_hook_link(const struct xt_table *, nf_hookfn *);
357403
void xt_hook_unlink(const struct xt_table *, struct nf_hook_ops *);
358404

include/linux/netfilter_bridge.h

-7
Original file line numberDiff line numberDiff line change
@@ -20,13 +20,6 @@ enum nf_br_hook_priorities {
2020
#define BRNF_BRIDGED_DNAT 0x02
2121
#define BRNF_NF_BRIDGE_PREROUTING 0x08
2222

23-
static inline unsigned int nf_bridge_mtu_reduction(const struct sk_buff *skb)
24-
{
25-
if (skb->nf_bridge->orig_proto == BRNF_PROTO_PPPOE)
26-
return PPPOE_SES_HLEN;
27-
return 0;
28-
}
29-
3023
int br_handle_frame_finish(struct sock *sk, struct sk_buff *skb);
3124

3225
static inline void br_drop_fake_rtable(struct sk_buff *skb)

include/linux/netfilter_ipv6.h

+3
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,9 @@ void ipv6_netfilter_fini(void);
2525
struct nf_ipv6_ops {
2626
int (*chk_addr)(struct net *net, const struct in6_addr *addr,
2727
const struct net_device *dev, int strict);
28+
void (*route_input)(struct sk_buff *skb);
29+
int (*fragment)(struct sock *sk, struct sk_buff *skb,
30+
int (*output)(struct sock *, struct sk_buff *));
2831
};
2932

3033
extern const struct nf_ipv6_ops __rcu *nf_ipv6_ops;

include/linux/skbuff.h

+6-1
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,7 @@
3636
#include <linux/sched.h>
3737
#include <net/flow_dissector.h>
3838
#include <linux/splice.h>
39+
#include <linux/in6.h>
3940

4041
/* A. Checksumming of received packets by device.
4142
*
@@ -173,13 +174,17 @@ struct nf_bridge_info {
173174
BRNF_PROTO_PPPOE
174175
} orig_proto:8;
175176
bool pkt_otherhost;
177+
__u16 frag_max_size;
176178
unsigned int mask;
177179
struct net_device *physindev;
178180
union {
179181
struct net_device *physoutdev;
180182
char neigh_header[8];
181183
};
182-
__be32 ipv4_daddr;
184+
union {
185+
__be32 ipv4_daddr;
186+
struct in6_addr ipv6_daddr;
187+
};
183188
};
184189
#endif
185190

include/net/netfilter/nf_tables.h

+9-2
Original file line numberDiff line numberDiff line change
@@ -781,6 +781,7 @@ struct nft_stats {
781781
};
782782

783783
#define NFT_HOOK_OPS_MAX 2
784+
#define NFT_BASECHAIN_DISABLED (1 << 0)
784785

785786
/**
786787
* struct nft_base_chain - nf_tables base chain
@@ -791,21 +792,29 @@ struct nft_stats {
791792
* @policy: default policy
792793
* @stats: per-cpu chain stats
793794
* @chain: the chain
795+
* @dev_name: device name that this base chain is attached to (if any)
794796
*/
795797
struct nft_base_chain {
796798
struct nf_hook_ops ops[NFT_HOOK_OPS_MAX];
797799
possible_net_t pnet;
798800
const struct nf_chain_type *type;
799801
u8 policy;
802+
u8 flags;
800803
struct nft_stats __percpu *stats;
801804
struct nft_chain chain;
805+
char dev_name[IFNAMSIZ];
802806
};
803807

804808
static inline struct nft_base_chain *nft_base_chain(const struct nft_chain *chain)
805809
{
806810
return container_of(chain, struct nft_base_chain, chain);
807811
}
808812

813+
int nft_register_basechain(struct nft_base_chain *basechain,
814+
unsigned int hook_nops);
815+
void nft_unregister_basechain(struct nft_base_chain *basechain,
816+
unsigned int hook_nops);
817+
809818
unsigned int nft_do_chain(struct nft_pktinfo *pkt,
810819
const struct nf_hook_ops *ops);
811820

@@ -819,7 +828,6 @@ unsigned int nft_do_chain(struct nft_pktinfo *pkt,
819828
* @use: number of chain references to this table
820829
* @flags: table flag (see enum nft_table_flags)
821830
* @name: name of the table
822-
* @dev: this table is bound to this device (if any)
823831
*/
824832
struct nft_table {
825833
struct list_head list;
@@ -829,7 +837,6 @@ struct nft_table {
829837
u32 use;
830838
u16 flags;
831839
char name[NFT_TABLE_MAXNAMELEN];
832-
struct net_device *dev;
833840
};
834841

835842
enum nft_af_flags {

include/uapi/linux/netfilter/ipset/ip_set.h

+3-3
Original file line numberDiff line numberDiff line change
@@ -15,12 +15,12 @@
1515
/* The protocol version */
1616
#define IPSET_PROTOCOL 6
1717

18-
/* The maximum permissible comment length we will accept over netlink */
19-
#define IPSET_MAX_COMMENT_SIZE 255
20-
2118
/* The max length of strings including NUL: set and type identifiers */
2219
#define IPSET_MAXNAMELEN 32
2320

21+
/* The maximum permissible comment length we will accept over netlink */
22+
#define IPSET_MAX_COMMENT_SIZE 255
23+
2424
/* Message types and commands */
2525
enum ipset_cmd {
2626
IPSET_CMD_NONE,

0 commit comments

Comments
 (0)