Skip to content

Commit d52d399

Browse files
iamkafai authored and davem330 committed
ipv6: Create percpu rt6_info
After the patch 'ipv6: Only create RTF_CACHE routes after encountering pmtu exception', we need to compensate the performance hit (bouncing dst->__refcnt).

Signed-off-by: Martin KaFai Lau <[email protected]>
Cc: Hannes Frederic Sowa <[email protected]>
Cc: Steffen Klassert <[email protected]>
Cc: Julian Anastasov <[email protected]>
Signed-off-by: David S. Miller <[email protected]>
1 parent 83a09ab commit d52d399

File tree

4 files changed

+142
-18
lines changed

4 files changed

+142
-18
lines changed

include/net/ip6_fib.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -124,6 +124,7 @@ struct rt6_info {
124124
struct uncached_list *rt6i_uncached_list;
125125

126126
struct inet6_dev *rt6i_idev;
127+
struct rt6_info * __percpu *rt6i_pcpu;
127128

128129
u32 rt6i_metric;
129130
u32 rt6i_pmtu;
@@ -164,7 +165,7 @@ static inline void rt6_update_expires(struct rt6_info *rt0, int timeout)
164165

165166
static inline u32 rt6_get_cookie(const struct rt6_info *rt)
166167
{
167-
if (unlikely(rt->dst.flags & DST_NOCACHE))
168+
if (rt->rt6i_flags & RTF_PCPU || unlikely(rt->dst.flags & DST_NOCACHE))
168169
rt = (struct rt6_info *)(rt->dst.from);
169170

170171
return rt->rt6i_node ? rt->rt6i_node->fn_sernum : 0;

include/uapi/linux/ipv6_route.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,7 @@
3434
#define RTF_PREF(pref) ((pref) << 27)
3535
#define RTF_PREF_MASK 0x18000000
3636

37+
#define RTF_PCPU 0x40000000
3738
#define RTF_LOCAL 0x80000000
3839

3940

net/ipv6/ip6_fib.c

Lines changed: 23 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -154,10 +154,32 @@ static void node_free(struct fib6_node *fn)
154154
kmem_cache_free(fib6_node_kmem, fn);
155155
}
156156

157+
/*
 * Release the per-cpu route copies attached to @non_pcpu_rt.
 *
 * Walks every possible CPU, hands any copy found in that CPU's slot to
 * dst_free() and resets the slot to NULL.  Returns immediately when no
 * per-cpu array is attached (rt6i_pcpu is NULL).
 *
 * Only the copies are released here; the per-cpu array itself is not
 * freed by this function.
 */
static void rt6_free_pcpu(struct rt6_info *non_pcpu_rt)
158+
{
159+
int cpu;
160+
161+
/* No per-cpu array attached: nothing to release. */
if (!non_pcpu_rt->rt6i_pcpu)
162+
return;
163+
164+
for_each_possible_cpu(cpu) {
165+
struct rt6_info **ppcpu_rt;
166+
struct rt6_info *pcpu_rt;
167+
168+
ppcpu_rt = per_cpu_ptr(non_pcpu_rt->rt6i_pcpu, cpu);
169+
pcpu_rt = *ppcpu_rt;
170+
if (pcpu_rt) {
171+
dst_free(&pcpu_rt->dst);
172+
/* Clear the slot so it no longer refers to the entry just passed to dst_free(). */
*ppcpu_rt = NULL;
173+
}
174+
}
175+
}
176+
157177
static void rt6_release(struct rt6_info *rt)
158178
{
159-
if (atomic_dec_and_test(&rt->rt6i_ref))
179+
if (atomic_dec_and_test(&rt->rt6i_ref)) {
180+
rt6_free_pcpu(rt);
160181
dst_free(&rt->dst);
182+
}
161183
}
162184

163185
static void fib6_link_table(struct net *net, struct fib6_table *tb)

net/ipv6/route.c

Lines changed: 116 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -165,11 +165,18 @@ static void rt6_uncached_list_flush_dev(struct net *net, struct net_device *dev)
165165
}
166166
}
167167

168+
/*
 * Metrics write pointer for a per-cpu route copy.
 *
 * Does not operate on @rt's own metrics: it returns
 * dst_metrics_write_ptr() of rt->dst.from instead.  Used by
 * ipv6_cow_metrics() when RTF_PCPU is set in rt6i_flags.
 *
 * NOTE(review): presumably dst.from is the route this copy was made
 * from and is always set for RTF_PCPU entries -- confirm in
 * ip6_rt_copy_init(), which is not visible here.
 */
static u32 *rt6_pcpu_cow_metrics(struct rt6_info *rt)
169+
{
170+
return dst_metrics_write_ptr(rt->dst.from);
171+
}
172+
168173
static u32 *ipv6_cow_metrics(struct dst_entry *dst, unsigned long old)
169174
{
170175
struct rt6_info *rt = (struct rt6_info *)dst;
171176

172-
if (rt->rt6i_flags & RTF_CACHE)
177+
if (rt->rt6i_flags & RTF_PCPU)
178+
return rt6_pcpu_cow_metrics(rt);
179+
else if (rt->rt6i_flags & RTF_CACHE)
173180
return NULL;
174181
else
175182
return dst_cow_metrics_generic(dst, old);
@@ -309,10 +316,10 @@ static const struct rt6_info ip6_blk_hole_entry_template = {
309316
#endif
310317

311318
/* allocate dst with ip6_dst_ops */
312-
static inline struct rt6_info *ip6_dst_alloc(struct net *net,
313-
struct net_device *dev,
314-
int flags,
315-
struct fib6_table *table)
319+
static struct rt6_info *__ip6_dst_alloc(struct net *net,
320+
struct net_device *dev,
321+
int flags,
322+
struct fib6_table *table)
316323
{
317324
struct rt6_info *rt = dst_alloc(&net->ipv6.ip6_dst_ops, dev,
318325
0, DST_OBSOLETE_FORCE_CHK, flags);
@@ -327,6 +334,34 @@ static inline struct rt6_info *ip6_dst_alloc(struct net *net,
327334
return rt;
328335
}
329336

337+
/*
 * Allocate a rt6_info together with its per-cpu slot array.
 *
 * The route itself comes from __ip6_dst_alloc(); on success an array of
 * one "struct rt6_info *" per CPU is allocated with GFP_ATOMIC, stored
 * in rt6i_pcpu, and every possible CPU's slot is set to NULL.
 *
 * Returns the new route, or NULL if either allocation fails.  When only
 * the per-cpu allocation fails, the freshly allocated route is torn
 * down with dst_destroy() before returning.
 */
static struct rt6_info *ip6_dst_alloc(struct net *net,
338+
struct net_device *dev,
339+
int flags,
340+
struct fib6_table *table)
341+
{
342+
struct rt6_info *rt = __ip6_dst_alloc(net, dev, flags, table);
343+
344+
if (rt) {
345+
rt->rt6i_pcpu = alloc_percpu_gfp(struct rt6_info *, GFP_ATOMIC);
346+
if (rt->rt6i_pcpu) {
347+
int cpu;
348+
349+
/* Start with every CPU's slot empty. */
for_each_possible_cpu(cpu) {
350+
struct rt6_info **p;
351+
352+
p = per_cpu_ptr(rt->rt6i_pcpu, cpu);
353+
/* no one shares rt */
354+
*p = NULL;
355+
}
356+
} else {
357+
/* Per-cpu allocation failed: undo the route allocation and report failure. */
dst_destroy((struct dst_entry *)rt);
358+
return NULL;
359+
}
360+
}
361+
362+
return rt;
363+
}
364+
330365
static void ip6_dst_destroy(struct dst_entry *dst)
331366
{
332367
struct rt6_info *rt = (struct rt6_info *)dst;
@@ -335,6 +370,9 @@ static void ip6_dst_destroy(struct dst_entry *dst)
335370

336371
dst_destroy_metrics_generic(dst);
337372

373+
if (rt->rt6i_pcpu)
374+
free_percpu(rt->rt6i_pcpu);
375+
338376
rt6_uncached_list_del(rt);
339377

340378
idev = rt->rt6i_idev;
@@ -912,11 +950,11 @@ static struct rt6_info *ip6_rt_cache_alloc(struct rt6_info *ort,
912950
* Clone the route.
913951
*/
914952

915-
if (ort->rt6i_flags & RTF_CACHE)
953+
if (ort->rt6i_flags & (RTF_CACHE | RTF_PCPU))
916954
ort = (struct rt6_info *)ort->dst.from;
917955

918-
rt = ip6_dst_alloc(dev_net(ort->dst.dev), ort->dst.dev,
919-
0, ort->rt6i_table);
956+
rt = __ip6_dst_alloc(dev_net(ort->dst.dev), ort->dst.dev,
957+
0, ort->rt6i_table);
920958

921959
if (!rt)
922960
return NULL;
@@ -943,6 +981,54 @@ static struct rt6_info *ip6_rt_cache_alloc(struct rt6_info *ort,
943981
return rt;
944982
}
945983

984+
/*
 * Create a new route entry to serve as a per-cpu copy of @rt.
 *
 * The entry is allocated with __ip6_dst_alloc() (not ip6_dst_alloc(),
 * the wrapper that attaches an rt6i_pcpu array) using @rt's device,
 * dst flags and table.  It is then initialised from @rt through
 * ip6_rt_copy_init(), inherits rt6i_protocol, and is tagged RTF_PCPU.
 *
 * Returns the new entry, or NULL if the allocation fails.  The entry is
 * not stored in any per-cpu slot here; that is left to the caller.
 */
static struct rt6_info *ip6_rt_pcpu_alloc(struct rt6_info *rt)
985+
{
986+
struct rt6_info *pcpu_rt;
987+
988+
pcpu_rt = __ip6_dst_alloc(dev_net(rt->dst.dev),
989+
rt->dst.dev, rt->dst.flags,
990+
rt->rt6i_table);
991+
992+
if (!pcpu_rt)
993+
return NULL;
994+
ip6_rt_copy_init(pcpu_rt, rt);
995+
pcpu_rt->rt6i_protocol = rt->rt6i_protocol;
996+
/* Mark the entry as a per-cpu copy. */
pcpu_rt->rt6i_flags |= RTF_PCPU;
997+
return pcpu_rt;
998+
}
999+
1000+
/*
 * Return the current CPU's copy of @rt with a reference taken.
 *
 * It should be called with read_lock_bh(&tb6_lock) acquired.
 *
 * If this CPU's slot in rt->rt6i_pcpu is already populated, that entry
 * is used.  Otherwise a copy is created with ip6_rt_pcpu_alloc() and
 * installed with cmpxchg(); if the slot turns out to have been filled
 * in the meantime, the new copy is destroyed and the installed entry is
 * used instead.  If the copy cannot be allocated, the namespace's
 * ip6_null_entry is returned.
 *
 * On every path the returned entry has had dst_hold() and
 * rt6_dst_from_metrics_check() applied to it.
 */
1001+
static struct rt6_info *rt6_get_pcpu_route(struct rt6_info *rt)
1002+
{
1003+
struct rt6_info *pcpu_rt, *prev, **p;
1004+
1005+
p = this_cpu_ptr(rt->rt6i_pcpu);
1006+
pcpu_rt = *p;
1007+
1008+
/* Slot already populated: reuse the existing copy. */
if (pcpu_rt)
1009+
goto done;
1010+
1011+
pcpu_rt = ip6_rt_pcpu_alloc(rt);
1012+
if (!pcpu_rt) {
1013+
struct net *net = dev_net(rt->dst.dev);
1014+
1015+
/* Allocation failed: fall back to the null entry. */
pcpu_rt = net->ipv6.ip6_null_entry;
1016+
goto done;
1017+
}
1018+
1019+
/* Install the new copy only if the slot is still empty. */
prev = cmpxchg(p, NULL, pcpu_rt);
1020+
if (prev) {
1021+
/* If someone did it before us, return prev instead */
1022+
dst_destroy(&pcpu_rt->dst);
1023+
pcpu_rt = prev;
1024+
}
1025+
1026+
done:
1027+
dst_hold(&pcpu_rt->dst);
1028+
rt6_dst_from_metrics_check(pcpu_rt);
1029+
return pcpu_rt;
1030+
}
1031+
9461032
static struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table, int oif,
9471033
struct flowi6 *fl6, int flags)
9481034
{
@@ -975,11 +1061,13 @@ static struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table,
9751061
}
9761062
}
9771063

978-
dst_use(&rt->dst, jiffies);
979-
read_unlock_bh(&table->tb6_lock);
9801064

9811065
if (rt == net->ipv6.ip6_null_entry || (rt->rt6i_flags & RTF_CACHE)) {
982-
goto done;
1066+
dst_use(&rt->dst, jiffies);
1067+
read_unlock_bh(&table->tb6_lock);
1068+
1069+
rt6_dst_from_metrics_check(rt);
1070+
return rt;
9831071
} else if (unlikely((fl6->flowi6_flags & FLOWI_FLAG_KNOWN_NH) &&
9841072
!(rt->rt6i_flags & RTF_GATEWAY))) {
9851073
/* Create a RTF_CACHE clone which will not be
@@ -990,20 +1078,32 @@ static struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table,
9901078

9911079
struct rt6_info *uncached_rt;
9921080

1081+
dst_use(&rt->dst, jiffies);
1082+
read_unlock_bh(&table->tb6_lock);
1083+
9931084
uncached_rt = ip6_rt_cache_alloc(rt, &fl6->daddr, NULL);
9941085
dst_release(&rt->dst);
9951086

9961087
if (uncached_rt)
9971088
rt6_uncached_list_add(uncached_rt);
9981089
else
9991090
uncached_rt = net->ipv6.ip6_null_entry;
1091+
10001092
dst_hold(&uncached_rt->dst);
10011093
return uncached_rt;
1002-
}
10031094

1004-
done:
1005-
rt6_dst_from_metrics_check(rt);
1006-
return rt;
1095+
} else {
1096+
/* Get a percpu copy */
1097+
1098+
struct rt6_info *pcpu_rt;
1099+
1100+
rt->dst.lastuse = jiffies;
1101+
rt->dst.__use++;
1102+
pcpu_rt = rt6_get_pcpu_route(rt);
1103+
read_unlock_bh(&table->tb6_lock);
1104+
1105+
return pcpu_rt;
1106+
}
10071107
}
10081108

10091109
static struct rt6_info *ip6_pol_route_input(struct net *net, struct fib6_table *table,
@@ -1147,7 +1247,7 @@ static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
11471247

11481248
rt6_dst_from_metrics_check(rt);
11491249

1150-
if (unlikely(dst->flags & DST_NOCACHE))
1250+
if ((rt->rt6i_flags & RTF_PCPU) || unlikely(dst->flags & DST_NOCACHE))
11511251
return rt6_dst_from_check(rt, cookie);
11521252
else
11531253
return rt6_check(rt, cookie);

0 commit comments

Comments
 (0)