
Commit f06cc66

Author: Peter Zijlstra (committed)
perf: Optimize perf_cgroup_switch()
Namhyung reported that bd27568 ("perf: Rewrite core context handling") regresses context switch overhead when perf-cgroup is in use together with 'slow' PMUs like uncore.

Specifically, perf_cgroup_switch()'s perf_ctx_disable() / ctx_sched_out() etc. all iterate the full list of active PMUs for that CPU, even if they don't have cgroup events.

Previously there was cgrp_cpuctx_list which linked the relevant PMUs together, but that got lost in the rework. Instead of re-introducing a similar list, let the perf_event_pmu_context iteration skip those that do not have cgroup events. This avoids growing multiple versions of the perf_event_pmu_context iteration.

Measured performance (on a slightly different patch):

Before)

  $ taskset -c 0 ./perf bench sched pipe -l 10000 -G AAA,BBB
  # Running 'sched/pipe' benchmark:
  # Executed 10000 pipe operations between two processes

       Total time: 0.901 [sec]

       90.128700 usecs/op
           11095 ops/sec

After)

  $ taskset -c 0 ./perf bench sched pipe -l 10000 -G AAA,BBB
  # Running 'sched/pipe' benchmark:
  # Executed 10000 pipe operations between two processes

       Total time: 0.065 [sec]

        6.560100 usecs/op
          152436 ops/sec

Fixes: bd27568 ("perf: Rewrite core context handling")
Reported-by: Namhyung Kim <[email protected]>
Debugged-by: Namhyung Kim <[email protected]>
Signed-off-by: Peter Zijlstra (Intel) <[email protected]>
Link: https://lkml.kernel.org/r/[email protected]
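For illustration only, here is a minimal, self-contained C sketch of the pattern this patch applies; it is a hypothetical user-space mock, not the kernel code, and the names mock_pmu_ctx and mock_ctx_disable are invented for the example. The idea mirrors the diff below: each per-PMU context carries an nr_cgroups count, and a cgroup-only pass over the context list skips entries whose count is zero, so 'slow' PMUs without cgroup events are never touched on a cgroup switch.

#include <stdbool.h>
#include <stdio.h>

/* Hypothetical mock of a per-PMU context; only the field relevant here. */
struct mock_pmu_ctx {
	const char  *name;
	unsigned int nr_cgroups;  /* bumped when a cgroup event is enabled on this PMU */
};

/* Mirrors the perf_ctx_disable() change: skip PMUs without cgroup events
 * when the pass is cgroup-only. */
static void mock_ctx_disable(struct mock_pmu_ctx *pmu_ctx, int n, bool cgroup)
{
	for (int i = 0; i < n; i++) {
		if (cgroup && !pmu_ctx[i].nr_cgroups)
			continue;  /* e.g. an uncore PMU with no cgroup events */
		printf("disable %s\n", pmu_ctx[i].name);
	}
}

int main(void)
{
	struct mock_pmu_ctx ctxs[] = {
		{ "cpu",    1 },  /* has cgroup events */
		{ "uncore", 0 },  /* 'slow' PMU, no cgroup events */
	};

	/* Cgroup-switch path: only the "cpu" entry is disabled. */
	mock_ctx_disable(ctxs, 2, true);
	return 0;
}

In the actual patch the same test sits inside the list_for_each_entry() walks in perf_ctx_disable()/perf_ctx_enable(), ctx_sched_out() and ctx_groups_sched_in(), gated by the new EVENT_CGROUP flag or a cgroup bool argument.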
1 parent: 8f4156d · commit: f06cc66

2 files changed: +61 −55 lines

include/linux/perf_event.h

Lines changed: 1 addition & 0 deletions
@@ -878,6 +878,7 @@ struct perf_event_pmu_context {
 	unsigned int			embedded : 1;
 
 	unsigned int			nr_events;
+	unsigned int			nr_cgroups;
 
 	atomic_t			refcount; /* event <-> epc */
 	struct rcu_head			rcu_head;

kernel/events/core.c

Lines changed: 60 additions & 55 deletions
@@ -375,6 +375,7 @@ enum event_type_t {
 	EVENT_TIME = 0x4,
 	/* see ctx_resched() for details */
 	EVENT_CPU = 0x8,
+	EVENT_CGROUP = 0x10,
 	EVENT_ALL = EVENT_FLEXIBLE | EVENT_PINNED,
 };
 
@@ -684,20 +685,26 @@ do { \
 	___p; \
 })
 
-static void perf_ctx_disable(struct perf_event_context *ctx)
+static void perf_ctx_disable(struct perf_event_context *ctx, bool cgroup)
 {
 	struct perf_event_pmu_context *pmu_ctx;
 
-	list_for_each_entry(pmu_ctx, &ctx->pmu_ctx_list, pmu_ctx_entry)
+	list_for_each_entry(pmu_ctx, &ctx->pmu_ctx_list, pmu_ctx_entry) {
+		if (cgroup && !pmu_ctx->nr_cgroups)
+			continue;
 		perf_pmu_disable(pmu_ctx->pmu);
+	}
 }
 
-static void perf_ctx_enable(struct perf_event_context *ctx)
+static void perf_ctx_enable(struct perf_event_context *ctx, bool cgroup)
 {
 	struct perf_event_pmu_context *pmu_ctx;
 
-	list_for_each_entry(pmu_ctx, &ctx->pmu_ctx_list, pmu_ctx_entry)
+	list_for_each_entry(pmu_ctx, &ctx->pmu_ctx_list, pmu_ctx_entry) {
+		if (cgroup && !pmu_ctx->nr_cgroups)
+			continue;
 		perf_pmu_enable(pmu_ctx->pmu);
+	}
 }
 
 static void ctx_sched_out(struct perf_event_context *ctx, enum event_type_t event_type);
@@ -856,9 +863,9 @@ static void perf_cgroup_switch(struct task_struct *task)
 		return;
 
 	perf_ctx_lock(cpuctx, cpuctx->task_ctx);
-	perf_ctx_disable(&cpuctx->ctx);
+	perf_ctx_disable(&cpuctx->ctx, true);
 
-	ctx_sched_out(&cpuctx->ctx, EVENT_ALL);
+	ctx_sched_out(&cpuctx->ctx, EVENT_ALL|EVENT_CGROUP);
 	/*
 	 * must not be done before ctxswout due
 	 * to update_cgrp_time_from_cpuctx() in
@@ -870,9 +877,9 @@ static void perf_cgroup_switch(struct task_struct *task)
 	 * perf_cgroup_set_timestamp() in ctx_sched_in()
 	 * to not have to pass task around
 	 */
-	ctx_sched_in(&cpuctx->ctx, EVENT_ALL);
+	ctx_sched_in(&cpuctx->ctx, EVENT_ALL|EVENT_CGROUP);
 
-	perf_ctx_enable(&cpuctx->ctx);
+	perf_ctx_enable(&cpuctx->ctx, true);
 	perf_ctx_unlock(cpuctx, cpuctx->task_ctx);
 }
 
@@ -965,6 +972,8 @@ perf_cgroup_event_enable(struct perf_event *event, struct perf_event_context *ctx)
 	if (!is_cgroup_event(event))
 		return;
 
+	event->pmu_ctx->nr_cgroups++;
+
 	/*
 	 * Because cgroup events are always per-cpu events,
 	 * @ctx == &cpuctx->ctx.
@@ -985,6 +994,8 @@ perf_cgroup_event_disable(struct perf_event *event, struct perf_event_context *ctx)
 	if (!is_cgroup_event(event))
 		return;
 
+	event->pmu_ctx->nr_cgroups--;
+
 	/*
 	 * Because cgroup events are always per-cpu events,
 	 * @ctx == &cpuctx->ctx.
@@ -2677,9 +2688,9 @@ static void ctx_resched(struct perf_cpu_context *cpuctx,
 
 	event_type &= EVENT_ALL;
 
-	perf_ctx_disable(&cpuctx->ctx);
+	perf_ctx_disable(&cpuctx->ctx, false);
 	if (task_ctx) {
-		perf_ctx_disable(task_ctx);
+		perf_ctx_disable(task_ctx, false);
 		task_ctx_sched_out(task_ctx, event_type);
 	}
 
@@ -2697,9 +2708,9 @@ static void ctx_resched(struct perf_cpu_context *cpuctx,
 
 	perf_event_sched_in(cpuctx, task_ctx);
 
-	perf_ctx_enable(&cpuctx->ctx);
+	perf_ctx_enable(&cpuctx->ctx, false);
 	if (task_ctx)
-		perf_ctx_enable(task_ctx);
+		perf_ctx_enable(task_ctx, false);
 }
 
 void perf_pmu_resched(struct pmu *pmu)
@@ -3244,6 +3255,9 @@ ctx_sched_out(struct perf_event_context *ctx, enum event_type_t event_type)
 	struct perf_cpu_context *cpuctx = this_cpu_ptr(&perf_cpu_context);
 	struct perf_event_pmu_context *pmu_ctx;
 	int is_active = ctx->is_active;
+	bool cgroup = event_type & EVENT_CGROUP;
+
+	event_type &= ~EVENT_CGROUP;
 
 	lockdep_assert_held(&ctx->lock);
 
@@ -3290,8 +3304,11 @@ ctx_sched_out(struct perf_event_context *ctx, enum event_type_t event_type)
 
 	is_active ^= ctx->is_active; /* changed bits */
 
-	list_for_each_entry(pmu_ctx, &ctx->pmu_ctx_list, pmu_ctx_entry)
+	list_for_each_entry(pmu_ctx, &ctx->pmu_ctx_list, pmu_ctx_entry) {
+		if (cgroup && !pmu_ctx->nr_cgroups)
+			continue;
 		__pmu_ctx_sched_out(pmu_ctx, is_active);
+	}
 }
 
 /*
@@ -3482,7 +3499,7 @@ perf_event_context_sched_out(struct task_struct *task, struct task_struct *next)
 		raw_spin_lock_nested(&next_ctx->lock, SINGLE_DEPTH_NESTING);
 		if (context_equiv(ctx, next_ctx)) {
 
-			perf_ctx_disable(ctx);
+			perf_ctx_disable(ctx, false);
 
 			/* PMIs are disabled; ctx->nr_pending is stable. */
 			if (local_read(&ctx->nr_pending) ||
@@ -3502,7 +3519,7 @@ perf_event_context_sched_out(struct task_struct *task, struct task_struct *next)
 			perf_ctx_sched_task_cb(ctx, false);
 			perf_event_swap_task_ctx_data(ctx, next_ctx);
 
-			perf_ctx_enable(ctx);
+			perf_ctx_enable(ctx, false);
 
 			/*
 			 * RCU_INIT_POINTER here is safe because we've not
@@ -3526,13 +3543,13 @@ perf_event_context_sched_out(struct task_struct *task, struct task_struct *next)
 
 	if (do_switch) {
 		raw_spin_lock(&ctx->lock);
-		perf_ctx_disable(ctx);
+		perf_ctx_disable(ctx, false);
 
 inside_switch:
 		perf_ctx_sched_task_cb(ctx, false);
 		task_ctx_sched_out(ctx, EVENT_ALL);
 
-		perf_ctx_enable(ctx);
+		perf_ctx_enable(ctx, false);
 		raw_spin_unlock(&ctx->lock);
 	}
 }
@@ -3818,54 +3835,42 @@ static int merge_sched_in(struct perf_event *event, void *data)
 	return 0;
 }
 
-static void ctx_pinned_sched_in(struct perf_event_context *ctx, struct pmu *pmu)
+static void pmu_groups_sched_in(struct perf_event_context *ctx,
+				struct perf_event_groups *groups,
+				struct pmu *pmu)
 {
-	struct perf_event_pmu_context *pmu_ctx;
 	int can_add_hw = 1;
-
-	if (pmu) {
-		visit_groups_merge(ctx, &ctx->pinned_groups,
-				   smp_processor_id(), pmu,
-				   merge_sched_in, &can_add_hw);
-	} else {
-		list_for_each_entry(pmu_ctx, &ctx->pmu_ctx_list, pmu_ctx_entry) {
-			can_add_hw = 1;
-			visit_groups_merge(ctx, &ctx->pinned_groups,
-					   smp_processor_id(), pmu_ctx->pmu,
-					   merge_sched_in, &can_add_hw);
-		}
-	}
+	visit_groups_merge(ctx, groups, smp_processor_id(), pmu,
+			   merge_sched_in, &can_add_hw);
 }
 
-static void ctx_flexible_sched_in(struct perf_event_context *ctx, struct pmu *pmu)
+static void ctx_groups_sched_in(struct perf_event_context *ctx,
+				struct perf_event_groups *groups,
+				bool cgroup)
 {
 	struct perf_event_pmu_context *pmu_ctx;
-	int can_add_hw = 1;
 
-	if (pmu) {
-		visit_groups_merge(ctx, &ctx->flexible_groups,
-				   smp_processor_id(), pmu,
-				   merge_sched_in, &can_add_hw);
-	} else {
-		list_for_each_entry(pmu_ctx, &ctx->pmu_ctx_list, pmu_ctx_entry) {
-			can_add_hw = 1;
-			visit_groups_merge(ctx, &ctx->flexible_groups,
-					   smp_processor_id(), pmu_ctx->pmu,
-					   merge_sched_in, &can_add_hw);
-		}
+	list_for_each_entry(pmu_ctx, &ctx->pmu_ctx_list, pmu_ctx_entry) {
+		if (cgroup && !pmu_ctx->nr_cgroups)
+			continue;
+		pmu_groups_sched_in(ctx, groups, pmu_ctx->pmu);
 	}
 }
 
-static void __pmu_ctx_sched_in(struct perf_event_context *ctx, struct pmu *pmu)
+static void __pmu_ctx_sched_in(struct perf_event_context *ctx,
+			       struct pmu *pmu)
 {
-	ctx_flexible_sched_in(ctx, pmu);
+	pmu_groups_sched_in(ctx, &ctx->flexible_groups, pmu);
 }
 
 static void
 ctx_sched_in(struct perf_event_context *ctx, enum event_type_t event_type)
 {
 	struct perf_cpu_context *cpuctx = this_cpu_ptr(&perf_cpu_context);
 	int is_active = ctx->is_active;
+	bool cgroup = event_type & EVENT_CGROUP;
+
+	event_type &= ~EVENT_CGROUP;
 
 	lockdep_assert_held(&ctx->lock);
 
@@ -3898,11 +3903,11 @@ ctx_sched_in(struct perf_event_context *ctx, enum event_type_t event_type)
 	 * in order to give them the best chance of going on.
 	 */
 	if (is_active & EVENT_PINNED)
-		ctx_pinned_sched_in(ctx, NULL);
+		ctx_groups_sched_in(ctx, &ctx->pinned_groups, cgroup);
 
 	/* Then walk through the lower prio flexible groups */
 	if (is_active & EVENT_FLEXIBLE)
-		ctx_flexible_sched_in(ctx, NULL);
+		ctx_groups_sched_in(ctx, &ctx->flexible_groups, cgroup);
 }
 
 static void perf_event_context_sched_in(struct task_struct *task)
@@ -3917,11 +3922,11 @@ static void perf_event_context_sched_in(struct task_struct *task)
 
 	if (cpuctx->task_ctx == ctx) {
 		perf_ctx_lock(cpuctx, ctx);
-		perf_ctx_disable(ctx);
+		perf_ctx_disable(ctx, false);
 
 		perf_ctx_sched_task_cb(ctx, true);
 
-		perf_ctx_enable(ctx);
+		perf_ctx_enable(ctx, false);
 		perf_ctx_unlock(cpuctx, ctx);
 		goto rcu_unlock;
 	}
@@ -3934,7 +3939,7 @@ static void perf_event_context_sched_in(struct task_struct *task)
 	if (!ctx->nr_events)
 		goto unlock;
 
-	perf_ctx_disable(ctx);
+	perf_ctx_disable(ctx, false);
 	/*
 	 * We want to keep the following priority order:
 	 * cpu pinned (that don't need to move), task pinned,
@@ -3944,7 +3949,7 @@ static void perf_event_context_sched_in(struct task_struct *task)
 	 * events, no need to flip the cpuctx's events around.
 	 */
 	if (!RB_EMPTY_ROOT(&ctx->pinned_groups.tree)) {
-		perf_ctx_disable(&cpuctx->ctx);
+		perf_ctx_disable(&cpuctx->ctx, false);
 		ctx_sched_out(&cpuctx->ctx, EVENT_FLEXIBLE);
 	}
 
@@ -3953,9 +3958,9 @@ static void perf_event_context_sched_in(struct task_struct *task)
 	perf_ctx_sched_task_cb(cpuctx->task_ctx, true);
 
 	if (!RB_EMPTY_ROOT(&ctx->pinned_groups.tree))
-		perf_ctx_enable(&cpuctx->ctx);
+		perf_ctx_enable(&cpuctx->ctx, false);
 
-	perf_ctx_enable(ctx);
+	perf_ctx_enable(ctx, false);
 
 unlock:
 	perf_ctx_unlock(cpuctx, ctx);