Skip to content

Commit 135c561

Browse files
Andi KleenIngo Molnar
Andi Kleen
authored and
Ingo Molnar
committed
perf/x86/intel: Support Haswell/v4 LBR format
Haswell has two additional LBR from flags for TSX: in_tx and abort_tx, implemented as a new "v4" version of the LBR format. Handle those flags and adjust the sign extension code so that it still extends the from address correctly. The flags are exported in the LBR record in the same way as the existing misprediction flag. Signed-off-by: Andi Kleen <[email protected]> Cc: Andi Kleen <[email protected]> Cc: Peter Zijlstra <[email protected]> Cc: Arnaldo Carvalho de Melo <[email protected]> Link: http://lkml.kernel.org/r/[email protected] Signed-off-by: Ingo Molnar <[email protected]>
1 parent 72db559 commit 135c561

File tree

3 files changed

+61
-7
lines changed

3 files changed

+61
-7
lines changed

arch/x86/kernel/cpu/perf_event_intel_lbr.c

Lines changed: 51 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,16 @@ enum {
1212
LBR_FORMAT_LIP = 0x01,
1313
LBR_FORMAT_EIP = 0x02,
1414
LBR_FORMAT_EIP_FLAGS = 0x03,
15+
LBR_FORMAT_EIP_FLAGS2 = 0x04,
16+
LBR_FORMAT_MAX_KNOWN = LBR_FORMAT_EIP_FLAGS2,
17+
};
18+
19+
static enum {
20+
LBR_EIP_FLAGS = 1,
21+
LBR_TSX = 2,
22+
} lbr_desc[LBR_FORMAT_MAX_KNOWN + 1] = {
23+
[LBR_FORMAT_EIP_FLAGS] = LBR_EIP_FLAGS,
24+
[LBR_FORMAT_EIP_FLAGS2] = LBR_EIP_FLAGS | LBR_TSX,
1525
};
1626

1727
/*
@@ -56,6 +66,8 @@ enum {
5666
LBR_FAR)
5767

5868
#define LBR_FROM_FLAG_MISPRED (1ULL << 63)
69+
#define LBR_FROM_FLAG_IN_TX (1ULL << 62)
70+
#define LBR_FROM_FLAG_ABORT (1ULL << 61)
5971

6072
#define for_each_branch_sample_type(x) \
6173
for ((x) = PERF_SAMPLE_BRANCH_USER; \
@@ -81,9 +93,13 @@ enum {
8193
X86_BR_JMP = 1 << 9, /* jump */
8294
X86_BR_IRQ = 1 << 10,/* hw interrupt or trap or fault */
8395
X86_BR_IND_CALL = 1 << 11,/* indirect calls */
96+
X86_BR_ABORT = 1 << 12,/* transaction abort */
97+
X86_BR_IN_TX = 1 << 13,/* in transaction */
98+
X86_BR_NO_TX = 1 << 14,/* not in transaction */
8499
};
85100

86101
#define X86_BR_PLM (X86_BR_USER | X86_BR_KERNEL)
102+
#define X86_BR_ANYTX (X86_BR_NO_TX | X86_BR_IN_TX)
87103

88104
#define X86_BR_ANY \
89105
(X86_BR_CALL |\
@@ -95,6 +111,7 @@ enum {
95111
X86_BR_JCC |\
96112
X86_BR_JMP |\
97113
X86_BR_IRQ |\
114+
X86_BR_ABORT |\
98115
X86_BR_IND_CALL)
99116

100117
#define X86_BR_ALL (X86_BR_PLM | X86_BR_ANY)
@@ -270,21 +287,31 @@ static void intel_pmu_lbr_read_64(struct cpu_hw_events *cpuc)
270287

271288
for (i = 0; i < x86_pmu.lbr_nr; i++) {
272289
unsigned long lbr_idx = (tos - i) & mask;
273-
u64 from, to, mis = 0, pred = 0;
290+
u64 from, to, mis = 0, pred = 0, in_tx = 0, abort = 0;
291+
int skip = 0;
292+
int lbr_flags = lbr_desc[lbr_format];
274293

275294
rdmsrl(x86_pmu.lbr_from + lbr_idx, from);
276295
rdmsrl(x86_pmu.lbr_to + lbr_idx, to);
277296

278-
if (lbr_format == LBR_FORMAT_EIP_FLAGS) {
297+
if (lbr_flags & LBR_EIP_FLAGS) {
279298
mis = !!(from & LBR_FROM_FLAG_MISPRED);
280299
pred = !mis;
281-
from = (u64)((((s64)from) << 1) >> 1);
300+
skip = 1;
301+
}
302+
if (lbr_flags & LBR_TSX) {
303+
in_tx = !!(from & LBR_FROM_FLAG_IN_TX);
304+
abort = !!(from & LBR_FROM_FLAG_ABORT);
305+
skip = 3;
282306
}
307+
from = (u64)((((s64)from) << skip) >> skip);
283308

284309
cpuc->lbr_entries[i].from = from;
285310
cpuc->lbr_entries[i].to = to;
286311
cpuc->lbr_entries[i].mispred = mis;
287312
cpuc->lbr_entries[i].predicted = pred;
313+
cpuc->lbr_entries[i].in_tx = in_tx;
314+
cpuc->lbr_entries[i].abort = abort;
288315
cpuc->lbr_entries[i].reserved = 0;
289316
}
290317
cpuc->lbr_stack.nr = i;
@@ -334,6 +361,16 @@ static void intel_pmu_setup_sw_lbr_filter(struct perf_event *event)
334361

335362
if (br_type & PERF_SAMPLE_BRANCH_IND_CALL)
336363
mask |= X86_BR_IND_CALL;
364+
365+
if (br_type & PERF_SAMPLE_BRANCH_ABORT_TX)
366+
mask |= X86_BR_ABORT;
367+
368+
if (br_type & PERF_SAMPLE_BRANCH_IN_TX)
369+
mask |= X86_BR_IN_TX;
370+
371+
if (br_type & PERF_SAMPLE_BRANCH_NO_TX)
372+
mask |= X86_BR_NO_TX;
373+
337374
/*
338375
* stash actual user request into reg, it may
339376
* be used by fixup code for some CPU
@@ -408,7 +445,7 @@ int intel_pmu_setup_lbr_filter(struct perf_event *event)
408445
* decoded (e.g., text page not present), then X86_BR_NONE is
409446
* returned.
410447
*/
411-
static int branch_type(unsigned long from, unsigned long to)
448+
static int branch_type(unsigned long from, unsigned long to, int abort)
412449
{
413450
struct insn insn;
414451
void *addr;
@@ -428,6 +465,9 @@ static int branch_type(unsigned long from, unsigned long to)
428465
if (from == 0 || to == 0)
429466
return X86_BR_NONE;
430467

468+
if (abort)
469+
return X86_BR_ABORT | to_plm;
470+
431471
if (from_plm == X86_BR_USER) {
432472
/*
433473
* can happen if measuring at the user level only
@@ -574,7 +614,13 @@ intel_pmu_lbr_filter(struct cpu_hw_events *cpuc)
574614
from = cpuc->lbr_entries[i].from;
575615
to = cpuc->lbr_entries[i].to;
576616

577-
type = branch_type(from, to);
617+
type = branch_type(from, to, cpuc->lbr_entries[i].abort);
618+
if (type != X86_BR_NONE && (br_sel & X86_BR_ANYTX)) {
619+
if (cpuc->lbr_entries[i].in_tx)
620+
type |= X86_BR_IN_TX;
621+
else
622+
type |= X86_BR_NO_TX;
623+
}
578624

579625
/* if type does not correspond, then discard */
580626
if (type == X86_BR_NONE || (br_sel & type) != type) {

include/linux/perf_event.h

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -73,13 +73,18 @@ struct perf_raw_record {
7373
*
7474
* support for mispred, predicted is optional. In case it
7575
* is not supported mispred = predicted = 0.
76+
*
77+
* in_tx: running in a hardware transaction
78+
* abort: aborting a hardware transaction
7679
*/
7780
struct perf_branch_entry {
7881
__u64 from;
7982
__u64 to;
8083
__u64 mispred:1, /* target mispredicted */
8184
predicted:1,/* target predicted */
82-
reserved:62;
85+
in_tx:1, /* in transaction */
86+
abort:1, /* transaction abort */
87+
reserved:60;
8388
};
8489

8590
/*

include/uapi/linux/perf_event.h

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -157,8 +157,11 @@ enum perf_branch_sample_type {
157157
PERF_SAMPLE_BRANCH_ANY_CALL = 1U << 4, /* any call branch */
158158
PERF_SAMPLE_BRANCH_ANY_RETURN = 1U << 5, /* any return branch */
159159
PERF_SAMPLE_BRANCH_IND_CALL = 1U << 6, /* indirect calls */
160+
PERF_SAMPLE_BRANCH_ABORT_TX = 1U << 7, /* transaction aborts */
161+
PERF_SAMPLE_BRANCH_IN_TX = 1U << 8, /* in transaction */
162+
PERF_SAMPLE_BRANCH_NO_TX = 1U << 9, /* not in transaction */
160163

161-
PERF_SAMPLE_BRANCH_MAX = 1U << 7, /* non-ABI */
164+
PERF_SAMPLE_BRANCH_MAX = 1U << 10, /* non-ABI */
162165
};
163166

164167
#define PERF_SAMPLE_BRANCH_PLM_ALL \

0 commit comments

Comments
 (0)