1 #include <linux/bitops.h>
2 #include <linux/types.h>
3 #include <linux/slab.h>
4 
5 #include <asm/perf_event.h>
6 #include <asm/insn.h>
7 
8 #include "perf_event.h"
9 
10 /* The size of a BTS record in bytes: */
11 #define BTS_RECORD_SIZE		24
12 
13 #define BTS_BUFFER_SIZE		(PAGE_SIZE << 4)
14 #define PEBS_BUFFER_SIZE	PAGE_SIZE
15 #define PEBS_FIXUP_SIZE		PAGE_SIZE
16 
17 /*
18  * pebs_record_32 for p4 and core not supported
19 
20 struct pebs_record_32 {
21 	u32 flags, ip;
22 	u32 ax, bc, cx, dx;
23 	u32 si, di, bp, sp;
24 };
25 
26  */
27 
/*
 * Layout of the PEBS "data source" (dse) status word.  The two anonymous
 * structs overlay the same low bits of @val: the ld_* view is decoded for
 * load-latency records (load_latency_data()), the st_* view for
 * precise-store records (precise_store_data()).
 */
union intel_x86_pebs_dse {
	u64 val;
	struct {
		unsigned int ld_dse:4;		/* source encoding, index into pebs_data_source[] */
		unsigned int ld_stlb_miss:1;	/* load missed the 2nd level TLB */
		unsigned int ld_locked:1;	/* load carried a lock prefix */
		unsigned int ld_reserved:26;
	};
	struct {
		unsigned int st_l1d_hit:1;	/* store hit the L1 data cache */
		unsigned int st_reserved1:3;
		unsigned int st_stlb_miss:1;	/* store missed the 2nd level TLB */
		unsigned int st_locked:1;	/* store carried a lock prefix */
		unsigned int st_reserved2:26;
	};
};
44 
45 
46 /*
47  * Map PEBS Load Latency Data Source encodings to generic
48  * memory data source information
49  */
50 #define P(a, b) PERF_MEM_S(a, b)
51 #define OP_LH (P(OP, LOAD) | P(LVL, HIT))
52 #define SNOOP_NONE_MISS (P(SNOOP, NONE) | P(SNOOP, MISS))
53 
/* Indexed by the 4-bit ld_dse field of a load-latency PEBS record. */
static const u64 pebs_data_source[] = {
	P(OP, LOAD) | P(LVL, MISS) | P(LVL, L3) | P(SNOOP, NA),/* 0x00:ukn L3 */
	OP_LH | P(LVL, L1)  | P(SNOOP, NONE),	/* 0x01: L1 local */
	OP_LH | P(LVL, LFB) | P(SNOOP, NONE),	/* 0x02: LFB hit */
	OP_LH | P(LVL, L2)  | P(SNOOP, NONE),	/* 0x03: L2 hit */
	OP_LH | P(LVL, L3)  | P(SNOOP, NONE),	/* 0x04: L3 hit */
	OP_LH | P(LVL, L3)  | P(SNOOP, MISS),	/* 0x05: L3 hit, snoop miss */
	OP_LH | P(LVL, L3)  | P(SNOOP, HIT),	/* 0x06: L3 hit, snoop hit */
	OP_LH | P(LVL, L3)  | P(SNOOP, HITM),	/* 0x07: L3 hit, snoop hitm */
	OP_LH | P(LVL, REM_CCE1) | P(SNOOP, HIT),  /* 0x08: L3 miss snoop hit */
	OP_LH | P(LVL, REM_CCE1) | P(SNOOP, HITM), /* 0x09: L3 miss snoop hitm*/
	OP_LH | P(LVL, LOC_RAM)  | P(SNOOP, HIT),  /* 0x0a: L3 miss, shared */
	OP_LH | P(LVL, REM_RAM1) | P(SNOOP, HIT),  /* 0x0b: L3 miss, shared */
	OP_LH | P(LVL, LOC_RAM)  | SNOOP_NONE_MISS,/* 0x0c: L3 miss, excl */
	OP_LH | P(LVL, REM_RAM1) | SNOOP_NONE_MISS,/* 0x0d: L3 miss, excl */
	OP_LH | P(LVL, IO)  | P(SNOOP, NONE), /* 0x0e: I/O */
	OP_LH | P(LVL, UNC) | P(SNOOP, NONE), /* 0x0f: uncached */
};
72 
/*
 * Decode a precise-store PEBS status word into a generic perf_mem
 * data source descriptor.
 */
static u64 precise_store_data(u64 status)
{
	union intel_x86_pebs_dse dse;
	u64 ret = P(OP, STORE) | P(SNOOP, NA) | P(LVL, L1) | P(TLB, L2);

	dse.val = status;

	/*
	 * bit 4: TLB access
	 * 1 = the store missed the 2nd level TLB (so it hit either the
	 * page walker or the OS), otherwise it hit the 2nd level TLB.
	 */
	ret |= dse.st_stlb_miss ? P(TLB, MISS) : P(TLB, HIT);

	/*
	 * bit 0: L1D hit.  When clear, all we know is that L1D missed.
	 */
	ret |= dse.st_l1d_hit ? P(LVL, HIT) : P(LVL, MISS);

	/*
	 * bit 5: the store had a lock prefix
	 */
	if (dse.st_locked)
		ret |= P(LOCK, LOCKED);

	return ret;
}
110 
/*
 * Build a perf_mem data source descriptor for Haswell data-address
 * (DATALA) precise events from the event flags and the PEBS status word.
 */
static u64 precise_datala_hsw(struct perf_event *event, u64 status)
{
	union perf_mem_data_src dse = { .val = PERF_MEM_NA };
	u64 flags = event->hw.flags;

	if (flags & PERF_X86_EVENT_PEBS_ST_HSW)
		dse.mem_op = PERF_MEM_OP_STORE;
	else if (flags & PERF_X86_EVENT_PEBS_LD_HSW)
		dse.mem_op = PERF_MEM_OP_LOAD;

	/*
	 * The L1 hit/miss bit (status bit 0) is only valid for the
	 * precise-store events:
	 *
	 * MEM_UOPS_RETIRED.STLB_MISS_STORES
	 * MEM_UOPS_RETIRED.LOCK_STORES
	 * MEM_UOPS_RETIRED.SPLIT_STORES
	 * MEM_UOPS_RETIRED.ALL_STORES
	 */
	if (flags & PERF_X86_EVENT_PEBS_ST_HSW)
		dse.mem_lvl = PERF_MEM_LVL_L1 |
			((status & 1) ? PERF_MEM_LVL_HIT : PERF_MEM_LVL_MISS);

	return dse.val;
}
138 
/*
 * Decode a load-latency PEBS status word into a generic perf_mem
 * data source descriptor.
 */
static u64 load_latency_data(u64 status)
{
	union intel_x86_pebs_dse dse;
	u64 val;

	dse.val = status;

	/* bits 0-3: data source encoding, via the mapping table */
	val = pebs_data_source[dse.ld_dse];

	/*
	 * Nehalem models (family 6, models 26/30/31/46) do not report
	 * TLB or lock information.
	 */
	if (boot_cpu_data.x86 == 0x6) {
		switch (boot_cpu_data.x86_model) {
		case 26:
		case 30:
		case 31:
		case 46:
			return val | P(TLB, NA) | P(LOCK, NA);
		}
	}

	/*
	 * bit 4: TLB access
	 * 0 = did not miss 2nd level TLB
	 * 1 = missed 2nd level TLB
	 */
	if (dse.ld_stlb_miss)
		val |= P(TLB, MISS) | P(TLB, L2);
	else
		val |= P(TLB, HIT) | P(TLB, L1) | P(TLB, L2);

	/*
	 * bit 5: the load had a lock prefix
	 */
	if (dse.ld_locked)
		val |= P(LOCK, LOCKED);

	return val;
}
179 
/* Hardware PEBS record layout for format 0 (Core); see intel_ds_init(). */
struct pebs_record_core {
	u64 flags, ip;
	u64 ax, bx, cx, dx;
	u64 si, di, bp, sp;
	u64 r8,  r9,  r10, r11;
	u64 r12, r13, r14, r15;
};
187 
/*
 * Hardware PEBS record layout for format 1 (Nehalem); extends the core
 * layout with status, data linear address, data source and latency.
 */
struct pebs_record_nhm {
	u64 flags, ip;
	u64 ax, bx, cx, dx;
	u64 si, di, bp, sp;
	u64 r8,  r9,  r10, r11;
	u64 r12, r13, r14, r15;
	u64 status, dla, dse, lat;
};
196 
/*
 * Hardware PEBS record layout for format 2 (Haswell).  Same as
 * pebs_record_nhm, with two additional fields: the eventing IP
 * (real_ip) and the TSX tuning word.
 */
struct pebs_record_hsw {
	u64 flags, ip;
	u64 ax, bx, cx, dx;
	u64 si, di, bp, sp;
	u64 r8,  r9,  r10, r11;
	u64 r12, r13, r14, r15;
	u64 status, dla, dse, lat;
	u64 real_ip, tsx_tuning;
};
209 
/*
 * Decoded view of pebs_record_hsw::tsx_tuning.  The low 32 bits count
 * cycles in the last TSX region; the abort-reason flags sit in the
 * upper bits (masked by PEBS_HSW_TSX_FLAGS, see intel_hsw_transaction()).
 */
union hsw_tsx_tuning {
	struct {
		u32 cycles_last_block     : 32,	/* cycles in last TSX region */
		    hle_abort		  : 1,
		    rtm_abort		  : 1,
		    instruction_abort     : 1,
		    non_instruction_abort : 1,
		    retry		  : 1,
		    data_conflict	  : 1,
		    capacity_writes	  : 1,
		    capacity_reads	  : 1;
	};
	u64	    value;
};
224 
225 #define PEBS_HSW_TSX_FLAGS	0xff00000000ULL
226 
init_debug_store_on_cpu(int cpu)227 void init_debug_store_on_cpu(int cpu)
228 {
229 	struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds;
230 
231 	if (!ds)
232 		return;
233 
234 	wrmsr_on_cpu(cpu, MSR_IA32_DS_AREA,
235 		     (u32)((u64)(unsigned long)ds),
236 		     (u32)((u64)(unsigned long)ds >> 32));
237 }
238 
fini_debug_store_on_cpu(int cpu)239 void fini_debug_store_on_cpu(int cpu)
240 {
241 	if (!per_cpu(cpu_hw_events, cpu).ds)
242 		return;
243 
244 	wrmsr_on_cpu(cpu, MSR_IA32_DS_AREA, 0, 0);
245 }
246 
247 static DEFINE_PER_CPU(void *, insn_buffer);
248 
alloc_pebs_buffer(int cpu)249 static int alloc_pebs_buffer(int cpu)
250 {
251 	struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds;
252 	int node = cpu_to_node(cpu);
253 	int max, thresh = 1; /* always use a single PEBS record */
254 	void *buffer, *ibuffer;
255 
256 	if (!x86_pmu.pebs)
257 		return 0;
258 
259 	buffer = kzalloc_node(PEBS_BUFFER_SIZE, GFP_KERNEL, node);
260 	if (unlikely(!buffer))
261 		return -ENOMEM;
262 
263 	/*
264 	 * HSW+ already provides us the eventing ip; no need to allocate this
265 	 * buffer then.
266 	 */
267 	if (x86_pmu.intel_cap.pebs_format < 2) {
268 		ibuffer = kzalloc_node(PEBS_FIXUP_SIZE, GFP_KERNEL, node);
269 		if (!ibuffer) {
270 			kfree(buffer);
271 			return -ENOMEM;
272 		}
273 		per_cpu(insn_buffer, cpu) = ibuffer;
274 	}
275 
276 	max = PEBS_BUFFER_SIZE / x86_pmu.pebs_record_size;
277 
278 	ds->pebs_buffer_base = (u64)(unsigned long)buffer;
279 	ds->pebs_index = ds->pebs_buffer_base;
280 	ds->pebs_absolute_maximum = ds->pebs_buffer_base +
281 		max * x86_pmu.pebs_record_size;
282 
283 	ds->pebs_interrupt_threshold = ds->pebs_buffer_base +
284 		thresh * x86_pmu.pebs_record_size;
285 
286 	return 0;
287 }
288 
release_pebs_buffer(int cpu)289 static void release_pebs_buffer(int cpu)
290 {
291 	struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds;
292 
293 	if (!ds || !x86_pmu.pebs)
294 		return;
295 
296 	kfree(per_cpu(insn_buffer, cpu));
297 	per_cpu(insn_buffer, cpu) = NULL;
298 
299 	kfree((void *)(unsigned long)ds->pebs_buffer_base);
300 	ds->pebs_buffer_base = 0;
301 }
302 
alloc_bts_buffer(int cpu)303 static int alloc_bts_buffer(int cpu)
304 {
305 	struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds;
306 	int node = cpu_to_node(cpu);
307 	int max, thresh;
308 	void *buffer;
309 
310 	if (!x86_pmu.bts)
311 		return 0;
312 
313 	buffer = kzalloc_node(BTS_BUFFER_SIZE, GFP_KERNEL | __GFP_NOWARN, node);
314 	if (unlikely(!buffer)) {
315 		WARN_ONCE(1, "%s: BTS buffer allocation failure\n", __func__);
316 		return -ENOMEM;
317 	}
318 
319 	max = BTS_BUFFER_SIZE / BTS_RECORD_SIZE;
320 	thresh = max / 16;
321 
322 	ds->bts_buffer_base = (u64)(unsigned long)buffer;
323 	ds->bts_index = ds->bts_buffer_base;
324 	ds->bts_absolute_maximum = ds->bts_buffer_base +
325 		max * BTS_RECORD_SIZE;
326 	ds->bts_interrupt_threshold = ds->bts_absolute_maximum -
327 		thresh * BTS_RECORD_SIZE;
328 
329 	return 0;
330 }
331 
release_bts_buffer(int cpu)332 static void release_bts_buffer(int cpu)
333 {
334 	struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds;
335 
336 	if (!ds || !x86_pmu.bts)
337 		return;
338 
339 	kfree((void *)(unsigned long)ds->bts_buffer_base);
340 	ds->bts_buffer_base = 0;
341 }
342 
alloc_ds_buffer(int cpu)343 static int alloc_ds_buffer(int cpu)
344 {
345 	int node = cpu_to_node(cpu);
346 	struct debug_store *ds;
347 
348 	ds = kzalloc_node(sizeof(*ds), GFP_KERNEL, node);
349 	if (unlikely(!ds))
350 		return -ENOMEM;
351 
352 	per_cpu(cpu_hw_events, cpu).ds = ds;
353 
354 	return 0;
355 }
356 
release_ds_buffer(int cpu)357 static void release_ds_buffer(int cpu)
358 {
359 	struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds;
360 
361 	if (!ds)
362 		return;
363 
364 	per_cpu(cpu_hw_events, cpu).ds = NULL;
365 	kfree(ds);
366 }
367 
/*
 * Tear down all DS-area buffers on every possible CPU.  The DS_AREA MSR
 * is cleared on all online CPUs first, so the hardware stops writing
 * into the buffers before they are freed.
 */
void release_ds_buffers(void)
{
	int cpu;

	if (!x86_pmu.bts && !x86_pmu.pebs)
		return;

	get_online_cpus();
	for_each_online_cpu(cpu)
		fini_debug_store_on_cpu(cpu);

	for_each_possible_cpu(cpu) {
		release_pebs_buffer(cpu);
		release_bts_buffer(cpu);
		release_ds_buffer(cpu);
	}
	put_online_cpus();
}
386 
/*
 * Allocate the per-cpu DS area plus BTS and PEBS buffers on every
 * possible CPU, then point the DS_AREA MSR at them on all online CPUs.
 *
 * BTS and PEBS fail independently: if one kind of buffer cannot be
 * allocated, only that facility stays inactive and its buffers are
 * released everywhere; if both fail, the DS areas are released too.
 */
void reserve_ds_buffers(void)
{
	int bts_err = 0, pebs_err = 0;
	int cpu;

	x86_pmu.bts_active = 0;
	x86_pmu.pebs_active = 0;

	if (!x86_pmu.bts && !x86_pmu.pebs)
		return;

	/* Treat a facility the hardware lacks as already failed. */
	if (!x86_pmu.bts)
		bts_err = 1;

	if (!x86_pmu.pebs)
		pebs_err = 1;

	get_online_cpus();

	for_each_possible_cpu(cpu) {
		if (alloc_ds_buffer(cpu)) {
			bts_err = 1;
			pebs_err = 1;
		}

		if (!bts_err && alloc_bts_buffer(cpu))
			bts_err = 1;

		if (!pebs_err && alloc_pebs_buffer(cpu))
			pebs_err = 1;

		/* Both facilities failed: no point trying further CPUs. */
		if (bts_err && pebs_err)
			break;
	}

	if (bts_err) {
		for_each_possible_cpu(cpu)
			release_bts_buffer(cpu);
	}

	if (pebs_err) {
		for_each_possible_cpu(cpu)
			release_pebs_buffer(cpu);
	}

	if (bts_err && pebs_err) {
		for_each_possible_cpu(cpu)
			release_ds_buffer(cpu);
	} else {
		if (x86_pmu.bts && !bts_err)
			x86_pmu.bts_active = 1;

		if (x86_pmu.pebs && !pebs_err)
			x86_pmu.pebs_active = 1;

		for_each_online_cpu(cpu)
			init_debug_store_on_cpu(cpu);
	}

	put_online_cpus();
}
448 
449 /*
450  * BTS
451  */
452 
/* BTS runs on a dedicated fixed pseudo-counter, never a general PMC. */
struct event_constraint bts_constraint =
	EVENT_CONSTRAINT(0, 1ULL << INTEL_PMC_IDX_FIXED_BTS, 0);
455 
/*
 * Turn on branch trace store via DEBUGCTL, honouring the event's
 * interrupt and privilege-level settings from @config.
 */
void intel_pmu_enable_bts(u64 config)
{
	unsigned long dbgctl = get_debugctlmsr();

	dbgctl |= DEBUGCTLMSR_TR | DEBUGCTLMSR_BTS;

	if (config & ARCH_PERFMON_EVENTSEL_INT)
		dbgctl |= DEBUGCTLMSR_BTINT;

	/* Suppress branch recording for privilege levels not counted. */
	if (!(config & ARCH_PERFMON_EVENTSEL_OS))
		dbgctl |= DEBUGCTLMSR_BTS_OFF_OS;

	if (!(config & ARCH_PERFMON_EVENTSEL_USR))
		dbgctl |= DEBUGCTLMSR_BTS_OFF_USR;

	update_debugctlmsr(dbgctl);
}
475 
intel_pmu_disable_bts(void)476 void intel_pmu_disable_bts(void)
477 {
478 	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
479 	unsigned long debugctlmsr;
480 
481 	if (!cpuc->ds)
482 		return;
483 
484 	debugctlmsr = get_debugctlmsr();
485 
486 	debugctlmsr &=
487 		~(DEBUGCTLMSR_TR | DEBUGCTLMSR_BTS | DEBUGCTLMSR_BTINT |
488 		  DEBUGCTLMSR_BTS_OFF_OS | DEBUGCTLMSR_BTS_OFF_USR);
489 
490 	update_debugctlmsr(debugctlmsr);
491 }
492 
/*
 * Flush all records accumulated in the BTS buffer into the perf ring
 * buffer of the (single) BTS event, one sample per branch record.
 *
 * Returns 1 when records were drained (or output space could not be
 * reserved), 0 when there was nothing to do.
 */
int intel_pmu_drain_bts_buffer(void)
{
	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
	struct debug_store *ds = cpuc->ds;
	/* Hardware layout of one BTS record in the DS buffer. */
	struct bts_record {
		u64	from;
		u64	to;
		u64	flags;
	};
	struct perf_event *event = cpuc->events[INTEL_PMC_IDX_FIXED_BTS];
	struct bts_record *at, *top;
	struct perf_output_handle handle;
	struct perf_event_header header;
	struct perf_sample_data data;
	struct pt_regs regs;

	if (!event)
		return 0;

	if (!x86_pmu.bts_active)
		return 0;

	at  = (struct bts_record *)(unsigned long)ds->bts_buffer_base;
	top = (struct bts_record *)(unsigned long)ds->bts_index;

	if (top <= at)
		return 0;

	memset(&regs, 0, sizeof(regs));

	/* Rewind the hardware write cursor; the buffer is drained below. */
	ds->bts_index = ds->bts_buffer_base;

	perf_sample_data_init(&data, 0, event->hw.last_period);

	/*
	 * Prepare a generic sample, i.e. fill in the invariant fields.
	 * We will overwrite the from and to address before we output
	 * the sample.
	 */
	perf_prepare_sample(&header, &data, event, &regs);

	/* Reserve ring-buffer space for all samples in one go. */
	if (perf_output_begin(&handle, event, header.size * (top - at)))
		return 1;

	for (; at < top; at++) {
		data.ip		= at->from;	/* branch source */
		data.addr	= at->to;	/* branch target */

		perf_output_sample(&handle, &header, &data, event);
	}

	perf_output_end(&handle);

	/* There's new data available. */
	event->hw.interrupts++;
	event->pending_kill = POLL_IN;
	return 1;
}
551 
552 /*
553  * PEBS
554  */
/* Events that may be used with PEBS on Core2, and the PMCs they allow. */
struct event_constraint intel_core2_pebs_event_constraints[] = {
	INTEL_FLAGS_UEVENT_CONSTRAINT(0x00c0, 0x1), /* INST_RETIRED.ANY */
	INTEL_FLAGS_UEVENT_CONSTRAINT(0xfec1, 0x1), /* X87_OPS_RETIRED.ANY */
	INTEL_FLAGS_UEVENT_CONSTRAINT(0x00c5, 0x1), /* BR_INST_RETIRED.MISPRED */
	INTEL_FLAGS_UEVENT_CONSTRAINT(0x1fc7, 0x1), /* SIMD_INST_RETURED.ANY */
	INTEL_FLAGS_EVENT_CONSTRAINT(0xcb, 0x1),    /* MEM_LOAD_RETIRED.* */
	/* INST_RETIRED.ANY_P, inv=1, cmask=16 (cycles:p). */
	INTEL_FLAGS_EVENT_CONSTRAINT(0x108000c0, 0x01),
	EVENT_CONSTRAINT_END
};
565 
/* Events that may be used with PEBS on Atom. */
struct event_constraint intel_atom_pebs_event_constraints[] = {
	INTEL_FLAGS_UEVENT_CONSTRAINT(0x00c0, 0x1), /* INST_RETIRED.ANY */
	INTEL_FLAGS_UEVENT_CONSTRAINT(0x00c5, 0x1), /* MISPREDICTED_BRANCH_RETIRED */
	INTEL_FLAGS_EVENT_CONSTRAINT(0xcb, 0x1),    /* MEM_LOAD_RETIRED.* */
	/* INST_RETIRED.ANY_P, inv=1, cmask=16 (cycles:p). */
	INTEL_FLAGS_EVENT_CONSTRAINT(0x108000c0, 0x01),
	EVENT_CONSTRAINT_END
};
574 
/* Events that may be used with PEBS on Silvermont. */
struct event_constraint intel_slm_pebs_event_constraints[] = {
	/* INST_RETIRED.ANY_P, inv=1, cmask=16 (cycles:p). */
	INTEL_FLAGS_EVENT_CONSTRAINT(0x108000c0, 0x1),
	/* Allow all events as PEBS with no flags */
	INTEL_ALL_EVENT_CONSTRAINT(0, 0x1),
	EVENT_CONSTRAINT_END
};
582 
/* Events that may be used with PEBS on Nehalem. */
struct event_constraint intel_nehalem_pebs_event_constraints[] = {
	INTEL_PLD_CONSTRAINT(0x100b, 0xf),      /* MEM_INST_RETIRED.* */
	INTEL_FLAGS_EVENT_CONSTRAINT(0x0f, 0xf),    /* MEM_UNCORE_RETIRED.* */
	INTEL_FLAGS_UEVENT_CONSTRAINT(0x010c, 0xf), /* MEM_STORE_RETIRED.DTLB_MISS */
	INTEL_FLAGS_EVENT_CONSTRAINT(0xc0, 0xf),    /* INST_RETIRED.ANY */
	INTEL_EVENT_CONSTRAINT(0xc2, 0xf),    /* UOPS_RETIRED.* */
	INTEL_FLAGS_EVENT_CONSTRAINT(0xc4, 0xf),    /* BR_INST_RETIRED.* */
	INTEL_FLAGS_UEVENT_CONSTRAINT(0x02c5, 0xf), /* BR_MISP_RETIRED.NEAR_CALL */
	INTEL_FLAGS_EVENT_CONSTRAINT(0xc7, 0xf),    /* SSEX_UOPS_RETIRED.* */
	INTEL_FLAGS_UEVENT_CONSTRAINT(0x20c8, 0xf), /* ITLB_MISS_RETIRED */
	INTEL_FLAGS_EVENT_CONSTRAINT(0xcb, 0xf),    /* MEM_LOAD_RETIRED.* */
	INTEL_FLAGS_EVENT_CONSTRAINT(0xf7, 0xf),    /* FP_ASSIST.* */
	/* INST_RETIRED.ANY_P, inv=1, cmask=16 (cycles:p). */
	INTEL_FLAGS_EVENT_CONSTRAINT(0x108000c0, 0x0f),
	EVENT_CONSTRAINT_END
};
599 
/* Events that may be used with PEBS on Westmere. */
struct event_constraint intel_westmere_pebs_event_constraints[] = {
	INTEL_PLD_CONSTRAINT(0x100b, 0xf),      /* MEM_INST_RETIRED.* */
	INTEL_FLAGS_EVENT_CONSTRAINT(0x0f, 0xf),    /* MEM_UNCORE_RETIRED.* */
	INTEL_FLAGS_UEVENT_CONSTRAINT(0x010c, 0xf), /* MEM_STORE_RETIRED.DTLB_MISS */
	INTEL_FLAGS_EVENT_CONSTRAINT(0xc0, 0xf),    /* INSTR_RETIRED.* */
	INTEL_EVENT_CONSTRAINT(0xc2, 0xf),    /* UOPS_RETIRED.* */
	INTEL_FLAGS_EVENT_CONSTRAINT(0xc4, 0xf),    /* BR_INST_RETIRED.* */
	INTEL_FLAGS_EVENT_CONSTRAINT(0xc5, 0xf),    /* BR_MISP_RETIRED.* */
	INTEL_FLAGS_EVENT_CONSTRAINT(0xc7, 0xf),    /* SSEX_UOPS_RETIRED.* */
	INTEL_FLAGS_UEVENT_CONSTRAINT(0x20c8, 0xf), /* ITLB_MISS_RETIRED */
	INTEL_FLAGS_EVENT_CONSTRAINT(0xcb, 0xf),    /* MEM_LOAD_RETIRED.* */
	INTEL_FLAGS_EVENT_CONSTRAINT(0xf7, 0xf),    /* FP_ASSIST.* */
	/* INST_RETIRED.ANY_P, inv=1, cmask=16 (cycles:p). */
	INTEL_FLAGS_EVENT_CONSTRAINT(0x108000c0, 0x0f),
	EVENT_CONSTRAINT_END
};
616 
617 struct event_constraint intel_snb_pebs_event_constraints[] = {
618 	INTEL_FLAGS_UEVENT_CONSTRAINT(0x01c0, 0x2), /* INST_RETIRED.PRECDIST */
619 	INTEL_PLD_CONSTRAINT(0x01cd, 0x8),    /* MEM_TRANS_RETIRED.LAT_ABOVE_THR */
620 	INTEL_PST_CONSTRAINT(0x02cd, 0x8),    /* MEM_TRANS_RETIRED.PRECISE_STORES */
621 	/* UOPS_RETIRED.ALL, inv=1, cmask=16 (cycles:p). */
622 	INTEL_FLAGS_EVENT_CONSTRAINT(0x108001c2, 0xf),
623         INTEL_EXCLEVT_CONSTRAINT(0xd0, 0xf),    /* MEM_UOP_RETIRED.* */
624         INTEL_EXCLEVT_CONSTRAINT(0xd1, 0xf),    /* MEM_LOAD_UOPS_RETIRED.* */
625         INTEL_EXCLEVT_CONSTRAINT(0xd2, 0xf),    /* MEM_LOAD_UOPS_LLC_HIT_RETIRED.* */
626         INTEL_EXCLEVT_CONSTRAINT(0xd3, 0xf),    /* MEM_LOAD_UOPS_LLC_MISS_RETIRED.* */
627 	/* Allow all events as PEBS with no flags */
628 	INTEL_ALL_EVENT_CONSTRAINT(0, 0xf),
629 	EVENT_CONSTRAINT_END
630 };
631 
632 struct event_constraint intel_ivb_pebs_event_constraints[] = {
633         INTEL_FLAGS_UEVENT_CONSTRAINT(0x01c0, 0x2), /* INST_RETIRED.PRECDIST */
634         INTEL_PLD_CONSTRAINT(0x01cd, 0x8),    /* MEM_TRANS_RETIRED.LAT_ABOVE_THR */
635 	INTEL_PST_CONSTRAINT(0x02cd, 0x8),    /* MEM_TRANS_RETIRED.PRECISE_STORES */
636 	/* UOPS_RETIRED.ALL, inv=1, cmask=16 (cycles:p). */
637 	INTEL_FLAGS_EVENT_CONSTRAINT(0x108001c2, 0xf),
638 	INTEL_EXCLEVT_CONSTRAINT(0xd0, 0xf),    /* MEM_UOP_RETIRED.* */
639 	INTEL_EXCLEVT_CONSTRAINT(0xd1, 0xf),    /* MEM_LOAD_UOPS_RETIRED.* */
640 	INTEL_EXCLEVT_CONSTRAINT(0xd2, 0xf),    /* MEM_LOAD_UOPS_LLC_HIT_RETIRED.* */
641 	INTEL_EXCLEVT_CONSTRAINT(0xd3, 0xf),    /* MEM_LOAD_UOPS_LLC_MISS_RETIRED.* */
642 	/* Allow all events as PEBS with no flags */
643 	INTEL_ALL_EVENT_CONSTRAINT(0, 0xf),
644         EVENT_CONSTRAINT_END
645 };
646 
/* Events that may be used with PEBS on Haswell. */
struct event_constraint intel_hsw_pebs_event_constraints[] = {
	INTEL_FLAGS_UEVENT_CONSTRAINT(0x01c0, 0x2), /* INST_RETIRED.PRECDIST */
	INTEL_PLD_CONSTRAINT(0x01cd, 0xf),    /* MEM_TRANS_RETIRED.* */
	/* UOPS_RETIRED.ALL, inv=1, cmask=16 (cycles:p). */
	INTEL_FLAGS_EVENT_CONSTRAINT(0x108001c2, 0xf),
	INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_NA(0x01c2, 0xf), /* UOPS_RETIRED.ALL */
	INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_XLD(0x11d0, 0xf), /* MEM_UOPS_RETIRED.STLB_MISS_LOADS */
	INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_XLD(0x21d0, 0xf), /* MEM_UOPS_RETIRED.LOCK_LOADS */
	INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_XLD(0x41d0, 0xf), /* MEM_UOPS_RETIRED.SPLIT_LOADS */
	INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_XLD(0x81d0, 0xf), /* MEM_UOPS_RETIRED.ALL_LOADS */
	INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_XST(0x12d0, 0xf), /* MEM_UOPS_RETIRED.STLB_MISS_STORES */
	INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_XST(0x42d0, 0xf), /* MEM_UOPS_RETIRED.SPLIT_STORES */
	INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_XST(0x82d0, 0xf), /* MEM_UOPS_RETIRED.ALL_STORES */
	INTEL_FLAGS_EVENT_CONSTRAINT_DATALA_XLD(0xd1, 0xf),    /* MEM_LOAD_UOPS_RETIRED.* */
	INTEL_FLAGS_EVENT_CONSTRAINT_DATALA_XLD(0xd2, 0xf),    /* MEM_LOAD_UOPS_L3_HIT_RETIRED.* */
	INTEL_FLAGS_EVENT_CONSTRAINT_DATALA_XLD(0xd3, 0xf),    /* MEM_LOAD_UOPS_L3_MISS_RETIRED.* */
	/* Allow all events as PEBS with no flags */
	INTEL_ALL_EVENT_CONSTRAINT(0, 0xf),
	EVENT_CONSTRAINT_END
};
667 
intel_pebs_constraints(struct perf_event * event)668 struct event_constraint *intel_pebs_constraints(struct perf_event *event)
669 {
670 	struct event_constraint *c;
671 
672 	if (!event->attr.precise_ip)
673 		return NULL;
674 
675 	if (x86_pmu.pebs_constraints) {
676 		for_each_event_constraint(c, x86_pmu.pebs_constraints) {
677 			if ((event->hw.config & c->cmask) == c->code) {
678 				event->hw.flags |= c->flags;
679 				return c;
680 			}
681 		}
682 	}
683 
684 	return &emptyconstraint;
685 }
686 
intel_pmu_pebs_enable(struct perf_event * event)687 void intel_pmu_pebs_enable(struct perf_event *event)
688 {
689 	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
690 	struct hw_perf_event *hwc = &event->hw;
691 
692 	hwc->config &= ~ARCH_PERFMON_EVENTSEL_INT;
693 
694 	cpuc->pebs_enabled |= 1ULL << hwc->idx;
695 
696 	if (event->hw.flags & PERF_X86_EVENT_PEBS_LDLAT)
697 		cpuc->pebs_enabled |= 1ULL << (hwc->idx + 32);
698 	else if (event->hw.flags & PERF_X86_EVENT_PEBS_ST)
699 		cpuc->pebs_enabled |= 1ULL << 63;
700 }
701 
intel_pmu_pebs_disable(struct perf_event * event)702 void intel_pmu_pebs_disable(struct perf_event *event)
703 {
704 	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
705 	struct hw_perf_event *hwc = &event->hw;
706 
707 	cpuc->pebs_enabled &= ~(1ULL << hwc->idx);
708 
709 	if (event->hw.flags & PERF_X86_EVENT_PEBS_LDLAT)
710 		cpuc->pebs_enabled &= ~(1ULL << (hwc->idx + 32));
711 	else if (event->hw.flags & PERF_X86_EVENT_PEBS_ST)
712 		cpuc->pebs_enabled &= ~(1ULL << 63);
713 
714 	if (cpuc->enabled)
715 		wrmsrl(MSR_IA32_PEBS_ENABLE, cpuc->pebs_enabled);
716 
717 	hwc->config |= ARCH_PERFMON_EVENTSEL_INT;
718 }
719 
intel_pmu_pebs_enable_all(void)720 void intel_pmu_pebs_enable_all(void)
721 {
722 	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
723 
724 	if (cpuc->pebs_enabled)
725 		wrmsrl(MSR_IA32_PEBS_ENABLE, cpuc->pebs_enabled);
726 }
727 
intel_pmu_pebs_disable_all(void)728 void intel_pmu_pebs_disable_all(void)
729 {
730 	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
731 
732 	if (cpuc->pebs_enabled)
733 		wrmsrl(MSR_IA32_PEBS_ENABLE, 0);
734 }
735 
/*
 * On trap-like PEBS hardware (pebs_trap set), the recorded IP points to
 * the instruction *after* the eventing one.  Rewind regs->ip to the
 * eventing instruction by decoding the basic block that the last LBR
 * entry branched into, and reporting the last instruction before ip.
 *
 * Returns 1 when regs->ip was fixed up (or needed no fixup), 0 when
 * the rewind could not be performed.
 */
static int intel_pmu_pebs_fixup_ip(struct pt_regs *regs)
{
	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
	unsigned long from = cpuc->lbr_entries[0].from;
	unsigned long old_to, to = cpuc->lbr_entries[0].to;
	unsigned long ip = regs->ip;
	int is_64bit = 0;
	void *kaddr;
	int size;

	/*
	 * We don't need to fixup if the PEBS assist is fault like
	 */
	if (!x86_pmu.intel_cap.pebs_trap)
		return 1;

	/*
	 * No LBR entry, no basic block, no rewinding
	 */
	if (!cpuc->lbr_stack.nr || !from || !to)
		return 0;

	/*
	 * Basic blocks should never cross user/kernel boundaries
	 */
	if (kernel_ip(ip) != kernel_ip(to))
		return 0;

	/*
	 * unsigned math, either ip is before the start (impossible) or
	 * the basic block is larger than 1 page (sanity)
	 */
	if ((ip - to) > PEBS_FIXUP_SIZE)
		return 0;

	/*
	 * We sampled a branch insn, rewind using the LBR stack
	 */
	if (ip == to) {
		set_linear_ip(regs, from);
		return 1;
	}

	size = ip - to;
	if (!kernel_ip(ip)) {
		int bytes;
		u8 *buf = this_cpu_read(insn_buffer);

		/* 'size' must fit our buffer, see above */
		bytes = copy_from_user_nmi(buf, (void __user *)to, size);
		if (bytes != 0)
			return 0;

		kaddr = buf;
	} else {
		kaddr = (void *)to;
	}

	/*
	 * Decode instructions forward from 'to' until we step onto or
	 * past 'ip'; old_to then holds the start of the last instruction
	 * before ip, i.e. the eventing instruction.
	 */
	do {
		struct insn insn;

		old_to = to;

#ifdef CONFIG_X86_64
		is_64bit = kernel_ip(to) || !test_thread_flag(TIF_IA32);
#endif
		insn_init(&insn, kaddr, size, is_64bit);
		insn_get_length(&insn);
		/*
		 * Make sure there was not a problem decoding the
		 * instruction and getting the length.  This is
		 * doubly important because we have an infinite
		 * loop if insn.length=0.
		 */
		if (!insn.length)
			break;

		to += insn.length;
		kaddr += insn.length;
		size -= insn.length;
	} while (to < ip);

	if (to == ip) {
		set_linear_ip(regs, old_to);
		return 1;
	}

	/*
	 * Even though we decoded the basic block, the instruction stream
	 * never matched the given IP, either the TO or the IP got corrupted.
	 */
	return 0;
}
829 
intel_hsw_weight(struct pebs_record_hsw * pebs)830 static inline u64 intel_hsw_weight(struct pebs_record_hsw *pebs)
831 {
832 	if (pebs->tsx_tuning) {
833 		union hsw_tsx_tuning tsx = { .value = pebs->tsx_tuning };
834 		return tsx.cycles_last_block;
835 	}
836 	return 0;
837 }
838 
intel_hsw_transaction(struct pebs_record_hsw * pebs)839 static inline u64 intel_hsw_transaction(struct pebs_record_hsw *pebs)
840 {
841 	u64 txn = (pebs->tsx_tuning & PEBS_HSW_TSX_FLAGS) >> 32;
842 
843 	/* For RTM XABORTs also log the abort code from AX */
844 	if ((txn & PERF_TXN_TRANSACTION) && (pebs->ax & 1))
845 		txn |= ((pebs->ax >> 24) & 0xff) << PERF_TXN_ABORT_SHIFT;
846 	return txn;
847 }
848 
/*
 * Expand one raw PEBS record into a perf sample and emit it through
 * perf_event_overflow().  @__pebs points at the raw record; it is cast
 * to the largest known layout (pebs_record_hsw), but fields beyond the
 * active pebs_format are never accessed.
 */
static void __intel_pmu_pebs_event(struct perf_event *event,
				   struct pt_regs *iregs, void *__pebs)
{
#define PERF_X86_EVENT_PEBS_HSW_PREC \
		(PERF_X86_EVENT_PEBS_ST_HSW | \
		 PERF_X86_EVENT_PEBS_LD_HSW | \
		 PERF_X86_EVENT_PEBS_NA_HSW)
	/*
	 * We cast to the biggest pebs_record but are careful not to
	 * unconditionally access the 'extra' entries.
	 */
	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
	struct pebs_record_hsw *pebs = __pebs;
	struct perf_sample_data data;
	struct pt_regs regs;
	u64 sample_type;
	int fll, fst, dsrc;
	int fl = event->hw.flags;

	if (!intel_pmu_save_and_restart(event))
		return;

	sample_type = event->attr.sample_type;
	dsrc = sample_type & PERF_SAMPLE_DATA_SRC;

	/* load-latency vs. (precise-)store flavour of the event */
	fll = fl & PERF_X86_EVENT_PEBS_LDLAT;
	fst = fl & (PERF_X86_EVENT_PEBS_ST | PERF_X86_EVENT_PEBS_HSW_PREC);

	perf_sample_data_init(&data, 0, event->hw.last_period);

	data.period = event->hw.last_period;

	/*
	 * Use latency for weight (only avail with PEBS-LL)
	 */
	if (fll && (sample_type & PERF_SAMPLE_WEIGHT))
		data.weight = pebs->lat;

	/*
	 * data.data_src encodes the data source
	 */
	if (dsrc) {
		u64 val = PERF_MEM_NA;
		if (fll)
			val = load_latency_data(pebs->dse);
		else if (fst && (fl & PERF_X86_EVENT_PEBS_HSW_PREC))
			val = precise_datala_hsw(event, pebs->dse);
		else if (fst)
			val = precise_store_data(pebs->dse);
		data.data_src.val = val;
	}

	/*
	 * We use the interrupt regs as a base because the PEBS record
	 * does not contain a full regs set, specifically it seems to
	 * lack segment descriptors, which get used by things like
	 * user_mode().
	 *
	 * In the simple case fix up only the IP and BP,SP regs, for
	 * PERF_SAMPLE_IP and PERF_SAMPLE_CALLCHAIN to function properly.
	 * A possible PERF_SAMPLE_REGS will have to transfer all regs.
	 */
	regs = *iregs;
	regs.flags = pebs->flags;
	set_linear_ip(&regs, pebs->ip);
	regs.bp = pebs->bp;
	regs.sp = pebs->sp;

	if (sample_type & PERF_SAMPLE_REGS_INTR) {
		regs.ax = pebs->ax;
		regs.bx = pebs->bx;
		regs.cx = pebs->cx;
		regs.dx = pebs->dx;
		regs.si = pebs->si;
		regs.di = pebs->di;
		regs.bp = pebs->bp;
		regs.sp = pebs->sp;

		regs.flags = pebs->flags;
#ifndef CONFIG_X86_32
		regs.r8 = pebs->r8;
		regs.r9 = pebs->r9;
		regs.r10 = pebs->r10;
		regs.r11 = pebs->r11;
		regs.r12 = pebs->r12;
		regs.r13 = pebs->r13;
		regs.r14 = pebs->r14;
		regs.r15 = pebs->r15;
#endif
	}

	/*
	 * Format >= 2 records the eventing IP directly; older formats
	 * need the LBR-based rewind in intel_pmu_pebs_fixup_ip().
	 */
	if (event->attr.precise_ip > 1 && x86_pmu.intel_cap.pebs_format >= 2) {
		regs.ip = pebs->real_ip;
		regs.flags |= PERF_EFLAGS_EXACT;
	} else if (event->attr.precise_ip > 1 && intel_pmu_pebs_fixup_ip(&regs))
		regs.flags |= PERF_EFLAGS_EXACT;
	else
		regs.flags &= ~PERF_EFLAGS_EXACT;

	if ((sample_type & PERF_SAMPLE_ADDR) &&
	    x86_pmu.intel_cap.pebs_format >= 1)
		data.addr = pebs->dla;

	if (x86_pmu.intel_cap.pebs_format >= 2) {
		/* Only set the TSX weight when no memory weight. */
		if ((sample_type & PERF_SAMPLE_WEIGHT) && !fll)
			data.weight = intel_hsw_weight(pebs);

		if (sample_type & PERF_SAMPLE_TRANSACTION)
			data.txn = intel_hsw_transaction(pebs);
	}

	if (has_branch_stack(event))
		data.br_stack = &cpuc->lbr_stack;

	if (perf_event_overflow(event, &data, &regs))
		x86_pmu_stop(event, 0);
}
967 
/*
 * Drain the PEBS buffer on format 0 (Core) hardware.  Only one counter
 * (PMC0) can be PEBS-enabled there, so every record belongs to
 * cpuc->events[0], and only the most recent record is consumed.
 */
static void intel_pmu_drain_pebs_core(struct pt_regs *iregs)
{
	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
	struct debug_store *ds = cpuc->ds;
	struct perf_event *event = cpuc->events[0]; /* PMC0 only */
	struct pebs_record_core *at, *top;
	int n;

	if (!x86_pmu.pebs_active)
		return;

	at  = (struct pebs_record_core *)(unsigned long)ds->pebs_buffer_base;
	top = (struct pebs_record_core *)(unsigned long)ds->pebs_index;

	/*
	 * Whatever else happens, drain the thing
	 */
	ds->pebs_index = ds->pebs_buffer_base;

	if (!test_bit(0, cpuc->active_mask))
		return;

	WARN_ON_ONCE(!event);

	if (!event->attr.precise_ip)
		return;

	n = top - at;
	if (n <= 0)
		return;

	/*
	 * Should not happen, we program the threshold at 1 and do not
	 * set a reset value.
	 */
	WARN_ONCE(n > 1, "bad leftover pebs %d\n", n);
	at += n - 1;	/* keep only the newest record */

	__intel_pmu_pebs_event(event, iregs, at);
}
1008 
/*
 * Drain the PEBS buffer on format >= 1 hardware.  Each record carries a
 * status bitmask naming the counters that overflowed; each record is
 * attributed to the first active precise event whose status bit has not
 * been claimed by an earlier record in this batch.
 */
static void intel_pmu_drain_pebs_nhm(struct pt_regs *iregs)
{
	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
	struct debug_store *ds = cpuc->ds;
	struct perf_event *event = NULL;
	void *at, *top;
	u64 status = 0;	/* status bits already claimed by a record */
	int bit;

	if (!x86_pmu.pebs_active)
		return;

	at  = (struct pebs_record_nhm *)(unsigned long)ds->pebs_buffer_base;
	top = (struct pebs_record_nhm *)(unsigned long)ds->pebs_index;

	/* Rewind the hardware cursor before processing the records. */
	ds->pebs_index = ds->pebs_buffer_base;

	if (unlikely(at > top))
		return;

	/*
	 * Should not happen, we program the threshold at 1 and do not
	 * set a reset value.
	 */
	WARN_ONCE(top - at > x86_pmu.max_pebs_events * x86_pmu.pebs_record_size,
		  "Unexpected number of pebs records %ld\n",
		  (long)(top - at) / x86_pmu.pebs_record_size);

	for (; at < top; at += x86_pmu.pebs_record_size) {
		struct pebs_record_nhm *p = at;

		for_each_set_bit(bit, (unsigned long *)&p->status,
				 x86_pmu.max_pebs_events) {
			event = cpuc->events[bit];
			if (!test_bit(bit, cpuc->active_mask))
				continue;

			WARN_ON_ONCE(!event);

			if (!event->attr.precise_ip)
				continue;

			/* Skip bits a previous record already accounted for. */
			if (__test_and_set_bit(bit, (unsigned long *)&status))
				continue;

			break;
		}

		/* No unclaimed, active, precise event: drop the record. */
		if (!event || bit >= x86_pmu.max_pebs_events)
			continue;

		__intel_pmu_pebs_event(event, iregs, at);
	}
}
1063 
1064 /*
1065  * BTS, PEBS probe and setup
1066  */
1067 
intel_ds_init(void)1068 void __init intel_ds_init(void)
1069 {
1070 	/*
1071 	 * No support for 32bit formats
1072 	 */
1073 	if (!boot_cpu_has(X86_FEATURE_DTES64))
1074 		return;
1075 
1076 	x86_pmu.bts  = boot_cpu_has(X86_FEATURE_BTS);
1077 	x86_pmu.pebs = boot_cpu_has(X86_FEATURE_PEBS);
1078 	if (x86_pmu.pebs) {
1079 		char pebs_type = x86_pmu.intel_cap.pebs_trap ?  '+' : '-';
1080 		int format = x86_pmu.intel_cap.pebs_format;
1081 
1082 		switch (format) {
1083 		case 0:
1084 			printk(KERN_CONT "PEBS fmt0%c, ", pebs_type);
1085 			x86_pmu.pebs_record_size = sizeof(struct pebs_record_core);
1086 			x86_pmu.drain_pebs = intel_pmu_drain_pebs_core;
1087 			break;
1088 
1089 		case 1:
1090 			printk(KERN_CONT "PEBS fmt1%c, ", pebs_type);
1091 			x86_pmu.pebs_record_size = sizeof(struct pebs_record_nhm);
1092 			x86_pmu.drain_pebs = intel_pmu_drain_pebs_nhm;
1093 			break;
1094 
1095 		case 2:
1096 			pr_cont("PEBS fmt2%c, ", pebs_type);
1097 			x86_pmu.pebs_record_size = sizeof(struct pebs_record_hsw);
1098 			x86_pmu.drain_pebs = intel_pmu_drain_pebs_nhm;
1099 			break;
1100 
1101 		default:
1102 			printk(KERN_CONT "no PEBS fmt%d%c, ", format, pebs_type);
1103 			x86_pmu.pebs = 0;
1104 		}
1105 	}
1106 }
1107 
perf_restore_debug_store(void)1108 void perf_restore_debug_store(void)
1109 {
1110 	struct debug_store *ds = __this_cpu_read(cpu_hw_events.ds);
1111 
1112 	if (!x86_pmu.bts && !x86_pmu.pebs)
1113 		return;
1114 
1115 	wrmsrl(MSR_IA32_DS_AREA, (unsigned long)ds);
1116 }
1117