1/*
2 * Performance event support - Freescale Embedded Performance Monitor
3 *
4 * Copyright 2008-2009 Paul Mackerras, IBM Corporation.
5 * Copyright 2010 Freescale Semiconductor, Inc.
6 *
7 * This program is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License
9 * as published by the Free Software Foundation; either version
10 * 2 of the License, or (at your option) any later version.
11 */
12#include <linux/kernel.h>
13#include <linux/sched.h>
14#include <linux/perf_event.h>
15#include <linux/percpu.h>
16#include <linux/hardirq.h>
17#include <asm/reg_fsl_emb.h>
18#include <asm/pmc.h>
19#include <asm/machdep.h>
20#include <asm/firmware.h>
21#include <asm/ptrace.h>
22
23struct cpu_hw_events {
24	int n_events;
25	int disabled;
26	u8  pmcs_enabled;
27	struct perf_event *event[MAX_HWEVENTS];
28};
29static DEFINE_PER_CPU(struct cpu_hw_events, cpu_hw_events);
30
31static struct fsl_emb_pmu *ppmu;
32
33/* Number of perf_events counting hardware events */
34static atomic_t num_events;
35/* Used to avoid races in calling reserve/release_pmc_hardware */
36static DEFINE_MUTEX(pmc_reserve_mutex);
37
38/*
39 * If interrupts were soft-disabled when a PMU interrupt occurs, treat
40 * it as an NMI.
41 */
42static inline int perf_intr_is_nmi(struct pt_regs *regs)
43{
44#ifdef __powerpc64__
45	return !regs->softe;
46#else
47	return 0;
48#endif
49}
50
51static void perf_event_interrupt(struct pt_regs *regs);
52
53/*
54 * Read one performance monitor counter (PMC).
55 */
56static unsigned long read_pmc(int idx)
57{
58	unsigned long val;
59
60	switch (idx) {
61	case 0:
62		val = mfpmr(PMRN_PMC0);
63		break;
64	case 1:
65		val = mfpmr(PMRN_PMC1);
66		break;
67	case 2:
68		val = mfpmr(PMRN_PMC2);
69		break;
70	case 3:
71		val = mfpmr(PMRN_PMC3);
72		break;
73	case 4:
74		val = mfpmr(PMRN_PMC4);
75		break;
76	case 5:
77		val = mfpmr(PMRN_PMC5);
78		break;
79	default:
80		printk(KERN_ERR "oops trying to read PMC%d\n", idx);
81		val = 0;
82	}
83	return val;
84}
85
86/*
87 * Write one PMC.
88 */
89static void write_pmc(int idx, unsigned long val)
90{
91	switch (idx) {
92	case 0:
93		mtpmr(PMRN_PMC0, val);
94		break;
95	case 1:
96		mtpmr(PMRN_PMC1, val);
97		break;
98	case 2:
99		mtpmr(PMRN_PMC2, val);
100		break;
101	case 3:
102		mtpmr(PMRN_PMC3, val);
103		break;
104	case 4:
105		mtpmr(PMRN_PMC4, val);
106		break;
107	case 5:
108		mtpmr(PMRN_PMC5, val);
109		break;
110	default:
111		printk(KERN_ERR "oops trying to write PMC%d\n", idx);
112	}
113
114	isync();
115}
116
117/*
118 * Write one local control A register
119 */
120static void write_pmlca(int idx, unsigned long val)
121{
122	switch (idx) {
123	case 0:
124		mtpmr(PMRN_PMLCA0, val);
125		break;
126	case 1:
127		mtpmr(PMRN_PMLCA1, val);
128		break;
129	case 2:
130		mtpmr(PMRN_PMLCA2, val);
131		break;
132	case 3:
133		mtpmr(PMRN_PMLCA3, val);
134		break;
135	case 4:
136		mtpmr(PMRN_PMLCA4, val);
137		break;
138	case 5:
139		mtpmr(PMRN_PMLCA5, val);
140		break;
141	default:
142		printk(KERN_ERR "oops trying to write PMLCA%d\n", idx);
143	}
144
145	isync();
146}
147
148/*
149 * Write one local control B register
150 */
151static void write_pmlcb(int idx, unsigned long val)
152{
153	switch (idx) {
154	case 0:
155		mtpmr(PMRN_PMLCB0, val);
156		break;
157	case 1:
158		mtpmr(PMRN_PMLCB1, val);
159		break;
160	case 2:
161		mtpmr(PMRN_PMLCB2, val);
162		break;
163	case 3:
164		mtpmr(PMRN_PMLCB3, val);
165		break;
166	case 4:
167		mtpmr(PMRN_PMLCB4, val);
168		break;
169	case 5:
170		mtpmr(PMRN_PMLCB5, val);
171		break;
172	default:
173		printk(KERN_ERR "oops trying to write PMLCB%d\n", idx);
174	}
175
176	isync();
177}
178
179static void fsl_emb_pmu_read(struct perf_event *event)
180{
181	s64 val, delta, prev;
182
183	if (event->hw.state & PERF_HES_STOPPED)
184		return;
185
186	/*
187	 * Performance monitor interrupts come even when interrupts
188	 * are soft-disabled, as long as interrupts are hard-enabled.
189	 * Therefore we treat them like NMIs.
190	 */
191	do {
192		prev = local64_read(&event->hw.prev_count);
193		barrier();
194		val = read_pmc(event->hw.idx);
195	} while (local64_cmpxchg(&event->hw.prev_count, prev, val) != prev);
196
197	/* The counters are only 32 bits wide */
198	delta = (val - prev) & 0xfffffffful;
199	local64_add(delta, &event->count);
200	local64_sub(delta, &event->hw.period_left);
201}
202
203/*
204 * Disable all events to prevent PMU interrupts and to allow
205 * events to be added or removed.
206 */
207static void fsl_emb_pmu_disable(struct pmu *pmu)
208{
209	struct cpu_hw_events *cpuhw;
210	unsigned long flags;
211
212	local_irq_save(flags);
213	cpuhw = this_cpu_ptr(&cpu_hw_events);
214
215	if (!cpuhw->disabled) {
216		cpuhw->disabled = 1;
217
218		/*
219		 * Check if we ever enabled the PMU on this cpu.
220		 */
221		if (!cpuhw->pmcs_enabled) {
222			ppc_enable_pmcs();
223			cpuhw->pmcs_enabled = 1;
224		}
225
226		if (atomic_read(&num_events)) {
227			/*
228			 * Set the 'freeze all counters' bit, and disable
229			 * interrupts.  The barrier is to make sure the
230			 * mtpmr has been executed and the PMU has frozen
231			 * the events before we return.
232			 */
233
234			mtpmr(PMRN_PMGC0, PMGC0_FAC);
235			isync();
236		}
237	}
238	local_irq_restore(flags);
239}
240
241/*
242 * Re-enable all events if disable == 0.
243 * If we were previously disabled and events were added, then
244 * put the new config on the PMU.
245 */
246static void fsl_emb_pmu_enable(struct pmu *pmu)
247{
248	struct cpu_hw_events *cpuhw;
249	unsigned long flags;
250
251	local_irq_save(flags);
252	cpuhw = this_cpu_ptr(&cpu_hw_events);
253	if (!cpuhw->disabled)
254		goto out;
255
256	cpuhw->disabled = 0;
257	ppc_set_pmu_inuse(cpuhw->n_events != 0);
258
259	if (cpuhw->n_events > 0) {
260		mtpmr(PMRN_PMGC0, PMGC0_PMIE | PMGC0_FCECE);
261		isync();
262	}
263
264 out:
265	local_irq_restore(flags);
266}
267
268static int collect_events(struct perf_event *group, int max_count,
269			  struct perf_event *ctrs[])
270{
271	int n = 0;
272	struct perf_event *event;
273
274	if (!is_software_event(group)) {
275		if (n >= max_count)
276			return -1;
277		ctrs[n] = group;
278		n++;
279	}
280	list_for_each_entry(event, &group->sibling_list, group_entry) {
281		if (!is_software_event(event) &&
282		    event->state != PERF_EVENT_STATE_OFF) {
283			if (n >= max_count)
284				return -1;
285			ctrs[n] = event;
286			n++;
287		}
288	}
289	return n;
290}
291
292/* context locked on entry */
293static int fsl_emb_pmu_add(struct perf_event *event, int flags)
294{
295	struct cpu_hw_events *cpuhw;
296	int ret = -EAGAIN;
297	int num_counters = ppmu->n_counter;
298	u64 val;
299	int i;
300
301	perf_pmu_disable(event->pmu);
302	cpuhw = &get_cpu_var(cpu_hw_events);
303
304	if (event->hw.config & FSL_EMB_EVENT_RESTRICTED)
305		num_counters = ppmu->n_restricted;
306
307	/*
308	 * Allocate counters from top-down, so that restricted-capable
309	 * counters are kept free as long as possible.
310	 */
311	for (i = num_counters - 1; i >= 0; i--) {
312		if (cpuhw->event[i])
313			continue;
314
315		break;
316	}
317
318	if (i < 0)
319		goto out;
320
321	event->hw.idx = i;
322	cpuhw->event[i] = event;
323	++cpuhw->n_events;
324
325	val = 0;
326	if (event->hw.sample_period) {
327		s64 left = local64_read(&event->hw.period_left);
328		if (left < 0x80000000L)
329			val = 0x80000000L - left;
330	}
331	local64_set(&event->hw.prev_count, val);
332
333	if (unlikely(!(flags & PERF_EF_START))) {
334		event->hw.state = PERF_HES_STOPPED | PERF_HES_UPTODATE;
335		val = 0;
336	} else {
337		event->hw.state &= ~(PERF_HES_STOPPED | PERF_HES_UPTODATE);
338	}
339
340	write_pmc(i, val);
341	perf_event_update_userpage(event);
342
343	write_pmlcb(i, event->hw.config >> 32);
344	write_pmlca(i, event->hw.config_base);
345
346	ret = 0;
347 out:
348	put_cpu_var(cpu_hw_events);
349	perf_pmu_enable(event->pmu);
350	return ret;
351}
352
353/* context locked on entry */
354static void fsl_emb_pmu_del(struct perf_event *event, int flags)
355{
356	struct cpu_hw_events *cpuhw;
357	int i = event->hw.idx;
358
359	perf_pmu_disable(event->pmu);
360	if (i < 0)
361		goto out;
362
363	fsl_emb_pmu_read(event);
364
365	cpuhw = &get_cpu_var(cpu_hw_events);
366
367	WARN_ON(event != cpuhw->event[event->hw.idx]);
368
369	write_pmlca(i, 0);
370	write_pmlcb(i, 0);
371	write_pmc(i, 0);
372
373	cpuhw->event[i] = NULL;
374	event->hw.idx = -1;
375
376	/*
377	 * TODO: if at least one restricted event exists, and we
378	 * just freed up a non-restricted-capable counter, and
379	 * there is a restricted-capable counter occupied by
380	 * a non-restricted event, migrate that event to the
381	 * vacated counter.
382	 */
383
384	cpuhw->n_events--;
385
386 out:
387	perf_pmu_enable(event->pmu);
388	put_cpu_var(cpu_hw_events);
389}
390
391static void fsl_emb_pmu_start(struct perf_event *event, int ef_flags)
392{
393	unsigned long flags;
394	unsigned long val;
395	s64 left;
396
397	if (event->hw.idx < 0 || !event->hw.sample_period)
398		return;
399
400	if (!(event->hw.state & PERF_HES_STOPPED))
401		return;
402
403	if (ef_flags & PERF_EF_RELOAD)
404		WARN_ON_ONCE(!(event->hw.state & PERF_HES_UPTODATE));
405
406	local_irq_save(flags);
407	perf_pmu_disable(event->pmu);
408
409	event->hw.state = 0;
410	left = local64_read(&event->hw.period_left);
411	val = 0;
412	if (left < 0x80000000L)
413		val = 0x80000000L - left;
414	write_pmc(event->hw.idx, val);
415
416	perf_event_update_userpage(event);
417	perf_pmu_enable(event->pmu);
418	local_irq_restore(flags);
419}
420
421static void fsl_emb_pmu_stop(struct perf_event *event, int ef_flags)
422{
423	unsigned long flags;
424
425	if (event->hw.idx < 0 || !event->hw.sample_period)
426		return;
427
428	if (event->hw.state & PERF_HES_STOPPED)
429		return;
430
431	local_irq_save(flags);
432	perf_pmu_disable(event->pmu);
433
434	fsl_emb_pmu_read(event);
435	event->hw.state |= PERF_HES_STOPPED | PERF_HES_UPTODATE;
436	write_pmc(event->hw.idx, 0);
437
438	perf_event_update_userpage(event);
439	perf_pmu_enable(event->pmu);
440	local_irq_restore(flags);
441}
442
443/*
444 * Release the PMU if this is the last perf_event.
445 */
446static void hw_perf_event_destroy(struct perf_event *event)
447{
448	if (!atomic_add_unless(&num_events, -1, 1)) {
449		mutex_lock(&pmc_reserve_mutex);
450		if (atomic_dec_return(&num_events) == 0)
451			release_pmc_hardware();
452		mutex_unlock(&pmc_reserve_mutex);
453	}
454}
455
456/*
457 * Translate a generic cache event_id config to a raw event_id code.
458 */
459static int hw_perf_cache_event(u64 config, u64 *eventp)
460{
461	unsigned long type, op, result;
462	int ev;
463
464	if (!ppmu->cache_events)
465		return -EINVAL;
466
467	/* unpack config */
468	type = config & 0xff;
469	op = (config >> 8) & 0xff;
470	result = (config >> 16) & 0xff;
471
472	if (type >= PERF_COUNT_HW_CACHE_MAX ||
473	    op >= PERF_COUNT_HW_CACHE_OP_MAX ||
474	    result >= PERF_COUNT_HW_CACHE_RESULT_MAX)
475		return -EINVAL;
476
477	ev = (*ppmu->cache_events)[type][op][result];
478	if (ev == 0)
479		return -EOPNOTSUPP;
480	if (ev == -1)
481		return -EINVAL;
482	*eventp = ev;
483	return 0;
484}
485
486static int fsl_emb_pmu_event_init(struct perf_event *event)
487{
488	u64 ev;
489	struct perf_event *events[MAX_HWEVENTS];
490	int n;
491	int err;
492	int num_restricted;
493	int i;
494
495	if (ppmu->n_counter > MAX_HWEVENTS) {
496		WARN(1, "No. of perf counters (%d) is higher than max array size(%d)\n",
497			ppmu->n_counter, MAX_HWEVENTS);
498		ppmu->n_counter = MAX_HWEVENTS;
499	}
500
501	switch (event->attr.type) {
502	case PERF_TYPE_HARDWARE:
503		ev = event->attr.config;
504		if (ev >= ppmu->n_generic || ppmu->generic_events[ev] == 0)
505			return -EOPNOTSUPP;
506		ev = ppmu->generic_events[ev];
507		break;
508
509	case PERF_TYPE_HW_CACHE:
510		err = hw_perf_cache_event(event->attr.config, &ev);
511		if (err)
512			return err;
513		break;
514
515	case PERF_TYPE_RAW:
516		ev = event->attr.config;
517		break;
518
519	default:
520		return -ENOENT;
521	}
522
523	event->hw.config = ppmu->xlate_event(ev);
524	if (!(event->hw.config & FSL_EMB_EVENT_VALID))
525		return -EINVAL;
526
527	/*
528	 * If this is in a group, check if it can go on with all the
529	 * other hardware events in the group.  We assume the event
530	 * hasn't been linked into its leader's sibling list at this point.
531	 */
532	n = 0;
533	if (event->group_leader != event) {
534		n = collect_events(event->group_leader,
535		                   ppmu->n_counter - 1, events);
536		if (n < 0)
537			return -EINVAL;
538	}
539
540	if (event->hw.config & FSL_EMB_EVENT_RESTRICTED) {
541		num_restricted = 0;
542		for (i = 0; i < n; i++) {
543			if (events[i]->hw.config & FSL_EMB_EVENT_RESTRICTED)
544				num_restricted++;
545		}
546
547		if (num_restricted >= ppmu->n_restricted)
548			return -EINVAL;
549	}
550
551	event->hw.idx = -1;
552
553	event->hw.config_base = PMLCA_CE | PMLCA_FCM1 |
554	                        (u32)((ev << 16) & PMLCA_EVENT_MASK);
555
556	if (event->attr.exclude_user)
557		event->hw.config_base |= PMLCA_FCU;
558	if (event->attr.exclude_kernel)
559		event->hw.config_base |= PMLCA_FCS;
560	if (event->attr.exclude_idle)
561		return -ENOTSUPP;
562
563	event->hw.last_period = event->hw.sample_period;
564	local64_set(&event->hw.period_left, event->hw.last_period);
565
566	/*
567	 * See if we need to reserve the PMU.
568	 * If no events are currently in use, then we have to take a
569	 * mutex to ensure that we don't race with another task doing
570	 * reserve_pmc_hardware or release_pmc_hardware.
571	 */
572	err = 0;
573	if (!atomic_inc_not_zero(&num_events)) {
574		mutex_lock(&pmc_reserve_mutex);
575		if (atomic_read(&num_events) == 0 &&
576		    reserve_pmc_hardware(perf_event_interrupt))
577			err = -EBUSY;
578		else
579			atomic_inc(&num_events);
580		mutex_unlock(&pmc_reserve_mutex);
581
582		mtpmr(PMRN_PMGC0, PMGC0_FAC);
583		isync();
584	}
585	event->destroy = hw_perf_event_destroy;
586
587	return err;
588}
589
590static struct pmu fsl_emb_pmu = {
591	.pmu_enable	= fsl_emb_pmu_enable,
592	.pmu_disable	= fsl_emb_pmu_disable,
593	.event_init	= fsl_emb_pmu_event_init,
594	.add		= fsl_emb_pmu_add,
595	.del		= fsl_emb_pmu_del,
596	.start		= fsl_emb_pmu_start,
597	.stop		= fsl_emb_pmu_stop,
598	.read		= fsl_emb_pmu_read,
599};
600
601/*
602 * A counter has overflowed; update its count and record
603 * things if requested.  Note that interrupts are hard-disabled
604 * here so there is no possibility of being interrupted.
605 */
606static void record_and_restart(struct perf_event *event, unsigned long val,
607			       struct pt_regs *regs)
608{
609	u64 period = event->hw.sample_period;
610	s64 prev, delta, left;
611	int record = 0;
612
613	if (event->hw.state & PERF_HES_STOPPED) {
614		write_pmc(event->hw.idx, 0);
615		return;
616	}
617
618	/* we don't have to worry about interrupts here */
619	prev = local64_read(&event->hw.prev_count);
620	delta = (val - prev) & 0xfffffffful;
621	local64_add(delta, &event->count);
622
623	/*
624	 * See if the total period for this event has expired,
625	 * and update for the next period.
626	 */
627	val = 0;
628	left = local64_read(&event->hw.period_left) - delta;
629	if (period) {
630		if (left <= 0) {
631			left += period;
632			if (left <= 0)
633				left = period;
634			record = 1;
635			event->hw.last_period = event->hw.sample_period;
636		}
637		if (left < 0x80000000LL)
638			val = 0x80000000LL - left;
639	}
640
641	write_pmc(event->hw.idx, val);
642	local64_set(&event->hw.prev_count, val);
643	local64_set(&event->hw.period_left, left);
644	perf_event_update_userpage(event);
645
646	/*
647	 * Finally record data if requested.
648	 */
649	if (record) {
650		struct perf_sample_data data;
651
652		perf_sample_data_init(&data, 0, event->hw.last_period);
653
654		if (perf_event_overflow(event, &data, regs))
655			fsl_emb_pmu_stop(event, 0);
656	}
657}
658
659static void perf_event_interrupt(struct pt_regs *regs)
660{
661	int i;
662	struct cpu_hw_events *cpuhw = this_cpu_ptr(&cpu_hw_events);
663	struct perf_event *event;
664	unsigned long val;
665	int found = 0;
666	int nmi;
667
668	nmi = perf_intr_is_nmi(regs);
669	if (nmi)
670		nmi_enter();
671	else
672		irq_enter();
673
674	for (i = 0; i < ppmu->n_counter; ++i) {
675		event = cpuhw->event[i];
676
677		val = read_pmc(i);
678		if ((int)val < 0) {
679			if (event) {
680				/* event has overflowed */
681				found = 1;
682				record_and_restart(event, val, regs);
683			} else {
684				/*
685				 * Disabled counter is negative,
686				 * reset it just in case.
687				 */
688				write_pmc(i, 0);
689			}
690		}
691	}
692
693	/* PMM will keep counters frozen until we return from the interrupt. */
694	mtmsr(mfmsr() | MSR_PMM);
695	mtpmr(PMRN_PMGC0, PMGC0_PMIE | PMGC0_FCECE);
696	isync();
697
698	if (nmi)
699		nmi_exit();
700	else
701		irq_exit();
702}
703
704void hw_perf_event_setup(int cpu)
705{
706	struct cpu_hw_events *cpuhw = &per_cpu(cpu_hw_events, cpu);
707
708	memset(cpuhw, 0, sizeof(*cpuhw));
709}
710
711int register_fsl_emb_pmu(struct fsl_emb_pmu *pmu)
712{
713	if (ppmu)
714		return -EBUSY;		/* something's already registered */
715
716	ppmu = pmu;
717	pr_info("%s performance monitor hardware support registered\n",
718		pmu->name);
719
720	perf_pmu_register(&fsl_emb_pmu, "cpu", PERF_TYPE_RAW);
721
722	return 0;
723}
724