1/*
2 * S390 Version
3 *   Copyright IBM Corp. 2002, 2011
4 *   Author(s): Thomas Spatzier (tspat@de.ibm.com)
5 *   Author(s): Mahesh Salgaonkar (mahesh@linux.vnet.ibm.com)
6 *   Author(s): Heinz Graalfs (graalfs@linux.vnet.ibm.com)
7 *   Author(s): Andreas Krebbel (krebbel@linux.vnet.ibm.com)
8 *
9 * @remark Copyright 2002-2011 OProfile authors
10 */
11
12#include <linux/oprofile.h>
13#include <linux/perf_event.h>
14#include <linux/init.h>
15#include <linux/errno.h>
16#include <linux/fs.h>
17#include <linux/module.h>
18#include <asm/processor.h>
19#include <asm/perf_event.h>
20
21#include "../../../drivers/oprofile/oprof.h"
22
23extern void s390_backtrace(struct pt_regs * const regs, unsigned int depth);
24
25#include "hwsampler.h"
26#include "op_counter.h"
27
28#define DEFAULT_INTERVAL	4127518
29
30#define DEFAULT_SDBT_BLOCKS	1
31#define DEFAULT_SDB_BLOCKS	511
32
33static unsigned long oprofile_hw_interval = DEFAULT_INTERVAL;
34static unsigned long oprofile_min_interval;
35static unsigned long oprofile_max_interval;
36
37static unsigned long oprofile_sdbt_blocks = DEFAULT_SDBT_BLOCKS;
38static unsigned long oprofile_sdb_blocks = DEFAULT_SDB_BLOCKS;
39
40static int hwsampler_enabled;
41static int hwsampler_running;	/* start_mutex must be held to change */
42static int hwsampler_available;
43
44static struct oprofile_operations timer_ops;
45
46struct op_counter_config counter_config;
47
48enum __force_cpu_type {
49	reserved = 0,		/* do not force */
50	timer,
51};
52static int force_cpu_type;
53
54static int set_cpu_type(const char *str, struct kernel_param *kp)
55{
56	if (!strcmp(str, "timer")) {
57		force_cpu_type = timer;
58		printk(KERN_INFO "oprofile: forcing timer to be returned "
59		                 "as cpu type\n");
60	} else {
61		force_cpu_type = 0;
62	}
63
64	return 0;
65}
66module_param_call(cpu_type, set_cpu_type, NULL, NULL, 0);
67MODULE_PARM_DESC(cpu_type, "Force legacy basic mode sampling"
68		           "(report cpu_type \"timer\"");
69
70static int __oprofile_hwsampler_start(void)
71{
72	int retval;
73
74	retval = hwsampler_allocate(oprofile_sdbt_blocks, oprofile_sdb_blocks);
75	if (retval)
76		return retval;
77
78	retval = hwsampler_start_all(oprofile_hw_interval);
79	if (retval)
80		hwsampler_deallocate();
81
82	return retval;
83}
84
85static int oprofile_hwsampler_start(void)
86{
87	int retval;
88
89	hwsampler_running = hwsampler_enabled;
90
91	if (!hwsampler_running)
92		return timer_ops.start();
93
94	retval = perf_reserve_sampling();
95	if (retval)
96		return retval;
97
98	retval = __oprofile_hwsampler_start();
99	if (retval)
100		perf_release_sampling();
101
102	return retval;
103}
104
105static void oprofile_hwsampler_stop(void)
106{
107	if (!hwsampler_running) {
108		timer_ops.stop();
109		return;
110	}
111
112	hwsampler_stop_all();
113	hwsampler_deallocate();
114	perf_release_sampling();
115	return;
116}
117
118/*
119 * File ops used for:
120 * /dev/oprofile/0/enabled
121 * /dev/oprofile/hwsampling/hwsampler  (cpu_type = timer)
122 */
123
124static ssize_t hwsampler_read(struct file *file, char __user *buf,
125		size_t count, loff_t *offset)
126{
127	return oprofilefs_ulong_to_user(hwsampler_enabled, buf, count, offset);
128}
129
130static ssize_t hwsampler_write(struct file *file, char const __user *buf,
131		size_t count, loff_t *offset)
132{
133	unsigned long val;
134	int retval;
135
136	if (*offset)
137		return -EINVAL;
138
139	retval = oprofilefs_ulong_from_user(&val, buf, count);
140	if (retval <= 0)
141		return retval;
142
143	if (val != 0 && val != 1)
144		return -EINVAL;
145
146	if (oprofile_started)
147		/*
148		 * save to do without locking as we set
149		 * hwsampler_running in start() when start_mutex is
150		 * held
151		 */
152		return -EBUSY;
153
154	hwsampler_enabled = val;
155
156	return count;
157}
158
159static const struct file_operations hwsampler_fops = {
160	.read		= hwsampler_read,
161	.write		= hwsampler_write,
162};
163
164/*
165 * File ops used for:
166 * /dev/oprofile/0/count
167 * /dev/oprofile/hwsampling/hw_interval  (cpu_type = timer)
168 *
169 * Make sure that the value is within the hardware range.
170 */
171
172static ssize_t hw_interval_read(struct file *file, char __user *buf,
173				size_t count, loff_t *offset)
174{
175	return oprofilefs_ulong_to_user(oprofile_hw_interval, buf,
176					count, offset);
177}
178
179static ssize_t hw_interval_write(struct file *file, char const __user *buf,
180				 size_t count, loff_t *offset)
181{
182	unsigned long val;
183	int retval;
184
185	if (*offset)
186		return -EINVAL;
187	retval = oprofilefs_ulong_from_user(&val, buf, count);
188	if (retval <= 0)
189		return retval;
190	if (val < oprofile_min_interval)
191		oprofile_hw_interval = oprofile_min_interval;
192	else if (val > oprofile_max_interval)
193		oprofile_hw_interval = oprofile_max_interval;
194	else
195		oprofile_hw_interval = val;
196
197	return count;
198}
199
200static const struct file_operations hw_interval_fops = {
201	.read		= hw_interval_read,
202	.write		= hw_interval_write,
203};
204
205/*
206 * File ops used for:
207 * /dev/oprofile/0/event
208 * Only a single event with number 0 is supported with this counter.
209 *
210 * /dev/oprofile/0/unit_mask
211 * This is a dummy file needed by the user space tools.
212 * No value other than 0 is accepted or returned.
213 */
214
215static ssize_t hwsampler_zero_read(struct file *file, char __user *buf,
216				    size_t count, loff_t *offset)
217{
218	return oprofilefs_ulong_to_user(0, buf, count, offset);
219}
220
221static ssize_t hwsampler_zero_write(struct file *file, char const __user *buf,
222				     size_t count, loff_t *offset)
223{
224	unsigned long val;
225	int retval;
226
227	if (*offset)
228		return -EINVAL;
229
230	retval = oprofilefs_ulong_from_user(&val, buf, count);
231	if (retval <= 0)
232		return retval;
233	if (val != 0)
234		return -EINVAL;
235	return count;
236}
237
238static const struct file_operations zero_fops = {
239	.read		= hwsampler_zero_read,
240	.write		= hwsampler_zero_write,
241};
242
243/* /dev/oprofile/0/kernel file ops.  */
244
245static ssize_t hwsampler_kernel_read(struct file *file, char __user *buf,
246				     size_t count, loff_t *offset)
247{
248	return oprofilefs_ulong_to_user(counter_config.kernel,
249					buf, count, offset);
250}
251
252static ssize_t hwsampler_kernel_write(struct file *file, char const __user *buf,
253				      size_t count, loff_t *offset)
254{
255	unsigned long val;
256	int retval;
257
258	if (*offset)
259		return -EINVAL;
260
261	retval = oprofilefs_ulong_from_user(&val, buf, count);
262	if (retval <= 0)
263		return retval;
264
265	if (val != 0 && val != 1)
266		return -EINVAL;
267
268	counter_config.kernel = val;
269
270	return count;
271}
272
273static const struct file_operations kernel_fops = {
274	.read		= hwsampler_kernel_read,
275	.write		= hwsampler_kernel_write,
276};
277
278/* /dev/oprofile/0/user file ops. */
279
280static ssize_t hwsampler_user_read(struct file *file, char __user *buf,
281				   size_t count, loff_t *offset)
282{
283	return oprofilefs_ulong_to_user(counter_config.user,
284					buf, count, offset);
285}
286
287static ssize_t hwsampler_user_write(struct file *file, char const __user *buf,
288				    size_t count, loff_t *offset)
289{
290	unsigned long val;
291	int retval;
292
293	if (*offset)
294		return -EINVAL;
295
296	retval = oprofilefs_ulong_from_user(&val, buf, count);
297	if (retval <= 0)
298		return retval;
299
300	if (val != 0 && val != 1)
301		return -EINVAL;
302
303	counter_config.user = val;
304
305	return count;
306}
307
308static const struct file_operations user_fops = {
309	.read		= hwsampler_user_read,
310	.write		= hwsampler_user_write,
311};
312
313
314/*
315 * File ops used for: /dev/oprofile/timer/enabled
316 * The value always has to be the inverted value of hwsampler_enabled. So
317 * no separate variable is created. That way we do not need locking.
318 */
319
320static ssize_t timer_enabled_read(struct file *file, char __user *buf,
321				  size_t count, loff_t *offset)
322{
323	return oprofilefs_ulong_to_user(!hwsampler_enabled, buf, count, offset);
324}
325
326static ssize_t timer_enabled_write(struct file *file, char const __user *buf,
327				   size_t count, loff_t *offset)
328{
329	unsigned long val;
330	int retval;
331
332	if (*offset)
333		return -EINVAL;
334
335	retval = oprofilefs_ulong_from_user(&val, buf, count);
336	if (retval <= 0)
337		return retval;
338
339	if (val != 0 && val != 1)
340		return -EINVAL;
341
342	/* Timer cannot be disabled without having hardware sampling.  */
343	if (val == 0 && !hwsampler_available)
344		return -EINVAL;
345
346	if (oprofile_started)
347		/*
348		 * save to do without locking as we set
349		 * hwsampler_running in start() when start_mutex is
350		 * held
351		 */
352		return -EBUSY;
353
354	hwsampler_enabled = !val;
355
356	return count;
357}
358
359static const struct file_operations timer_enabled_fops = {
360	.read		= timer_enabled_read,
361	.write		= timer_enabled_write,
362};
363
364
365static int oprofile_create_hwsampling_files(struct dentry *root)
366{
367	struct dentry *dir;
368
369	dir = oprofilefs_mkdir(root, "timer");
370	if (!dir)
371		return -EINVAL;
372
373	oprofilefs_create_file(dir, "enabled", &timer_enabled_fops);
374
375	if (!hwsampler_available)
376		return 0;
377
378	/* reinitialize default values */
379	hwsampler_enabled = 1;
380	counter_config.kernel = 1;
381	counter_config.user = 1;
382
383	if (!force_cpu_type) {
384		/*
385		 * Create the counter file system.  A single virtual
386		 * counter is created which can be used to
387		 * enable/disable hardware sampling dynamically from
388		 * user space.  The user space will configure a single
389		 * counter with a single event.  The value of 'event'
390		 * and 'unit_mask' are not evaluated by the kernel code
391		 * and can only be set to 0.
392		 */
393
394		dir = oprofilefs_mkdir(root, "0");
395		if (!dir)
396			return -EINVAL;
397
398		oprofilefs_create_file(dir, "enabled", &hwsampler_fops);
399		oprofilefs_create_file(dir, "event", &zero_fops);
400		oprofilefs_create_file(dir, "count", &hw_interval_fops);
401		oprofilefs_create_file(dir, "unit_mask", &zero_fops);
402		oprofilefs_create_file(dir, "kernel", &kernel_fops);
403		oprofilefs_create_file(dir, "user", &user_fops);
404		oprofilefs_create_ulong(dir, "hw_sdbt_blocks",
405					&oprofile_sdbt_blocks);
406
407	} else {
408		/*
409		 * Hardware sampling can be used but the cpu_type is
410		 * forced to timer in order to deal with legacy user
411		 * space tools.  The /dev/oprofile/hwsampling fs is
412		 * provided in that case.
413		 */
414		dir = oprofilefs_mkdir(root, "hwsampling");
415		if (!dir)
416			return -EINVAL;
417
418		oprofilefs_create_file(dir, "hwsampler",
419				       &hwsampler_fops);
420		oprofilefs_create_file(dir, "hw_interval",
421				       &hw_interval_fops);
422		oprofilefs_create_ro_ulong(dir, "hw_min_interval",
423					   &oprofile_min_interval);
424		oprofilefs_create_ro_ulong(dir, "hw_max_interval",
425					   &oprofile_max_interval);
426		oprofilefs_create_ulong(dir, "hw_sdbt_blocks",
427					&oprofile_sdbt_blocks);
428	}
429	return 0;
430}
431
432static int oprofile_hwsampler_init(struct oprofile_operations *ops)
433{
434	/*
435	 * Initialize the timer mode infrastructure as well in order
436	 * to be able to switch back dynamically.  oprofile_timer_init
437	 * is not supposed to fail.
438	 */
439	if (oprofile_timer_init(ops))
440		BUG();
441
442	memcpy(&timer_ops, ops, sizeof(timer_ops));
443	ops->create_files = oprofile_create_hwsampling_files;
444
445	/*
446	 * If the user space tools do not support newer cpu types,
447	 * the force_cpu_type module parameter
448	 * can be used to always return \"timer\" as cpu type.
449	 */
450	if (force_cpu_type != timer) {
451		struct cpuid id;
452
453		get_cpu_id (&id);
454
455		switch (id.machine) {
456		case 0x2097: case 0x2098: ops->cpu_type = "s390/z10"; break;
457		case 0x2817: case 0x2818: ops->cpu_type = "s390/z196"; break;
458		case 0x2827: case 0x2828: ops->cpu_type = "s390/zEC12"; break;
459		default: return -ENODEV;
460		}
461	}
462
463	if (hwsampler_setup())
464		return -ENODEV;
465
466	/*
467	 * Query the range for the sampling interval from the
468	 * hardware.
469	 */
470	oprofile_min_interval = hwsampler_query_min_interval();
471	if (oprofile_min_interval == 0)
472		return -ENODEV;
473	oprofile_max_interval = hwsampler_query_max_interval();
474	if (oprofile_max_interval == 0)
475		return -ENODEV;
476
477	/* The initial value should be sane */
478	if (oprofile_hw_interval < oprofile_min_interval)
479		oprofile_hw_interval = oprofile_min_interval;
480	if (oprofile_hw_interval > oprofile_max_interval)
481		oprofile_hw_interval = oprofile_max_interval;
482
483	printk(KERN_INFO "oprofile: System z hardware sampling "
484	       "facility found.\n");
485
486	ops->start = oprofile_hwsampler_start;
487	ops->stop = oprofile_hwsampler_stop;
488
489	return 0;
490}
491
/*
 * Counterpart of oprofile_hwsampler_init(): calls hwsampler_shutdown().
 * Any value hwsampler_shutdown() may return is not looked at.
 */
static void oprofile_hwsampler_exit(void)
{
	hwsampler_shutdown();
}
496
497int __init oprofile_arch_init(struct oprofile_operations *ops)
498{
499	ops->backtrace = s390_backtrace;
500
501	/*
502	 * -ENODEV is not reported to the caller.  The module itself
503         * will use the timer mode sampling as fallback and this is
504         * always available.
505	 */
506	hwsampler_available = oprofile_hwsampler_init(ops) == 0;
507
508	return 0;
509}
510
/*
 * Architecture exit hook: runs oprofile_hwsampler_exit().  The call is
 * made unconditionally, i.e. also when oprofile_arch_init() recorded
 * that hardware sampling is not available.
 */
void oprofile_arch_exit(void)
{
	oprofile_hwsampler_exit();
}
515