1 /*
2  * S390 Version
3  *   Copyright IBM Corp. 2002, 2011
4  *   Author(s): Thomas Spatzier (tspat@de.ibm.com)
5  *   Author(s): Mahesh Salgaonkar (mahesh@linux.vnet.ibm.com)
6  *   Author(s): Heinz Graalfs (graalfs@linux.vnet.ibm.com)
7  *   Author(s): Andreas Krebbel (krebbel@linux.vnet.ibm.com)
8  *
9  * @remark Copyright 2002-2011 OProfile authors
10  */
11 
12 #include <linux/oprofile.h>
13 #include <linux/perf_event.h>
14 #include <linux/init.h>
15 #include <linux/errno.h>
16 #include <linux/fs.h>
17 #include <linux/module.h>
18 #include <asm/processor.h>
19 #include <asm/perf_event.h>
20 
21 #include "../../../drivers/oprofile/oprof.h"
22 
23 extern void s390_backtrace(struct pt_regs * const regs, unsigned int depth);
24 
25 #include "hwsampler.h"
26 #include "op_counter.h"
27 
/* Initial value of oprofile_hw_interval (clamped to the hardware range at init). */
#define DEFAULT_INTERVAL	4127518

/* Initial values of the two block counts handed to hwsampler_allocate(). */
#define DEFAULT_SDBT_BLOCKS	1
#define DEFAULT_SDB_BLOCKS	511

/* Sampling interval passed to hwsampler_start_all(). */
static unsigned long oprofile_hw_interval = DEFAULT_INTERVAL;
/* Interval limits; filled in from the hwsampler_query_*_interval() calls. */
static unsigned long oprofile_min_interval;
static unsigned long oprofile_max_interval;

/* Arguments for hwsampler_allocate() when hardware sampling is started. */
static unsigned long oprofile_sdbt_blocks = DEFAULT_SDBT_BLOCKS;
static unsigned long oprofile_sdb_blocks = DEFAULT_SDB_BLOCKS;

/* Requested mode: non-zero selects hardware sampling, zero the timer mode. */
static int hwsampler_enabled;
/* Copy of hwsampler_enabled taken in oprofile_hwsampler_start(). */
static int hwsampler_running;	/* start_mutex must be held to change */
/* Non-zero when oprofile_hwsampler_init() returned 0. */
static int hwsampler_available;

/* Copy of the operations as filled in by oprofile_timer_init(). */
static struct oprofile_operations timer_ops;

/* Holds the values behind the "kernel" and "user" counter files. */
struct op_counter_config counter_config;

/* Values stored in force_cpu_type by set_cpu_type(). */
enum __force_cpu_type {
	reserved = 0,		/* do not force */
	timer,
};
static int force_cpu_type;
53 
set_cpu_type(const char * str,struct kernel_param * kp)54 static int set_cpu_type(const char *str, struct kernel_param *kp)
55 {
56 	if (!strcmp(str, "timer")) {
57 		force_cpu_type = timer;
58 		printk(KERN_INFO "oprofile: forcing timer to be returned "
59 		                 "as cpu type\n");
60 	} else {
61 		force_cpu_type = 0;
62 	}
63 
64 	return 0;
65 }
66 module_param_call(cpu_type, set_cpu_type, NULL, NULL, 0);
67 MODULE_PARM_DESC(cpu_type, "Force legacy basic mode sampling"
68 		           "(report cpu_type \"timer\"");
69 
__oprofile_hwsampler_start(void)70 static int __oprofile_hwsampler_start(void)
71 {
72 	int retval;
73 
74 	retval = hwsampler_allocate(oprofile_sdbt_blocks, oprofile_sdb_blocks);
75 	if (retval)
76 		return retval;
77 
78 	retval = hwsampler_start_all(oprofile_hw_interval);
79 	if (retval)
80 		hwsampler_deallocate();
81 
82 	return retval;
83 }
84 
oprofile_hwsampler_start(void)85 static int oprofile_hwsampler_start(void)
86 {
87 	int retval;
88 
89 	hwsampler_running = hwsampler_enabled;
90 
91 	if (!hwsampler_running)
92 		return timer_ops.start();
93 
94 	retval = perf_reserve_sampling();
95 	if (retval)
96 		return retval;
97 
98 	retval = __oprofile_hwsampler_start();
99 	if (retval)
100 		perf_release_sampling();
101 
102 	return retval;
103 }
104 
oprofile_hwsampler_stop(void)105 static void oprofile_hwsampler_stop(void)
106 {
107 	if (!hwsampler_running) {
108 		timer_ops.stop();
109 		return;
110 	}
111 
112 	hwsampler_stop_all();
113 	hwsampler_deallocate();
114 	perf_release_sampling();
115 	return;
116 }
117 
118 /*
119  * File ops used for:
120  * /dev/oprofile/0/enabled
121  * /dev/oprofile/hwsampling/hwsampler  (cpu_type = timer)
122  */
123 
/*
 * Read handler of hwsampler_fops: hands the current value of
 * hwsampler_enabled to oprofilefs_ulong_to_user() and returns its result.
 */
static ssize_t hwsampler_read(struct file *file, char __user *buf,
			      size_t count, loff_t *offset)
{
	unsigned long enabled = hwsampler_enabled;

	return oprofilefs_ulong_to_user(enabled, buf, count, offset);
}
129 
/*
 * Write handler of hwsampler_fops: switches hardware sampling on (1)
 * or off (0).
 *
 * Returns count on success, -EINVAL for a non-zero offset or a value
 * other than 0 or 1, -EBUSY while oprofile is started, or the
 * non-positive result of oprofilefs_ulong_from_user().
 */
static ssize_t hwsampler_write(struct file *file, char const __user *buf,
			       size_t count, loff_t *offset)
{
	unsigned long enable;
	int rc;

	if (*offset != 0)
		return -EINVAL;

	rc = oprofilefs_ulong_from_user(&enable, buf, count);
	if (rc <= 0)
		return rc;

	if (enable > 1)
		return -EINVAL;

	/*
	 * Safe to do without locking as we set hwsampler_running in
	 * start() when start_mutex is held.
	 */
	if (oprofile_started)
		return -EBUSY;

	hwsampler_enabled = enable;

	return count;
}
158 
/* Read/write handlers for the files that expose hwsampler_enabled. */
static const struct file_operations hwsampler_fops = {
	.read		= hwsampler_read,
	.write		= hwsampler_write,
};
163 
164 /*
165  * File ops used for:
166  * /dev/oprofile/0/count
167  * /dev/oprofile/hwsampling/hw_interval  (cpu_type = timer)
168  *
169  * Make sure that the value is within the hardware range.
170  */
171 
/*
 * Read handler of hw_interval_fops: hands the current sampling
 * interval to oprofilefs_ulong_to_user() and returns its result.
 */
static ssize_t hw_interval_read(struct file *file, char __user *buf,
				size_t count, loff_t *offset)
{
	unsigned long interval = oprofile_hw_interval;

	return oprofilefs_ulong_to_user(interval, buf, count, offset);
}
178 
/*
 * Write handler of hw_interval_fops: stores a new sampling interval.
 *
 * Values below oprofile_min_interval or above oprofile_max_interval
 * are not rejected but replaced by the respective limit.
 *
 * Returns count on success, -EINVAL for a non-zero offset, or the
 * non-positive result of oprofilefs_ulong_from_user().
 */
static ssize_t hw_interval_write(struct file *file, char const __user *buf,
				 size_t count, loff_t *offset)
{
	unsigned long interval;
	int rc;

	if (*offset != 0)
		return -EINVAL;

	rc = oprofilefs_ulong_from_user(&interval, buf, count);
	if (rc <= 0)
		return rc;

	/* Clamp the requested value to the hardware range. */
	if (interval < oprofile_min_interval)
		interval = oprofile_min_interval;
	else if (interval > oprofile_max_interval)
		interval = oprofile_max_interval;

	oprofile_hw_interval = interval;

	return count;
}
199 
/* Read/write handlers for the files that expose oprofile_hw_interval. */
static const struct file_operations hw_interval_fops = {
	.read		= hw_interval_read,
	.write		= hw_interval_write,
};
204 
205 /*
206  * File ops used for:
207  * /dev/oprofile/0/event
208  * Only a single event with number 0 is supported with this counter.
209  *
210  * /dev/oprofile/0/unit_mask
211  * This is a dummy file needed by the user space tools.
212  * No value other than 0 is accepted or returned.
213  */
214 
/*
 * Read handler of zero_fops: always reports the value 0 through
 * oprofilefs_ulong_to_user() and returns its result.
 */
static ssize_t hwsampler_zero_read(struct file *file, char __user *buf,
				   size_t count, loff_t *offset)
{
	const unsigned long zero = 0;

	return oprofilefs_ulong_to_user(zero, buf, count, offset);
}
220 
/*
 * Write handler of zero_fops: accepts the value 0 only and stores
 * nothing.
 *
 * Returns count on success, -EINVAL for a non-zero offset or a
 * non-zero value, or the non-positive result of
 * oprofilefs_ulong_from_user().
 */
static ssize_t hwsampler_zero_write(struct file *file, char const __user *buf,
				    size_t count, loff_t *offset)
{
	unsigned long value;
	int rc;

	if (*offset != 0)
		return -EINVAL;

	rc = oprofilefs_ulong_from_user(&value, buf, count);
	if (rc <= 0)
		return rc;

	return value ? -EINVAL : count;
}
237 
/* Read/write handlers for files that only ever hold the value 0. */
static const struct file_operations zero_fops = {
	.read		= hwsampler_zero_read,
	.write		= hwsampler_zero_write,
};
242 
243 /* /dev/oprofile/0/kernel file ops.  */
244 
/*
 * Read handler of kernel_fops: hands counter_config.kernel to
 * oprofilefs_ulong_to_user() and returns its result.
 */
static ssize_t hwsampler_kernel_read(struct file *file, char __user *buf,
				     size_t count, loff_t *offset)
{
	unsigned long kernel = counter_config.kernel;

	return oprofilefs_ulong_to_user(kernel, buf, count, offset);
}
251 
/*
 * Write handler of kernel_fops: stores 0 or 1 in counter_config.kernel.
 *
 * Returns count on success, -EINVAL for a non-zero offset or a value
 * other than 0 or 1, or the non-positive result of
 * oprofilefs_ulong_from_user().
 */
static ssize_t hwsampler_kernel_write(struct file *file, char const __user *buf,
				      size_t count, loff_t *offset)
{
	unsigned long flag;
	int rc;

	if (*offset != 0)
		return -EINVAL;

	rc = oprofilefs_ulong_from_user(&flag, buf, count);
	if (rc <= 0)
		return rc;

	if (flag > 1)
		return -EINVAL;

	counter_config.kernel = flag;

	return count;
}
272 
/* Read/write handlers for the file that exposes counter_config.kernel. */
static const struct file_operations kernel_fops = {
	.read		= hwsampler_kernel_read,
	.write		= hwsampler_kernel_write,
};
277 
278 /* /dev/oprofile/0/user file ops. */
279 
/*
 * Read handler of user_fops: hands counter_config.user to
 * oprofilefs_ulong_to_user() and returns its result.
 */
static ssize_t hwsampler_user_read(struct file *file, char __user *buf,
				   size_t count, loff_t *offset)
{
	unsigned long user = counter_config.user;

	return oprofilefs_ulong_to_user(user, buf, count, offset);
}
286 
/*
 * Write handler of user_fops: stores 0 or 1 in counter_config.user.
 *
 * Returns count on success, -EINVAL for a non-zero offset or a value
 * other than 0 or 1, or the non-positive result of
 * oprofilefs_ulong_from_user().
 */
static ssize_t hwsampler_user_write(struct file *file, char const __user *buf,
				    size_t count, loff_t *offset)
{
	unsigned long flag;
	int rc;

	if (*offset != 0)
		return -EINVAL;

	rc = oprofilefs_ulong_from_user(&flag, buf, count);
	if (rc <= 0)
		return rc;

	if (flag > 1)
		return -EINVAL;

	counter_config.user = flag;

	return count;
}
307 
/* Read/write handlers for the file that exposes counter_config.user. */
static const struct file_operations user_fops = {
	.read		= hwsampler_user_read,
	.write		= hwsampler_user_write,
};
312 
313 
314 /*
315  * File ops used for: /dev/oprofile/timer/enabled
316  * The value always has to be the inverted value of hwsampler_enabled. So
317  * no separate variable is created. That way we do not need locking.
318  */
319 
/*
 * Read handler of timer_enabled_fops: reports the logical inverse of
 * hwsampler_enabled through oprofilefs_ulong_to_user().
 */
static ssize_t timer_enabled_read(struct file *file, char __user *buf,
				  size_t count, loff_t *offset)
{
	unsigned long timer_on = !hwsampler_enabled;

	return oprofilefs_ulong_to_user(timer_on, buf, count, offset);
}
325 
/*
 * Write handler of timer_enabled_fops: enables (1) or disables (0) the
 * timer mode by storing the inverted value in hwsampler_enabled.
 *
 * Returns count on success, -EINVAL for a non-zero offset, a value
 * other than 0 or 1, or an attempt to disable the timer while hardware
 * sampling is unavailable, -EBUSY while oprofile is started, or the
 * non-positive result of oprofilefs_ulong_from_user().
 */
static ssize_t timer_enabled_write(struct file *file, char const __user *buf,
				   size_t count, loff_t *offset)
{
	unsigned long timer_on;
	int rc;

	if (*offset != 0)
		return -EINVAL;

	rc = oprofilefs_ulong_from_user(&timer_on, buf, count);
	if (rc <= 0)
		return rc;

	if (timer_on > 1)
		return -EINVAL;

	/* Timer cannot be disabled without having hardware sampling.  */
	if (!(timer_on || hwsampler_available))
		return -EINVAL;

	/*
	 * Safe to do without locking as we set hwsampler_running in
	 * start() when start_mutex is held.
	 */
	if (oprofile_started)
		return -EBUSY;

	hwsampler_enabled = !timer_on;

	return count;
}
358 
/* Read/write handlers for the "enabled" file in the "timer" directory. */
static const struct file_operations timer_enabled_fops = {
	.read		= timer_enabled_read,
	.write		= timer_enabled_write,
};
363 
364 
oprofile_create_hwsampling_files(struct dentry * root)365 static int oprofile_create_hwsampling_files(struct dentry *root)
366 {
367 	struct dentry *dir;
368 
369 	dir = oprofilefs_mkdir(root, "timer");
370 	if (!dir)
371 		return -EINVAL;
372 
373 	oprofilefs_create_file(dir, "enabled", &timer_enabled_fops);
374 
375 	if (!hwsampler_available)
376 		return 0;
377 
378 	/* reinitialize default values */
379 	hwsampler_enabled = 1;
380 	counter_config.kernel = 1;
381 	counter_config.user = 1;
382 
383 	if (!force_cpu_type) {
384 		/*
385 		 * Create the counter file system.  A single virtual
386 		 * counter is created which can be used to
387 		 * enable/disable hardware sampling dynamically from
388 		 * user space.  The user space will configure a single
389 		 * counter with a single event.  The value of 'event'
390 		 * and 'unit_mask' are not evaluated by the kernel code
391 		 * and can only be set to 0.
392 		 */
393 
394 		dir = oprofilefs_mkdir(root, "0");
395 		if (!dir)
396 			return -EINVAL;
397 
398 		oprofilefs_create_file(dir, "enabled", &hwsampler_fops);
399 		oprofilefs_create_file(dir, "event", &zero_fops);
400 		oprofilefs_create_file(dir, "count", &hw_interval_fops);
401 		oprofilefs_create_file(dir, "unit_mask", &zero_fops);
402 		oprofilefs_create_file(dir, "kernel", &kernel_fops);
403 		oprofilefs_create_file(dir, "user", &user_fops);
404 		oprofilefs_create_ulong(dir, "hw_sdbt_blocks",
405 					&oprofile_sdbt_blocks);
406 
407 	} else {
408 		/*
409 		 * Hardware sampling can be used but the cpu_type is
410 		 * forced to timer in order to deal with legacy user
411 		 * space tools.  The /dev/oprofile/hwsampling fs is
412 		 * provided in that case.
413 		 */
414 		dir = oprofilefs_mkdir(root, "hwsampling");
415 		if (!dir)
416 			return -EINVAL;
417 
418 		oprofilefs_create_file(dir, "hwsampler",
419 				       &hwsampler_fops);
420 		oprofilefs_create_file(dir, "hw_interval",
421 				       &hw_interval_fops);
422 		oprofilefs_create_ro_ulong(dir, "hw_min_interval",
423 					   &oprofile_min_interval);
424 		oprofilefs_create_ro_ulong(dir, "hw_max_interval",
425 					   &oprofile_max_interval);
426 		oprofilefs_create_ulong(dir, "hw_sdbt_blocks",
427 					&oprofile_sdbt_blocks);
428 	}
429 	return 0;
430 }
431 
oprofile_hwsampler_init(struct oprofile_operations * ops)432 static int oprofile_hwsampler_init(struct oprofile_operations *ops)
433 {
434 	/*
435 	 * Initialize the timer mode infrastructure as well in order
436 	 * to be able to switch back dynamically.  oprofile_timer_init
437 	 * is not supposed to fail.
438 	 */
439 	if (oprofile_timer_init(ops))
440 		BUG();
441 
442 	memcpy(&timer_ops, ops, sizeof(timer_ops));
443 	ops->create_files = oprofile_create_hwsampling_files;
444 
445 	/*
446 	 * If the user space tools do not support newer cpu types,
447 	 * the force_cpu_type module parameter
448 	 * can be used to always return \"timer\" as cpu type.
449 	 */
450 	if (force_cpu_type != timer) {
451 		struct cpuid id;
452 
453 		get_cpu_id (&id);
454 
455 		switch (id.machine) {
456 		case 0x2097: case 0x2098: ops->cpu_type = "s390/z10"; break;
457 		case 0x2817: case 0x2818: ops->cpu_type = "s390/z196"; break;
458 		case 0x2827: case 0x2828: ops->cpu_type = "s390/zEC12"; break;
459 		default: return -ENODEV;
460 		}
461 	}
462 
463 	if (hwsampler_setup())
464 		return -ENODEV;
465 
466 	/*
467 	 * Query the range for the sampling interval from the
468 	 * hardware.
469 	 */
470 	oprofile_min_interval = hwsampler_query_min_interval();
471 	if (oprofile_min_interval == 0)
472 		return -ENODEV;
473 	oprofile_max_interval = hwsampler_query_max_interval();
474 	if (oprofile_max_interval == 0)
475 		return -ENODEV;
476 
477 	/* The initial value should be sane */
478 	if (oprofile_hw_interval < oprofile_min_interval)
479 		oprofile_hw_interval = oprofile_min_interval;
480 	if (oprofile_hw_interval > oprofile_max_interval)
481 		oprofile_hw_interval = oprofile_max_interval;
482 
483 	printk(KERN_INFO "oprofile: System z hardware sampling "
484 	       "facility found.\n");
485 
486 	ops->start = oprofile_hwsampler_start;
487 	ops->stop = oprofile_hwsampler_stop;
488 
489 	return 0;
490 }
491 
/* Counterpart of oprofile_hwsampler_init(): calls hwsampler_shutdown(). */
static void oprofile_hwsampler_exit(void)
{
	hwsampler_shutdown();
}
496 
oprofile_arch_init(struct oprofile_operations * ops)497 int __init oprofile_arch_init(struct oprofile_operations *ops)
498 {
499 	ops->backtrace = s390_backtrace;
500 
501 	/*
502 	 * -ENODEV is not reported to the caller.  The module itself
503          * will use the timer mode sampling as fallback and this is
504          * always available.
505 	 */
506 	hwsampler_available = oprofile_hwsampler_init(ops) == 0;
507 
508 	return 0;
509 }
510 
/* Architecture exit hook: delegates to oprofile_hwsampler_exit(). */
void oprofile_arch_exit(void)
{
	oprofile_hwsampler_exit();
}
515