1/* 2 * S390 Version 3 * Copyright IBM Corp. 2002, 2011 4 * Author(s): Thomas Spatzier (tspat@de.ibm.com) 5 * Author(s): Mahesh Salgaonkar (mahesh@linux.vnet.ibm.com) 6 * Author(s): Heinz Graalfs (graalfs@linux.vnet.ibm.com) 7 * Author(s): Andreas Krebbel (krebbel@linux.vnet.ibm.com) 8 * 9 * @remark Copyright 2002-2011 OProfile authors 10 */ 11 12#include <linux/oprofile.h> 13#include <linux/perf_event.h> 14#include <linux/init.h> 15#include <linux/errno.h> 16#include <linux/fs.h> 17#include <linux/module.h> 18#include <asm/processor.h> 19#include <asm/perf_event.h> 20 21#include "../../../drivers/oprofile/oprof.h" 22 23extern void s390_backtrace(struct pt_regs * const regs, unsigned int depth); 24 25#include "hwsampler.h" 26#include "op_counter.h" 27 28#define DEFAULT_INTERVAL 4127518 29 30#define DEFAULT_SDBT_BLOCKS 1 31#define DEFAULT_SDB_BLOCKS 511 32 33static unsigned long oprofile_hw_interval = DEFAULT_INTERVAL; 34static unsigned long oprofile_min_interval; 35static unsigned long oprofile_max_interval; 36 37static unsigned long oprofile_sdbt_blocks = DEFAULT_SDBT_BLOCKS; 38static unsigned long oprofile_sdb_blocks = DEFAULT_SDB_BLOCKS; 39 40static int hwsampler_enabled; 41static int hwsampler_running; /* start_mutex must be held to change */ 42static int hwsampler_available; 43 44static struct oprofile_operations timer_ops; 45 46struct op_counter_config counter_config; 47 48enum __force_cpu_type { 49 reserved = 0, /* do not force */ 50 timer, 51}; 52static int force_cpu_type; 53 54static int set_cpu_type(const char *str, struct kernel_param *kp) 55{ 56 if (!strcmp(str, "timer")) { 57 force_cpu_type = timer; 58 printk(KERN_INFO "oprofile: forcing timer to be returned " 59 "as cpu type\n"); 60 } else { 61 force_cpu_type = 0; 62 } 63 64 return 0; 65} 66module_param_call(cpu_type, set_cpu_type, NULL, NULL, 0); 67MODULE_PARM_DESC(cpu_type, "Force legacy basic mode sampling" 68 "(report cpu_type \"timer\""); 69 70static int __oprofile_hwsampler_start(void) 71{ 72 int retval; 73 74 retval = hwsampler_allocate(oprofile_sdbt_blocks, oprofile_sdb_blocks); 75 if (retval) 76 return retval; 77 78 retval = hwsampler_start_all(oprofile_hw_interval); 79 if (retval) 80 hwsampler_deallocate(); 81 82 return retval; 83} 84 85static int oprofile_hwsampler_start(void) 86{ 87 int retval; 88 89 hwsampler_running = hwsampler_enabled; 90 91 if (!hwsampler_running) 92 return timer_ops.start(); 93 94 retval = perf_reserve_sampling(); 95 if (retval) 96 return retval; 97 98 retval = __oprofile_hwsampler_start(); 99 if (retval) 100 perf_release_sampling(); 101 102 return retval; 103} 104 105static void oprofile_hwsampler_stop(void) 106{ 107 if (!hwsampler_running) { 108 timer_ops.stop(); 109 return; 110 } 111 112 hwsampler_stop_all(); 113 hwsampler_deallocate(); 114 perf_release_sampling(); 115 return; 116} 117 118/* 119 * File ops used for: 120 * /dev/oprofile/0/enabled 121 * /dev/oprofile/hwsampling/hwsampler (cpu_type = timer) 122 */ 123 124static ssize_t hwsampler_read(struct file *file, char __user *buf, 125 size_t count, loff_t *offset) 126{ 127 return oprofilefs_ulong_to_user(hwsampler_enabled, buf, count, offset); 128} 129 130static ssize_t hwsampler_write(struct file *file, char const __user *buf, 131 size_t count, loff_t *offset) 132{ 133 unsigned long val; 134 int retval; 135 136 if (*offset) 137 return -EINVAL; 138 139 retval = oprofilefs_ulong_from_user(&val, buf, count); 140 if (retval <= 0) 141 return retval; 142 143 if (val != 0 && val != 1) 144 return -EINVAL; 145 146 if (oprofile_started) 147 /* 148 * save to do without locking as we set 149 * hwsampler_running in start() when start_mutex is 150 * held 151 */ 152 return -EBUSY; 153 154 hwsampler_enabled = val; 155 156 return count; 157} 158 159static const struct file_operations hwsampler_fops = { 160 .read = hwsampler_read, 161 .write = hwsampler_write, 162}; 163 164/* 165 * File ops used for: 166 * /dev/oprofile/0/count 167 * /dev/oprofile/hwsampling/hw_interval (cpu_type = timer) 168 * 169 * Make sure that the value is within the hardware range. 170 */ 171 172static ssize_t hw_interval_read(struct file *file, char __user *buf, 173 size_t count, loff_t *offset) 174{ 175 return oprofilefs_ulong_to_user(oprofile_hw_interval, buf, 176 count, offset); 177} 178 179static ssize_t hw_interval_write(struct file *file, char const __user *buf, 180 size_t count, loff_t *offset) 181{ 182 unsigned long val; 183 int retval; 184 185 if (*offset) 186 return -EINVAL; 187 retval = oprofilefs_ulong_from_user(&val, buf, count); 188 if (retval <= 0) 189 return retval; 190 if (val < oprofile_min_interval) 191 oprofile_hw_interval = oprofile_min_interval; 192 else if (val > oprofile_max_interval) 193 oprofile_hw_interval = oprofile_max_interval; 194 else 195 oprofile_hw_interval = val; 196 197 return count; 198} 199 200static const struct file_operations hw_interval_fops = { 201 .read = hw_interval_read, 202 .write = hw_interval_write, 203}; 204 205/* 206 * File ops used for: 207 * /dev/oprofile/0/event 208 * Only a single event with number 0 is supported with this counter. 209 * 210 * /dev/oprofile/0/unit_mask 211 * This is a dummy file needed by the user space tools. 212 * No value other than 0 is accepted or returned. 213 */ 214 215static ssize_t hwsampler_zero_read(struct file *file, char __user *buf, 216 size_t count, loff_t *offset) 217{ 218 return oprofilefs_ulong_to_user(0, buf, count, offset); 219} 220 221static ssize_t hwsampler_zero_write(struct file *file, char const __user *buf, 222 size_t count, loff_t *offset) 223{ 224 unsigned long val; 225 int retval; 226 227 if (*offset) 228 return -EINVAL; 229 230 retval = oprofilefs_ulong_from_user(&val, buf, count); 231 if (retval <= 0) 232 return retval; 233 if (val != 0) 234 return -EINVAL; 235 return count; 236} 237 238static const struct file_operations zero_fops = { 239 .read = hwsampler_zero_read, 240 .write = hwsampler_zero_write, 241}; 242 243/* /dev/oprofile/0/kernel file ops. */ 244 245static ssize_t hwsampler_kernel_read(struct file *file, char __user *buf, 246 size_t count, loff_t *offset) 247{ 248 return oprofilefs_ulong_to_user(counter_config.kernel, 249 buf, count, offset); 250} 251 252static ssize_t hwsampler_kernel_write(struct file *file, char const __user *buf, 253 size_t count, loff_t *offset) 254{ 255 unsigned long val; 256 int retval; 257 258 if (*offset) 259 return -EINVAL; 260 261 retval = oprofilefs_ulong_from_user(&val, buf, count); 262 if (retval <= 0) 263 return retval; 264 265 if (val != 0 && val != 1) 266 return -EINVAL; 267 268 counter_config.kernel = val; 269 270 return count; 271} 272 273static const struct file_operations kernel_fops = { 274 .read = hwsampler_kernel_read, 275 .write = hwsampler_kernel_write, 276}; 277 278/* /dev/oprofile/0/user file ops. */ 279 280static ssize_t hwsampler_user_read(struct file *file, char __user *buf, 281 size_t count, loff_t *offset) 282{ 283 return oprofilefs_ulong_to_user(counter_config.user, 284 buf, count, offset); 285} 286 287static ssize_t hwsampler_user_write(struct file *file, char const __user *buf, 288 size_t count, loff_t *offset) 289{ 290 unsigned long val; 291 int retval; 292 293 if (*offset) 294 return -EINVAL; 295 296 retval = oprofilefs_ulong_from_user(&val, buf, count); 297 if (retval <= 0) 298 return retval; 299 300 if (val != 0 && val != 1) 301 return -EINVAL; 302 303 counter_config.user = val; 304 305 return count; 306} 307 308static const struct file_operations user_fops = { 309 .read = hwsampler_user_read, 310 .write = hwsampler_user_write, 311}; 312 313 314/* 315 * File ops used for: /dev/oprofile/timer/enabled 316 * The value always has to be the inverted value of hwsampler_enabled. So 317 * no separate variable is created. That way we do not need locking. 318 */ 319 320static ssize_t timer_enabled_read(struct file *file, char __user *buf, 321 size_t count, loff_t *offset) 322{ 323 return oprofilefs_ulong_to_user(!hwsampler_enabled, buf, count, offset); 324} 325 326static ssize_t timer_enabled_write(struct file *file, char const __user *buf, 327 size_t count, loff_t *offset) 328{ 329 unsigned long val; 330 int retval; 331 332 if (*offset) 333 return -EINVAL; 334 335 retval = oprofilefs_ulong_from_user(&val, buf, count); 336 if (retval <= 0) 337 return retval; 338 339 if (val != 0 && val != 1) 340 return -EINVAL; 341 342 /* Timer cannot be disabled without having hardware sampling. */ 343 if (val == 0 && !hwsampler_available) 344 return -EINVAL; 345 346 if (oprofile_started) 347 /* 348 * save to do without locking as we set 349 * hwsampler_running in start() when start_mutex is 350 * held 351 */ 352 return -EBUSY; 353 354 hwsampler_enabled = !val; 355 356 return count; 357} 358 359static const struct file_operations timer_enabled_fops = { 360 .read = timer_enabled_read, 361 .write = timer_enabled_write, 362}; 363 364 365static int oprofile_create_hwsampling_files(struct dentry *root) 366{ 367 struct dentry *dir; 368 369 dir = oprofilefs_mkdir(root, "timer"); 370 if (!dir) 371 return -EINVAL; 372 373 oprofilefs_create_file(dir, "enabled", &timer_enabled_fops); 374 375 if (!hwsampler_available) 376 return 0; 377 378 /* reinitialize default values */ 379 hwsampler_enabled = 1; 380 counter_config.kernel = 1; 381 counter_config.user = 1; 382 383 if (!force_cpu_type) { 384 /* 385 * Create the counter file system. A single virtual 386 * counter is created which can be used to 387 * enable/disable hardware sampling dynamically from 388 * user space. The user space will configure a single 389 * counter with a single event. The value of 'event' 390 * and 'unit_mask' are not evaluated by the kernel code 391 * and can only be set to 0. 392 */ 393 394 dir = oprofilefs_mkdir(root, "0"); 395 if (!dir) 396 return -EINVAL; 397 398 oprofilefs_create_file(dir, "enabled", &hwsampler_fops); 399 oprofilefs_create_file(dir, "event", &zero_fops); 400 oprofilefs_create_file(dir, "count", &hw_interval_fops); 401 oprofilefs_create_file(dir, "unit_mask", &zero_fops); 402 oprofilefs_create_file(dir, "kernel", &kernel_fops); 403 oprofilefs_create_file(dir, "user", &user_fops); 404 oprofilefs_create_ulong(dir, "hw_sdbt_blocks", 405 &oprofile_sdbt_blocks); 406 407 } else { 408 /* 409 * Hardware sampling can be used but the cpu_type is 410 * forced to timer in order to deal with legacy user 411 * space tools. The /dev/oprofile/hwsampling fs is 412 * provided in that case. 413 */ 414 dir = oprofilefs_mkdir(root, "hwsampling"); 415 if (!dir) 416 return -EINVAL; 417 418 oprofilefs_create_file(dir, "hwsampler", 419 &hwsampler_fops); 420 oprofilefs_create_file(dir, "hw_interval", 421 &hw_interval_fops); 422 oprofilefs_create_ro_ulong(dir, "hw_min_interval", 423 &oprofile_min_interval); 424 oprofilefs_create_ro_ulong(dir, "hw_max_interval", 425 &oprofile_max_interval); 426 oprofilefs_create_ulong(dir, "hw_sdbt_blocks", 427 &oprofile_sdbt_blocks); 428 } 429 return 0; 430} 431 432static int oprofile_hwsampler_init(struct oprofile_operations *ops) 433{ 434 /* 435 * Initialize the timer mode infrastructure as well in order 436 * to be able to switch back dynamically. oprofile_timer_init 437 * is not supposed to fail. 438 */ 439 if (oprofile_timer_init(ops)) 440 BUG(); 441 442 memcpy(&timer_ops, ops, sizeof(timer_ops)); 443 ops->create_files = oprofile_create_hwsampling_files; 444 445 /* 446 * If the user space tools do not support newer cpu types, 447 * the force_cpu_type module parameter 448 * can be used to always return \"timer\" as cpu type. 449 */ 450 if (force_cpu_type != timer) { 451 struct cpuid id; 452 453 get_cpu_id (&id); 454 455 switch (id.machine) { 456 case 0x2097: case 0x2098: ops->cpu_type = "s390/z10"; break; 457 case 0x2817: case 0x2818: ops->cpu_type = "s390/z196"; break; 458 case 0x2827: case 0x2828: ops->cpu_type = "s390/zEC12"; break; 459 default: return -ENODEV; 460 } 461 } 462 463 if (hwsampler_setup()) 464 return -ENODEV; 465 466 /* 467 * Query the range for the sampling interval from the 468 * hardware. 469 */ 470 oprofile_min_interval = hwsampler_query_min_interval(); 471 if (oprofile_min_interval == 0) 472 return -ENODEV; 473 oprofile_max_interval = hwsampler_query_max_interval(); 474 if (oprofile_max_interval == 0) 475 return -ENODEV; 476 477 /* The initial value should be sane */ 478 if (oprofile_hw_interval < oprofile_min_interval) 479 oprofile_hw_interval = oprofile_min_interval; 480 if (oprofile_hw_interval > oprofile_max_interval) 481 oprofile_hw_interval = oprofile_max_interval; 482 483 printk(KERN_INFO "oprofile: System z hardware sampling " 484 "facility found.\n"); 485 486 ops->start = oprofile_hwsampler_start; 487 ops->stop = oprofile_hwsampler_stop; 488 489 return 0; 490} 491 492static void oprofile_hwsampler_exit(void) 493{ 494 hwsampler_shutdown(); 495} 496 497int __init oprofile_arch_init(struct oprofile_operations *ops) 498{ 499 ops->backtrace = s390_backtrace; 500 501 /* 502 * -ENODEV is not reported to the caller. The module itself 503 * will use the timer mode sampling as fallback and this is 504 * always available. 505 */ 506 hwsampler_available = oprofile_hwsampler_init(ops) == 0; 507 508 return 0; 509} 510 511void oprofile_arch_exit(void) 512{ 513 oprofile_hwsampler_exit(); 514} 515