1/* 2 * Copyright IBM Corp. 2007, 2011 3 * Author(s): Heiko Carstens <heiko.carstens@de.ibm.com> 4 */ 5 6#define KMSG_COMPONENT "cpu" 7#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt 8 9#include <linux/workqueue.h> 10#include <linux/cpuset.h> 11#include <linux/device.h> 12#include <linux/export.h> 13#include <linux/kernel.h> 14#include <linux/sched.h> 15#include <linux/delay.h> 16#include <linux/init.h> 17#include <linux/slab.h> 18#include <linux/cpu.h> 19#include <linux/smp.h> 20#include <linux/mm.h> 21#include <linux/nodemask.h> 22#include <linux/node.h> 23#include <asm/sysinfo.h> 24#include <asm/numa.h> 25 26#define PTF_HORIZONTAL (0UL) 27#define PTF_VERTICAL (1UL) 28#define PTF_CHECK (2UL) 29 30struct mask_info { 31 struct mask_info *next; 32 unsigned char id; 33 cpumask_t mask; 34}; 35 36static void set_topology_timer(void); 37static void topology_work_fn(struct work_struct *work); 38static struct sysinfo_15_1_x *tl_info; 39 40static int topology_enabled = 1; 41static DECLARE_WORK(topology_work, topology_work_fn); 42 43/* 44 * Socket/Book linked lists and per_cpu(cpu_topology) updates are 45 * protected by "sched_domains_mutex". 46 */ 47static struct mask_info socket_info; 48static struct mask_info book_info; 49 50DEFINE_PER_CPU(struct cpu_topology_s390, cpu_topology); 51EXPORT_PER_CPU_SYMBOL_GPL(cpu_topology); 52 53static cpumask_t cpu_group_map(struct mask_info *info, unsigned int cpu) 54{ 55 cpumask_t mask; 56 57 cpumask_copy(&mask, cpumask_of(cpu)); 58 if (!topology_enabled || !MACHINE_HAS_TOPOLOGY) 59 return mask; 60 for (; info; info = info->next) { 61 if (cpumask_test_cpu(cpu, &info->mask)) 62 return info->mask; 63 } 64 return mask; 65} 66 67static cpumask_t cpu_thread_map(unsigned int cpu) 68{ 69 cpumask_t mask; 70 int i; 71 72 cpumask_copy(&mask, cpumask_of(cpu)); 73 if (!topology_enabled || !MACHINE_HAS_TOPOLOGY) 74 return mask; 75 cpu -= cpu % (smp_cpu_mtid + 1); 76 for (i = 0; i <= smp_cpu_mtid; i++) 77 if (cpu_present(cpu + i)) 78 cpumask_set_cpu(cpu + i, &mask); 79 return mask; 80} 81 82static struct mask_info *add_cpus_to_mask(struct topology_core *tl_core, 83 struct mask_info *book, 84 struct mask_info *socket, 85 int one_socket_per_cpu) 86{ 87 struct cpu_topology_s390 *topo; 88 unsigned int core; 89 90 for_each_set_bit(core, &tl_core->mask[0], TOPOLOGY_CORE_BITS) { 91 unsigned int rcore; 92 int lcpu, i; 93 94 rcore = TOPOLOGY_CORE_BITS - 1 - core + tl_core->origin; 95 lcpu = smp_find_processor_id(rcore << smp_cpu_mt_shift); 96 if (lcpu < 0) 97 continue; 98 for (i = 0; i <= smp_cpu_mtid; i++) { 99 topo = &per_cpu(cpu_topology, lcpu + i); 100 topo->book_id = book->id; 101 topo->core_id = rcore; 102 topo->thread_id = lcpu + i; 103 cpumask_set_cpu(lcpu + i, &book->mask); 104 cpumask_set_cpu(lcpu + i, &socket->mask); 105 if (one_socket_per_cpu) 106 topo->socket_id = rcore; 107 else 108 topo->socket_id = socket->id; 109 smp_cpu_set_polarization(lcpu + i, tl_core->pp); 110 } 111 if (one_socket_per_cpu) 112 socket = socket->next; 113 } 114 return socket; 115} 116 117static void clear_masks(void) 118{ 119 struct mask_info *info; 120 121 info = &socket_info; 122 while (info) { 123 cpumask_clear(&info->mask); 124 info = info->next; 125 } 126 info = &book_info; 127 while (info) { 128 cpumask_clear(&info->mask); 129 info = info->next; 130 } 131} 132 133static union topology_entry *next_tle(union topology_entry *tle) 134{ 135 if (!tle->nl) 136 return (union topology_entry *)((struct topology_core *)tle + 1); 137 return (union topology_entry *)((struct topology_container *)tle + 1); 138} 139 140static void __tl_to_masks_generic(struct sysinfo_15_1_x *info) 141{ 142 struct mask_info *socket = &socket_info; 143 struct mask_info *book = &book_info; 144 union topology_entry *tle, *end; 145 146 tle = info->tle; 147 end = (union topology_entry *)((unsigned long)info + info->length); 148 while (tle < end) { 149 switch (tle->nl) { 150 case 2: 151 book = book->next; 152 book->id = tle->container.id; 153 break; 154 case 1: 155 socket = socket->next; 156 socket->id = tle->container.id; 157 break; 158 case 0: 159 add_cpus_to_mask(&tle->cpu, book, socket, 0); 160 break; 161 default: 162 clear_masks(); 163 return; 164 } 165 tle = next_tle(tle); 166 } 167} 168 169static void __tl_to_masks_z10(struct sysinfo_15_1_x *info) 170{ 171 struct mask_info *socket = &socket_info; 172 struct mask_info *book = &book_info; 173 union topology_entry *tle, *end; 174 175 tle = info->tle; 176 end = (union topology_entry *)((unsigned long)info + info->length); 177 while (tle < end) { 178 switch (tle->nl) { 179 case 1: 180 book = book->next; 181 book->id = tle->container.id; 182 break; 183 case 0: 184 socket = add_cpus_to_mask(&tle->cpu, book, socket, 1); 185 break; 186 default: 187 clear_masks(); 188 return; 189 } 190 tle = next_tle(tle); 191 } 192} 193 194static void tl_to_masks(struct sysinfo_15_1_x *info) 195{ 196 struct cpuid cpu_id; 197 198 get_cpu_id(&cpu_id); 199 clear_masks(); 200 switch (cpu_id.machine) { 201 case 0x2097: 202 case 0x2098: 203 __tl_to_masks_z10(info); 204 break; 205 default: 206 __tl_to_masks_generic(info); 207 } 208} 209 210static void topology_update_polarization_simple(void) 211{ 212 int cpu; 213 214 mutex_lock(&smp_cpu_state_mutex); 215 for_each_possible_cpu(cpu) 216 smp_cpu_set_polarization(cpu, POLARIZATION_HRZ); 217 mutex_unlock(&smp_cpu_state_mutex); 218} 219 220static int ptf(unsigned long fc) 221{ 222 int rc; 223 224 asm volatile( 225 " .insn rre,0xb9a20000,%1,%1\n" 226 " ipm %0\n" 227 " srl %0,28\n" 228 : "=d" (rc) 229 : "d" (fc) : "cc"); 230 return rc; 231} 232 233int topology_set_cpu_management(int fc) 234{ 235 int cpu, rc; 236 237 if (!MACHINE_HAS_TOPOLOGY) 238 return -EOPNOTSUPP; 239 if (fc) 240 rc = ptf(PTF_VERTICAL); 241 else 242 rc = ptf(PTF_HORIZONTAL); 243 if (rc) 244 return -EBUSY; 245 for_each_possible_cpu(cpu) 246 smp_cpu_set_polarization(cpu, POLARIZATION_UNKNOWN); 247 return rc; 248} 249 250static void update_cpu_masks(void) 251{ 252 struct cpu_topology_s390 *topo; 253 int cpu; 254 255 for_each_possible_cpu(cpu) { 256 topo = &per_cpu(cpu_topology, cpu); 257 topo->thread_mask = cpu_thread_map(cpu); 258 topo->core_mask = cpu_group_map(&socket_info, cpu); 259 topo->book_mask = cpu_group_map(&book_info, cpu); 260 if (!MACHINE_HAS_TOPOLOGY) { 261 topo->thread_id = cpu; 262 topo->core_id = cpu; 263 topo->socket_id = cpu; 264 topo->book_id = cpu; 265 } 266 } 267 numa_update_cpu_topology(); 268} 269 270void store_topology(struct sysinfo_15_1_x *info) 271{ 272 if (topology_max_mnest >= 3) 273 stsi(info, 15, 1, 3); 274 else 275 stsi(info, 15, 1, 2); 276} 277 278int arch_update_cpu_topology(void) 279{ 280 struct sysinfo_15_1_x *info = tl_info; 281 struct device *dev; 282 int cpu, rc = 0; 283 284 if (MACHINE_HAS_TOPOLOGY) { 285 rc = 1; 286 store_topology(info); 287 tl_to_masks(info); 288 } 289 update_cpu_masks(); 290 if (!MACHINE_HAS_TOPOLOGY) 291 topology_update_polarization_simple(); 292 for_each_online_cpu(cpu) { 293 dev = get_cpu_device(cpu); 294 kobject_uevent(&dev->kobj, KOBJ_CHANGE); 295 } 296 return rc; 297} 298 299static void topology_work_fn(struct work_struct *work) 300{ 301 rebuild_sched_domains(); 302} 303 304void topology_schedule_update(void) 305{ 306 schedule_work(&topology_work); 307} 308 309static void topology_timer_fn(unsigned long ignored) 310{ 311 if (ptf(PTF_CHECK)) 312 topology_schedule_update(); 313 set_topology_timer(); 314} 315 316static struct timer_list topology_timer = 317 TIMER_DEFERRED_INITIALIZER(topology_timer_fn, 0, 0); 318 319static atomic_t topology_poll = ATOMIC_INIT(0); 320 321static void set_topology_timer(void) 322{ 323 if (atomic_add_unless(&topology_poll, -1, 0)) 324 mod_timer(&topology_timer, jiffies + HZ / 10); 325 else 326 mod_timer(&topology_timer, jiffies + HZ * 60); 327} 328 329void topology_expect_change(void) 330{ 331 if (!MACHINE_HAS_TOPOLOGY) 332 return; 333 /* This is racy, but it doesn't matter since it is just a heuristic. 334 * Worst case is that we poll in a higher frequency for a bit longer. 335 */ 336 if (atomic_read(&topology_poll) > 60) 337 return; 338 atomic_add(60, &topology_poll); 339 set_topology_timer(); 340} 341 342static int cpu_management; 343 344static ssize_t dispatching_show(struct device *dev, 345 struct device_attribute *attr, 346 char *buf) 347{ 348 ssize_t count; 349 350 mutex_lock(&smp_cpu_state_mutex); 351 count = sprintf(buf, "%d\n", cpu_management); 352 mutex_unlock(&smp_cpu_state_mutex); 353 return count; 354} 355 356static ssize_t dispatching_store(struct device *dev, 357 struct device_attribute *attr, 358 const char *buf, 359 size_t count) 360{ 361 int val, rc; 362 char delim; 363 364 if (sscanf(buf, "%d %c", &val, &delim) != 1) 365 return -EINVAL; 366 if (val != 0 && val != 1) 367 return -EINVAL; 368 rc = 0; 369 get_online_cpus(); 370 mutex_lock(&smp_cpu_state_mutex); 371 if (cpu_management == val) 372 goto out; 373 rc = topology_set_cpu_management(val); 374 if (rc) 375 goto out; 376 cpu_management = val; 377 topology_expect_change(); 378out: 379 mutex_unlock(&smp_cpu_state_mutex); 380 put_online_cpus(); 381 return rc ? rc : count; 382} 383static DEVICE_ATTR(dispatching, 0644, dispatching_show, 384 dispatching_store); 385 386static ssize_t cpu_polarization_show(struct device *dev, 387 struct device_attribute *attr, char *buf) 388{ 389 int cpu = dev->id; 390 ssize_t count; 391 392 mutex_lock(&smp_cpu_state_mutex); 393 switch (smp_cpu_get_polarization(cpu)) { 394 case POLARIZATION_HRZ: 395 count = sprintf(buf, "horizontal\n"); 396 break; 397 case POLARIZATION_VL: 398 count = sprintf(buf, "vertical:low\n"); 399 break; 400 case POLARIZATION_VM: 401 count = sprintf(buf, "vertical:medium\n"); 402 break; 403 case POLARIZATION_VH: 404 count = sprintf(buf, "vertical:high\n"); 405 break; 406 default: 407 count = sprintf(buf, "unknown\n"); 408 break; 409 } 410 mutex_unlock(&smp_cpu_state_mutex); 411 return count; 412} 413static DEVICE_ATTR(polarization, 0444, cpu_polarization_show, NULL); 414 415static struct attribute *topology_cpu_attrs[] = { 416 &dev_attr_polarization.attr, 417 NULL, 418}; 419 420static struct attribute_group topology_cpu_attr_group = { 421 .attrs = topology_cpu_attrs, 422}; 423 424int topology_cpu_init(struct cpu *cpu) 425{ 426 return sysfs_create_group(&cpu->dev.kobj, &topology_cpu_attr_group); 427} 428 429static const struct cpumask *cpu_thread_mask(int cpu) 430{ 431 return &per_cpu(cpu_topology, cpu).thread_mask; 432} 433 434 435const struct cpumask *cpu_coregroup_mask(int cpu) 436{ 437 return &per_cpu(cpu_topology, cpu).core_mask; 438} 439 440static const struct cpumask *cpu_book_mask(int cpu) 441{ 442 return &per_cpu(cpu_topology, cpu).book_mask; 443} 444 445static int __init early_parse_topology(char *p) 446{ 447 if (strncmp(p, "off", 3)) 448 return 0; 449 topology_enabled = 0; 450 return 0; 451} 452early_param("topology", early_parse_topology); 453 454static struct sched_domain_topology_level s390_topology[] = { 455 { cpu_thread_mask, cpu_smt_flags, SD_INIT_NAME(SMT) }, 456 { cpu_coregroup_mask, cpu_core_flags, SD_INIT_NAME(MC) }, 457 { cpu_book_mask, SD_INIT_NAME(BOOK) }, 458 { cpu_cpu_mask, SD_INIT_NAME(DIE) }, 459 { NULL, }, 460}; 461 462static void __init alloc_masks(struct sysinfo_15_1_x *info, 463 struct mask_info *mask, int offset) 464{ 465 int i, nr_masks; 466 467 nr_masks = info->mag[TOPOLOGY_NR_MAG - offset]; 468 for (i = 0; i < info->mnest - offset; i++) 469 nr_masks *= info->mag[TOPOLOGY_NR_MAG - offset - 1 - i]; 470 nr_masks = max(nr_masks, 1); 471 for (i = 0; i < nr_masks; i++) { 472 mask->next = kzalloc(sizeof(*mask->next), GFP_KERNEL); 473 mask = mask->next; 474 } 475} 476 477static int __init s390_topology_init(void) 478{ 479 struct sysinfo_15_1_x *info; 480 int i; 481 482 if (!MACHINE_HAS_TOPOLOGY) 483 return 0; 484 tl_info = (struct sysinfo_15_1_x *)__get_free_page(GFP_KERNEL); 485 info = tl_info; 486 store_topology(info); 487 pr_info("The CPU configuration topology of the machine is:"); 488 for (i = 0; i < TOPOLOGY_NR_MAG; i++) 489 printk(KERN_CONT " %d", info->mag[i]); 490 printk(KERN_CONT " / %d\n", info->mnest); 491 alloc_masks(info, &socket_info, 1); 492 alloc_masks(info, &book_info, 2); 493 set_sched_topology(s390_topology); 494 return 0; 495} 496early_initcall(s390_topology_init); 497 498static int __init topology_init(void) 499{ 500 if (MACHINE_HAS_TOPOLOGY) 501 set_topology_timer(); 502 else 503 topology_update_polarization_simple(); 504 return device_create_file(cpu_subsys.dev_root, &dev_attr_dispatching); 505} 506device_initcall(topology_init); 507