root/arch/x86/xen/smp_pv.c


DEFINITIONS

This source file includes the following definitions.
  1. cpu_bringup
  2. cpu_bringup_and_idle
  3. xen_smp_intr_free_pv
  4. xen_smp_intr_init_pv
  5. xen_fill_possible_map
  6. xen_filter_cpu_maps
  7. xen_pv_smp_prepare_boot_cpu
  8. xen_pv_smp_prepare_cpus
  9. cpu_initialize_context
  10. xen_pv_cpu_up
  11. xen_pv_cpu_disable
  12. xen_pv_cpu_die
  13. xen_pv_play_dead
  14. xen_pv_cpu_disable
  15. xen_pv_cpu_die
  16. xen_pv_play_dead
  17. stop_self
  18. xen_pv_stop_other_cpus
  19. xen_irq_work_interrupt
  20. xen_smp_init

// SPDX-License-Identifier: GPL-2.0
/*
 * Xen SMP support
 *
 * This file implements the Xen versions of smp_ops.  SMP under Xen is
 * very straightforward.  Bringing a CPU up is simply a matter of
 * loading its initial context and setting it running.
 *
 * IPIs are handled through the Xen event mechanism.
 *
 * Because virtual CPUs can be scheduled onto any real CPU, there's no
 * useful topology information for the kernel to make use of.  As a
 * result, all CPUs are treated as if they're single-core and
 * single-threaded.
 */
#include <linux/sched.h>
#include <linux/sched/task_stack.h>
#include <linux/err.h>
#include <linux/slab.h>
#include <linux/smp.h>
#include <linux/irq_work.h>
#include <linux/tick.h>
#include <linux/nmi.h>
#include <linux/cpuhotplug.h>
#include <linux/stackprotector.h>

#include <asm/paravirt.h>
#include <asm/desc.h>
#include <asm/pgtable.h>
#include <asm/cpu.h>

#include <xen/interface/xen.h>
#include <xen/interface/vcpu.h>
#include <xen/interface/xenpmu.h>

#include <asm/spec-ctrl.h>
#include <asm/xen/interface.h>
#include <asm/xen/hypercall.h>

#include <xen/xen.h>
#include <xen/page.h>
#include <xen/events.h>

#include <xen/hvc-console.h>
#include "xen-ops.h"
#include "mmu.h"
#include "smp.h"
#include "pmu.h"

cpumask_var_t xen_cpu_initialized_map;

static DEFINE_PER_CPU(struct xen_common_irq, xen_irq_work) = { .irq = -1 };
static DEFINE_PER_CPU(struct xen_common_irq, xen_pmu_irq) = { .irq = -1 };

static irqreturn_t xen_irq_work_interrupt(int irq, void *dev_id);

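/*
 * Runs on each secondary vCPU right after Xen loads the context built
 * in cpu_initialize_context().  Event delivery is still masked at this
 * point (xen_pv_cpu_up() sets evtchn_upcall_mask before starting the
 * vCPU); interrupts are only enabled at the end, once the CPU has been
 * marked online.
 */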
static void cpu_bringup(void)
{
        int cpu;

        cr4_init();
        cpu_init();
        touch_softlockup_watchdog();
        preempt_disable();

        /* PVH runs in ring 0 and allows us to do native syscalls. Yay! */
        if (!xen_feature(XENFEAT_supervisor_mode_kernel)) {
                xen_enable_sysenter();
                xen_enable_syscall();
        }
        cpu = smp_processor_id();
        smp_store_cpu_info(cpu);
        cpu_data(cpu).x86_max_cores = 1;
        set_cpu_sibling_map(cpu);

        speculative_store_bypass_ht_init();

        xen_setup_cpu_clockevents();

        notify_cpu_starting(cpu);

        set_cpu_online(cpu, true);

        cpu_set_state_online(cpu);  /* Implies full memory barrier. */

        /* We can take interrupts now: we're officially "up". */
        local_irq_enable();
}

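/*
 * Entry point for secondary vCPUs: cpu_initialize_context() points the
 * new vCPU's instruction pointer here.  The idle task's stack canary is
 * initialized before entering the common idle loop.
 */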
asmlinkage __visible void cpu_bringup_and_idle(void)
{
        cpu_bringup();
        boot_init_stack_canary();
        cpu_startup_entry(CPUHP_AP_ONLINE_IDLE);
        prevent_tail_call_optimization();
}

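/*
 * Free the PV-only per-cpu interrupts (irq-work and, when active, PMU).
 * Resetting .irq to -1 keeps this safe to call on a partially
 * initialized CPU, which the failure path of xen_smp_intr_init_pv()
 * relies on.
 */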
void xen_smp_intr_free_pv(unsigned int cpu)
{
        if (per_cpu(xen_irq_work, cpu).irq >= 0) {
                unbind_from_irqhandler(per_cpu(xen_irq_work, cpu).irq, NULL);
                per_cpu(xen_irq_work, cpu).irq = -1;
                kfree(per_cpu(xen_irq_work, cpu).name);
                per_cpu(xen_irq_work, cpu).name = NULL;
        }

        if (per_cpu(xen_pmu_irq, cpu).irq >= 0) {
                unbind_from_irqhandler(per_cpu(xen_pmu_irq, cpu).irq, NULL);
                per_cpu(xen_pmu_irq, cpu).irq = -1;
                kfree(per_cpu(xen_pmu_irq, cpu).name);
                per_cpu(xen_pmu_irq, cpu).name = NULL;
        }
}

int xen_smp_intr_init_pv(unsigned int cpu)
{
        int rc;
        char *callfunc_name, *pmu_name;

        callfunc_name = kasprintf(GFP_KERNEL, "irqwork%d", cpu);
        rc = bind_ipi_to_irqhandler(XEN_IRQ_WORK_VECTOR,
                                    cpu,
                                    xen_irq_work_interrupt,
                                    IRQF_PERCPU|IRQF_NOBALANCING,
                                    callfunc_name,
                                    NULL);
        if (rc < 0)
                goto fail;
        per_cpu(xen_irq_work, cpu).irq = rc;
        per_cpu(xen_irq_work, cpu).name = callfunc_name;

        if (is_xen_pmu(cpu)) {
                pmu_name = kasprintf(GFP_KERNEL, "pmu%d", cpu);
                rc = bind_virq_to_irqhandler(VIRQ_XENPMU, cpu,
                                             xen_pmu_irq_handler,
                                             IRQF_PERCPU|IRQF_NOBALANCING,
                                             pmu_name, NULL);
                if (rc < 0)
                        goto fail;
                per_cpu(xen_pmu_irq, cpu).irq = rc;
                per_cpu(xen_pmu_irq, cpu).name = pmu_name;
        }

        return 0;

 fail:
        xen_smp_intr_free_pv(cpu);
        return rc;
}

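/*
 * For an unprivileged (domU) guest, probe which VCPUs the hypervisor
 * has provided: VCPUOP_is_up returns >= 0 for any valid vCPU id and an
 * error for ids that do not exist.  Dom0 is handled separately in
 * xen_filter_cpu_maps().
 */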
static void __init xen_fill_possible_map(void)
{
        int i, rc;

        if (xen_initial_domain())
                return;

        for (i = 0; i < nr_cpu_ids; i++) {
                rc = HYPERVISOR_vcpu_op(VCPUOP_is_up, i, NULL);
                if (rc >= 0) {
                        num_processors++;
                        set_cpu_possible(i, true);
                }
        }
}

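/*
 * Dom0 counterpart of xen_fill_possible_map(): rebuild the possible and
 * present maps from what the hypervisor reports, trimming CPUs that
 * were enumerated by firmware but made unavailable by dom0_max_vcpus.
 */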
static void __init xen_filter_cpu_maps(void)
{
        int i, rc;
        unsigned int subtract = 0;

        if (!xen_initial_domain())
                return;

        num_processors = 0;
        disabled_cpus = 0;
        for (i = 0; i < nr_cpu_ids; i++) {
                rc = HYPERVISOR_vcpu_op(VCPUOP_is_up, i, NULL);
                if (rc >= 0) {
                        num_processors++;
                        set_cpu_possible(i, true);
                } else {
                        set_cpu_possible(i, false);
                        set_cpu_present(i, false);
                        subtract++;
                }
        }
#ifdef CONFIG_HOTPLUG_CPU
        /*
         * This is akin to using 'nr_cpus' on the Linux command line:
         * with 'dom0_max_vcpus=X' we can only have up to X VCPUs, while
         * nr_cpu_ids may be greater than X.  That is normally not a
         * problem, except with CPU hotplug, where the guest could try
         * to bring up more than X CPUs.  This cannot work, as there is
         * no hypercall to expand the maximum number of VCPUs of an
         * already running guest, so cap nr_cpu_ids at X.
         */
        if (subtract)
                nr_cpu_ids = nr_cpu_ids - subtract;
#endif
}

static void __init xen_pv_smp_prepare_boot_cpu(void)
{
        BUG_ON(smp_processor_id() != 0);
        native_smp_prepare_boot_cpu();

        if (!xen_feature(XENFEAT_writable_page_tables))
                /* We've switched to the "real" per-cpu gdt, so make
                 * sure the old memory can be recycled. */
                make_lowmem_page_readwrite(xen_initial_gdt);

#ifdef CONFIG_X86_32
        /*
         * Xen starts us with XEN_FLAT_RING1_DS, but linux code
         * expects __USER_DS
         */
        loadsegment(ds, __USER_DS);
        loadsegment(es, __USER_DS);
#endif

        xen_filter_cpu_maps();
        xen_setup_vcpu_info_placement();

        /*
         * The alternative logic (which patches the unlock/lock) runs
         * before the SMP bootup code is activated.  Hence we need to
         * set this up before the core kernel is patched; otherwise only
         * modules would be patched, but not the core kernel.
         */
        xen_init_spinlocks();
}

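/*
 * Prepare the boot CPU and the global SMP state: set up the boot CPU's
 * Xen interrupts, spinlock IPI and PMU, then restrict the possible map
 * to at most max_cpus and mark every remaining possible CPU present.
 */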
static void __init xen_pv_smp_prepare_cpus(unsigned int max_cpus)
{
        unsigned cpu;
        unsigned int i;

        if (skip_ioapic_setup) {
                char *m = (max_cpus == 0) ?
                        "The nosmp parameter is incompatible with Xen; " \
                        "use Xen dom0_max_vcpus=1 parameter" :
                        "The noapic parameter is incompatible with Xen";

                xen_raw_printk(m);
                panic(m);
        }
        xen_init_lock_cpu(0);

        smp_store_boot_cpu_info();
        cpu_data(0).x86_max_cores = 1;

        for_each_possible_cpu(i) {
                zalloc_cpumask_var(&per_cpu(cpu_sibling_map, i), GFP_KERNEL);
                zalloc_cpumask_var(&per_cpu(cpu_core_map, i), GFP_KERNEL);
                zalloc_cpumask_var(&per_cpu(cpu_die_map, i), GFP_KERNEL);
                zalloc_cpumask_var(&per_cpu(cpu_llc_shared_map, i), GFP_KERNEL);
        }
        set_cpu_sibling_map(0);

        speculative_store_bypass_ht_init();

        xen_pmu_init(0);

        if (xen_smp_intr_init(0) || xen_smp_intr_init_pv(0))
                BUG();

        if (!alloc_cpumask_var(&xen_cpu_initialized_map, GFP_KERNEL))
                panic("could not allocate xen_cpu_initialized_map\n");

        cpumask_copy(xen_cpu_initialized_map, cpumask_of(0));

        /* Restrict the possible_map according to max_cpus. */
        while ((num_possible_cpus() > 1) && (num_possible_cpus() > max_cpus)) {
                for (cpu = nr_cpu_ids - 1; !cpu_possible(cpu); cpu--)
                        continue;
                set_cpu_possible(cpu, false);
        }

        for_each_possible_cpu(cpu)
                set_cpu_present(cpu, true);
}

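/*
 * Build the initial register and descriptor state for a secondary vCPU
 * and hand it to the hypervisor with VCPUOP_initialise.  This is done
 * at most once per CPU: xen_cpu_initialized_map guards against
 * re-initializing the context when a CPU is brought up again after
 * being offlined.
 */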
static int
cpu_initialize_context(unsigned int cpu, struct task_struct *idle)
{
        struct vcpu_guest_context *ctxt;
        struct desc_struct *gdt;
        unsigned long gdt_mfn;

        /* used to tell cpu_init() that it can proceed with initialization */
        cpumask_set_cpu(cpu, cpu_callout_mask);
        if (cpumask_test_and_set_cpu(cpu, xen_cpu_initialized_map))
                return 0;

        ctxt = kzalloc(sizeof(*ctxt), GFP_KERNEL);
        if (ctxt == NULL)
                return -ENOMEM;

        gdt = get_cpu_gdt_rw(cpu);

#ifdef CONFIG_X86_32
        ctxt->user_regs.fs = __KERNEL_PERCPU;
        ctxt->user_regs.gs = __KERNEL_STACK_CANARY;
#endif
        memset(&ctxt->fpu_ctxt, 0, sizeof(ctxt->fpu_ctxt));

        /*
         * Bring up the CPU in cpu_bringup_and_idle() with the stack
         * pointing just below where pt_regs would be if it were a normal
         * kernel entry.
         */
        ctxt->user_regs.eip = (unsigned long)cpu_bringup_and_idle;
        ctxt->flags = VGCF_IN_KERNEL;
        ctxt->user_regs.eflags = 0x1000; /* IOPL_RING1 */
        ctxt->user_regs.ds = __USER_DS;
        ctxt->user_regs.es = __USER_DS;
        ctxt->user_regs.ss = __KERNEL_DS;
        ctxt->user_regs.cs = __KERNEL_CS;
        ctxt->user_regs.esp = (unsigned long)task_pt_regs(idle);

        xen_copy_trap_info(ctxt->trap_ctxt);

        ctxt->ldt_ents = 0;

        BUG_ON((unsigned long)gdt & ~PAGE_MASK);

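        /*
         * Xen will only load a GDT from frames the guest cannot write
         * to, so the page backing this CPU's GDT must be made read-only
         * before it is passed in via gdt_frames[].
         */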
        gdt_mfn = arbitrary_virt_to_mfn(gdt);
        make_lowmem_page_readonly(gdt);
        make_lowmem_page_readonly(mfn_to_virt(gdt_mfn));

        ctxt->gdt_frames[0] = gdt_mfn;
        ctxt->gdt_ents      = GDT_ENTRIES;

        /*
         * Set SS:SP that Xen will use when entering guest kernel mode
         * from guest user mode.  Subsequent calls to load_sp0() can
         * change this value.
         */
        ctxt->kernel_ss = __KERNEL_DS;
        ctxt->kernel_sp = task_top_of_stack(idle);

#ifdef CONFIG_X86_32
        ctxt->event_callback_cs     = __KERNEL_CS;
        ctxt->failsafe_callback_cs  = __KERNEL_CS;
#else
        ctxt->gs_base_kernel = per_cpu_offset(cpu);
#endif
        ctxt->event_callback_eip    =
                (unsigned long)xen_hypervisor_callback;
        ctxt->failsafe_callback_eip =
                (unsigned long)xen_failsafe_callback;
        per_cpu(xen_cr3, cpu) = __pa(swapper_pg_dir);

        ctxt->ctrlreg[3] = xen_pfn_to_cr3(virt_to_gfn(swapper_pg_dir));
        if (HYPERVISOR_vcpu_op(VCPUOP_initialise, xen_vcpu_nr(cpu), ctxt))
                BUG();

        kfree(ctxt);
        return 0;
}

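/*
 * Bring a secondary CPU online: register its initial context, kick it
 * with VCPUOP_up, then yield to the hypervisor until the new CPU has
 * reported itself CPU_ONLINE.
 */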
static int xen_pv_cpu_up(unsigned int cpu, struct task_struct *idle)
{
        int rc;

        rc = common_cpu_up(cpu, idle);
        if (rc)
                return rc;

        xen_setup_runstate_info(cpu);

        /*
         * PV VCPUs are always successfully taken down (see the 'while'
         * loop in xen_pv_cpu_die()), so -EBUSY is an error.
         */
        rc = cpu_check_up_prepare(cpu);
        if (rc)
                return rc;

        /* make sure interrupts start blocked */
        per_cpu(xen_vcpu, cpu)->evtchn_upcall_mask = 1;

        rc = cpu_initialize_context(cpu, idle);
        if (rc)
                return rc;

        xen_pmu_init(cpu);

        rc = HYPERVISOR_vcpu_op(VCPUOP_up, xen_vcpu_nr(cpu), NULL);
        BUG_ON(rc);

        while (cpu_report_state(cpu) != CPU_ONLINE)
                HYPERVISOR_sched_op(SCHEDOP_yield, NULL);

        return 0;
}

#ifdef CONFIG_HOTPLUG_CPU
static int xen_pv_cpu_disable(void)
{
        unsigned int cpu = smp_processor_id();
        if (cpu == 0)
                return -EBUSY;

        cpu_disable_common();

        load_cr3(swapper_pg_dir);
        return 0;
}

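/*
 * Called on a surviving CPU: poll until the hypervisor reports the
 * dying vCPU as down before releasing its interrupts, spinlock state,
 * timer and PMU resources.
 */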
static void xen_pv_cpu_die(unsigned int cpu)
{
        while (HYPERVISOR_vcpu_op(VCPUOP_is_up,
                                  xen_vcpu_nr(cpu), NULL)) {
                __set_current_state(TASK_UNINTERRUPTIBLE);
                schedule_timeout(HZ/10);
        }

        if (common_cpu_die(cpu) == 0) {
                xen_smp_intr_free(cpu);
                xen_uninit_lock_cpu(cpu);
                xen_teardown_timer(cpu);
                xen_pmu_finish(cpu);
        }
}

static void xen_pv_play_dead(void) /* used only with HOTPLUG_CPU */
{
        play_dead_common();
        HYPERVISOR_vcpu_op(VCPUOP_down, xen_vcpu_nr(smp_processor_id()), NULL);
        cpu_bringup();
        /*
         * commit 4b0c0f294 (tick: Cleanup NOHZ per cpu data on cpu down)
         * clears certain data that the cpu_idle loop (which called us
         * and that we return from) expects. The only way to get that
         * data back is to call:
         */
        tick_nohz_idle_enter();
        tick_nohz_idle_stop_tick_protected();

        cpuhp_online_idle(CPUHP_AP_ONLINE_IDLE);
}

#else /* !CONFIG_HOTPLUG_CPU */
static int xen_pv_cpu_disable(void)
{
        return -ENOSYS;
}

static void xen_pv_cpu_die(unsigned int cpu)
{
        BUG();
}

static void xen_pv_play_dead(void)
{
        BUG();
}

#endif
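/*
 * Runs on every other CPU during shutdown or reboot: each CPU takes
 * itself down with VCPUOP_down rather than being halted externally.
 */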
static void stop_self(void *v)
{
        int cpu = smp_processor_id();

        /* make sure we're not pinning something down */
        load_cr3(swapper_pg_dir);
        /* should set up a minimal gdt */

        set_cpu_online(cpu, false);

        HYPERVISOR_vcpu_op(VCPUOP_down, xen_vcpu_nr(cpu), NULL);
        BUG();
}

static void xen_pv_stop_other_cpus(int wait)
{
        smp_call_function(stop_self, NULL, wait);
}

static irqreturn_t xen_irq_work_interrupt(int irq, void *dev_id)
{
        irq_enter();
        irq_work_run();
        inc_irq_stat(apic_irq_work_irqs);
        irq_exit();

        return IRQ_HANDLED;
}

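/*
 * The PV flavour of smp_ops, installed as the global smp_ops from
 * xen_smp_init() during early boot.  The xen_smp_* callbacks without a
 * _pv suffix are implemented in the shared smp.c.
 */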
static const struct smp_ops xen_smp_ops __initconst = {
        .smp_prepare_boot_cpu = xen_pv_smp_prepare_boot_cpu,
        .smp_prepare_cpus = xen_pv_smp_prepare_cpus,
        .smp_cpus_done = xen_smp_cpus_done,

        .cpu_up = xen_pv_cpu_up,
        .cpu_die = xen_pv_cpu_die,
        .cpu_disable = xen_pv_cpu_disable,
        .play_dead = xen_pv_play_dead,

        .stop_other_cpus = xen_pv_stop_other_cpus,
        .smp_send_reschedule = xen_smp_send_reschedule,

        .send_call_func_ipi = xen_smp_send_call_function_ipi,
        .send_call_func_single_ipi = xen_smp_send_call_function_single_ipi,
};

void __init xen_smp_init(void)
{
        smp_ops = xen_smp_ops;
        xen_fill_possible_map();
}
