root/arch/powerpc/kernel/machine_kexec_64.c

/* [<][>][^][v][top][bottom][index][help] */

DEFINITIONS

This source file includes the following definitions.
  1. default_machine_kexec_prepare
  2. copy_segments
  3. kexec_copy_flush
  4. kexec_smp_down
  5. kexec_prepare_cpus_wait
  6. wake_offline_cpus
  7. kexec_prepare_cpus
  8. kexec_prepare_cpus
  9. kexec_sequence
  10. export_htab_values

   1 // SPDX-License-Identifier: GPL-2.0-only
   2 /*
   3  * PPC64 code to handle Linux booting another kernel.
   4  *
   5  * Copyright (C) 2004-2005, IBM Corp.
   6  *
   7  * Created by: Milton D Miller II
   8  */
   9 
  10 
  11 #include <linux/kexec.h>
  12 #include <linux/smp.h>
  13 #include <linux/thread_info.h>
  14 #include <linux/init_task.h>
  15 #include <linux/errno.h>
  16 #include <linux/kernel.h>
  17 #include <linux/cpu.h>
  18 #include <linux/hardirq.h>
  19 
  20 #include <asm/page.h>
  21 #include <asm/current.h>
  22 #include <asm/machdep.h>
  23 #include <asm/cacheflush.h>
  24 #include <asm/firmware.h>
  25 #include <asm/paca.h>
  26 #include <asm/mmu.h>
  27 #include <asm/sections.h>       /* _end */
  28 #include <asm/prom.h>
  29 #include <asm/smp.h>
  30 #include <asm/hw_breakpoint.h>
  31 #include <asm/asm-prototypes.h>
  32 #include <asm/svm.h>
  33 #include <asm/ultravisor.h>
  34 
  35 int default_machine_kexec_prepare(struct kimage *image)
  36 {
  37         int i;
  38         unsigned long begin, end;       /* limits of segment */
  39         unsigned long low, high;        /* limits of blocked memory range */
  40         struct device_node *node;
  41         const unsigned long *basep;
  42         const unsigned int *sizep;
  43 
  44         /*
  45          * Since we use the kernel fault handlers and paging code to
  46          * handle the virtual mode, we must make sure no destination
  47          * overlaps kernel static data or bss.
  48          */
  49         for (i = 0; i < image->nr_segments; i++)
  50                 if (image->segment[i].mem < __pa(_end))
  51                         return -ETXTBSY;
  52 
  53         /* We also should not overwrite the tce tables */
  54         for_each_node_by_type(node, "pci") {
  55                 basep = of_get_property(node, "linux,tce-base", NULL);
  56                 sizep = of_get_property(node, "linux,tce-size", NULL);
  57                 if (basep == NULL || sizep == NULL)
  58                         continue;
  59 
  60                 low = *basep;
  61                 high = low + (*sizep);
  62 
  63                 for (i = 0; i < image->nr_segments; i++) {
  64                         begin = image->segment[i].mem;
  65                         end = begin + image->segment[i].memsz;
  66 
  67                         if ((begin < high) && (end > low))
  68                                 return -ETXTBSY;
  69                 }
  70         }
  71 
  72         return 0;
  73 }
  74 
  75 static void copy_segments(unsigned long ind)
  76 {
  77         unsigned long entry;
  78         unsigned long *ptr;
  79         void *dest;
  80         void *addr;
  81 
  82         /*
  83          * We rely on kexec_load to create a lists that properly
  84          * initializes these pointers before they are used.
  85          * We will still crash if the list is wrong, but at least
  86          * the compiler will be quiet.
  87          */
  88         ptr = NULL;
  89         dest = NULL;
  90 
  91         for (entry = ind; !(entry & IND_DONE); entry = *ptr++) {
  92                 addr = __va(entry & PAGE_MASK);
  93 
  94                 switch (entry & IND_FLAGS) {
  95                 case IND_DESTINATION:
  96                         dest = addr;
  97                         break;
  98                 case IND_INDIRECTION:
  99                         ptr = addr;
 100                         break;
 101                 case IND_SOURCE:
 102                         copy_page(dest, addr);
 103                         dest += PAGE_SIZE;
 104                 }
 105         }
 106 }
 107 
 108 void kexec_copy_flush(struct kimage *image)
 109 {
 110         long i, nr_segments = image->nr_segments;
 111         struct  kexec_segment ranges[KEXEC_SEGMENT_MAX];
 112 
 113         /* save the ranges on the stack to efficiently flush the icache */
 114         memcpy(ranges, image->segment, sizeof(ranges));
 115 
 116         /*
 117          * After this call we may not use anything allocated in dynamic
 118          * memory, including *image.
 119          *
 120          * Only globals and the stack are allowed.
 121          */
 122         copy_segments(image->head);
 123 
 124         /*
 125          * we need to clear the icache for all dest pages sometime,
 126          * including ones that were in place on the original copy
 127          */
 128         for (i = 0; i < nr_segments; i++)
 129                 flush_icache_range((unsigned long)__va(ranges[i].mem),
 130                         (unsigned long)__va(ranges[i].mem + ranges[i].memsz));
 131 }
 132 
 133 #ifdef CONFIG_SMP
 134 
 135 static int kexec_all_irq_disabled = 0;
 136 
 137 static void kexec_smp_down(void *arg)
 138 {
 139         local_irq_disable();
 140         hard_irq_disable();
 141 
 142         mb(); /* make sure our irqs are disabled before we say they are */
 143         get_paca()->kexec_state = KEXEC_STATE_IRQS_OFF;
 144         while(kexec_all_irq_disabled == 0)
 145                 cpu_relax();
 146         mb(); /* make sure all irqs are disabled before this */
 147         hw_breakpoint_disable();
 148         /*
 149          * Now every CPU has IRQs off, we can clear out any pending
 150          * IPIs and be sure that no more will come in after this.
 151          */
 152         if (ppc_md.kexec_cpu_down)
 153                 ppc_md.kexec_cpu_down(0, 1);
 154 
 155         kexec_smp_wait();
 156         /* NOTREACHED */
 157 }
 158 
 159 static void kexec_prepare_cpus_wait(int wait_state)
 160 {
 161         int my_cpu, i, notified=-1;
 162 
 163         hw_breakpoint_disable();
 164         my_cpu = get_cpu();
 165         /* Make sure each CPU has at least made it to the state we need.
 166          *
 167          * FIXME: There is a (slim) chance of a problem if not all of the CPUs
 168          * are correctly onlined.  If somehow we start a CPU on boot with RTAS
 169          * start-cpu, but somehow that CPU doesn't write callin_cpu_map[] in
 170          * time, the boot CPU will timeout.  If it does eventually execute
 171          * stuff, the secondary will start up (paca_ptrs[]->cpu_start was
 172          * written) and get into a peculiar state.
 173          * If the platform supports smp_ops->take_timebase(), the secondary CPU
 174          * will probably be spinning in there.  If not (i.e. pseries), the
 175          * secondary will continue on and try to online itself/idle/etc. If it
 176          * survives that, we need to find these
 177          * possible-but-not-online-but-should-be CPUs and chaperone them into
 178          * kexec_smp_wait().
 179          */
 180         for_each_online_cpu(i) {
 181                 if (i == my_cpu)
 182                         continue;
 183 
 184                 while (paca_ptrs[i]->kexec_state < wait_state) {
 185                         barrier();
 186                         if (i != notified) {
 187                                 printk(KERN_INFO "kexec: waiting for cpu %d "
 188                                        "(physical %d) to enter %i state\n",
 189                                        i, paca_ptrs[i]->hw_cpu_id, wait_state);
 190                                 notified = i;
 191                         }
 192                 }
 193         }
 194         mb();
 195 }
 196 
 197 /*
 198  * We need to make sure each present CPU is online.  The next kernel will scan
 199  * the device tree and assume primary threads are online and query secondary
 200  * threads via RTAS to online them if required.  If we don't online primary
 201  * threads, they will be stuck.  However, we also online secondary threads as we
 202  * may be using 'cede offline'.  In this case RTAS doesn't see the secondary
 203  * threads as offline -- and again, these CPUs will be stuck.
 204  *
 205  * So, we online all CPUs that should be running, including secondary threads.
 206  */
 207 static void wake_offline_cpus(void)
 208 {
 209         int cpu = 0;
 210 
 211         for_each_present_cpu(cpu) {
 212                 if (!cpu_online(cpu)) {
 213                         printk(KERN_INFO "kexec: Waking offline cpu %d.\n",
 214                                cpu);
 215                         WARN_ON(cpu_up(cpu));
 216                 }
 217         }
 218 }
 219 
 220 static void kexec_prepare_cpus(void)
 221 {
 222         wake_offline_cpus();
 223         smp_call_function(kexec_smp_down, NULL, /* wait */0);
 224         local_irq_disable();
 225         hard_irq_disable();
 226 
 227         mb(); /* make sure IRQs are disabled before we say they are */
 228         get_paca()->kexec_state = KEXEC_STATE_IRQS_OFF;
 229 
 230         kexec_prepare_cpus_wait(KEXEC_STATE_IRQS_OFF);
 231         /* we are sure every CPU has IRQs off at this point */
 232         kexec_all_irq_disabled = 1;
 233 
 234         /*
 235          * Before removing MMU mappings make sure all CPUs have entered real
 236          * mode:
 237          */
 238         kexec_prepare_cpus_wait(KEXEC_STATE_REAL_MODE);
 239 
 240         /* after we tell the others to go down */
 241         if (ppc_md.kexec_cpu_down)
 242                 ppc_md.kexec_cpu_down(0, 0);
 243 
 244         put_cpu();
 245 }
 246 
 247 #else /* ! SMP */
 248 
 249 static void kexec_prepare_cpus(void)
 250 {
 251         /*
 252          * move the secondarys to us so that we can copy
 253          * the new kernel 0-0x100 safely
 254          *
 255          * do this if kexec in setup.c ?
 256          *
 257          * We need to release the cpus if we are ever going from an
 258          * UP to an SMP kernel.
 259          */
 260         smp_release_cpus();
 261         if (ppc_md.kexec_cpu_down)
 262                 ppc_md.kexec_cpu_down(0, 0);
 263         local_irq_disable();
 264         hard_irq_disable();
 265 }
 266 
 267 #endif /* SMP */
 268 
 269 /*
 270  * kexec thread structure and stack.
 271  *
 272  * We need to make sure that this is 16384-byte aligned due to the
 273  * way process stacks are handled.  It also must be statically allocated
 274  * or allocated as part of the kimage, because everything else may be
 275  * overwritten when we copy the kexec image.  We piggyback on the
 276  * "init_task" linker section here to statically allocate a stack.
 277  *
 278  * We could use a smaller stack if we don't care about anything using
 279  * current, but that audit has not been performed.
 280  */
 281 static union thread_union kexec_stack __init_task_data =
 282         { };
 283 
 284 /*
 285  * For similar reasons to the stack above, the kexecing CPU needs to be on a
 286  * static PACA; we switch to kexec_paca.
 287  */
 288 struct paca_struct kexec_paca;
 289 
 290 /* Our assembly helper, in misc_64.S */
 291 extern void kexec_sequence(void *newstack, unsigned long start,
 292                            void *image, void *control,
 293                            void (*clear_all)(void),
 294                            bool copy_with_mmu_off) __noreturn;
 295 
 296 /* too late to fail here */
 297 void default_machine_kexec(struct kimage *image)
 298 {
 299         bool copy_with_mmu_off;
 300 
 301         /* prepare control code if any */
 302 
 303         /*
 304         * If the kexec boot is the normal one, need to shutdown other cpus
 305         * into our wait loop and quiesce interrupts.
 306         * Otherwise, in the case of crashed mode (crashing_cpu >= 0),
 307         * stopping other CPUs and collecting their pt_regs is done before
 308         * using debugger IPI.
 309         */
 310 
 311         if (!kdump_in_progress())
 312                 kexec_prepare_cpus();
 313 
 314         printk("kexec: Starting switchover sequence.\n");
 315 
 316         /* switch to a staticly allocated stack.  Based on irq stack code.
 317          * We setup preempt_count to avoid using VMX in memcpy.
 318          * XXX: the task struct will likely be invalid once we do the copy!
 319          */
 320         current_thread_info()->flags = 0;
 321         current_thread_info()->preempt_count = HARDIRQ_OFFSET;
 322 
 323         /* We need a static PACA, too; copy this CPU's PACA over and switch to
 324          * it. Also poison per_cpu_offset and NULL lppaca to catch anyone using
 325          * non-static data.
 326          */
 327         memcpy(&kexec_paca, get_paca(), sizeof(struct paca_struct));
 328         kexec_paca.data_offset = 0xedeaddeadeeeeeeeUL;
 329 #ifdef CONFIG_PPC_PSERIES
 330         kexec_paca.lppaca_ptr = NULL;
 331 #endif
 332 
 333         if (is_secure_guest() && !(image->preserve_context ||
 334                                    image->type == KEXEC_TYPE_CRASH)) {
 335                 uv_unshare_all_pages();
 336                 printk("kexec: Unshared all shared pages.\n");
 337         }
 338 
 339         paca_ptrs[kexec_paca.paca_index] = &kexec_paca;
 340 
 341         setup_paca(&kexec_paca);
 342 
 343         /*
 344          * The lppaca should be unregistered at this point so the HV won't
 345          * touch it. In the case of a crash, none of the lppacas are
 346          * unregistered so there is not much we can do about it here.
 347          */
 348 
 349         /*
 350          * On Book3S, the copy must happen with the MMU off if we are either
 351          * using Radix page tables or we are not in an LPAR since we can
 352          * overwrite the page tables while copying.
 353          *
 354          * In an LPAR, we keep the MMU on otherwise we can't access beyond
 355          * the RMA. On BookE there is no real MMU off mode, so we have to
 356          * keep it enabled as well (but then we have bolted TLB entries).
 357          */
 358 #ifdef CONFIG_PPC_BOOK3E
 359         copy_with_mmu_off = false;
 360 #else
 361         copy_with_mmu_off = radix_enabled() ||
 362                 !(firmware_has_feature(FW_FEATURE_LPAR) ||
 363                   firmware_has_feature(FW_FEATURE_PS3_LV1));
 364 #endif
 365 
 366         /* Some things are best done in assembly.  Finding globals with
 367          * a toc is easier in C, so pass in what we can.
 368          */
 369         kexec_sequence(&kexec_stack, image->start, image,
 370                        page_address(image->control_code_page),
 371                        mmu_cleanup_all, copy_with_mmu_off);
 372         /* NOTREACHED */
 373 }
 374 
 375 #ifdef CONFIG_PPC_BOOK3S_64
 376 /* Values we need to export to the second kernel via the device tree. */
 377 static unsigned long htab_base;
 378 static unsigned long htab_size;
 379 
 380 static struct property htab_base_prop = {
 381         .name = "linux,htab-base",
 382         .length = sizeof(unsigned long),
 383         .value = &htab_base,
 384 };
 385 
 386 static struct property htab_size_prop = {
 387         .name = "linux,htab-size",
 388         .length = sizeof(unsigned long),
 389         .value = &htab_size,
 390 };
 391 
 392 static int __init export_htab_values(void)
 393 {
 394         struct device_node *node;
 395 
 396         /* On machines with no htab htab_address is NULL */
 397         if (!htab_address)
 398                 return -ENODEV;
 399 
 400         node = of_find_node_by_path("/chosen");
 401         if (!node)
 402                 return -ENODEV;
 403 
 404         /* remove any stale propertys so ours can be found */
 405         of_remove_property(node, of_find_property(node, htab_base_prop.name, NULL));
 406         of_remove_property(node, of_find_property(node, htab_size_prop.name, NULL));
 407 
 408         htab_base = cpu_to_be64(__pa(htab_address));
 409         of_add_property(node, &htab_base_prop);
 410         htab_size = cpu_to_be64(htab_size_bytes);
 411         of_add_property(node, &htab_size_prop);
 412 
 413         of_node_put(node);
 414         return 0;
 415 }
 416 late_initcall(export_htab_values);
 417 #endif /* CONFIG_PPC_BOOK3S_64 */

/* [<][>][^][v][top][bottom][index][help] */