drivers/base/memory.c


DEFINITIONS

This source file includes the following definitions:
  1. base_memory_block_id
  2. pfn_to_block_id
  3. phys_to_block_id
  4. register_memory_notifier
  5. unregister_memory_notifier
  6. register_memory_isolate_notifier
  7. unregister_memory_isolate_notifier
  8. memory_block_release
  9. memory_block_size_bytes
  10. phys_index_show
  11. removable_show
  12. state_show
  13. memory_notify
  14. memory_isolate_notify
  15. pages_correctly_probed
  16. memory_block_action
  17. memory_block_change_state
  18. memory_subsys_online
  19. memory_subsys_offline
  20. state_store
  21. phys_device_show
  22. print_allowed_zone
  23. valid_zones_show
  24. block_size_bytes_show
  25. auto_online_blocks_show
  26. auto_online_blocks_store
  27. probe_store
  28. soft_offline_page_store
  29. hard_offline_page_store
  30. arch_get_memory_phys_device
  31. find_memory_block_by_id
  32. find_memory_block
  33. register_memory
  34. init_memory_block
  35. add_memory_block
  36. unregister_memory
  37. create_memory_block_devices
  38. remove_memory_block_devices
  39. is_memblock_offlined
  40. memory_dev_init
  41. walk_memory_blocks
  42. for_each_memory_block_cb
  43. for_each_memory_block

// SPDX-License-Identifier: GPL-2.0
/*
 * Memory subsystem support
 *
 * Written by Matt Tolentino <matthew.e.tolentino@intel.com>
 *            Dave Hansen <haveblue@us.ibm.com>
 *
 * This file provides the necessary infrastructure to represent
 * a SPARSEMEM-memory-model system's physical memory in /sysfs.
 * All arch-independent code that assumes MEMORY_HOTPLUG requires
 * SPARSEMEM should be contained here, or in mm/memory_hotplug.c.
 */

#include <linux/module.h>
#include <linux/init.h>
#include <linux/topology.h>
#include <linux/capability.h>
#include <linux/device.h>
#include <linux/memory.h>
#include <linux/memory_hotplug.h>
#include <linux/mm.h>
#include <linux/mutex.h>
#include <linux/stat.h>
#include <linux/slab.h>

#include <linux/atomic.h>
#include <linux/uaccess.h>

static DEFINE_MUTEX(mem_sysfs_mutex);

#define MEMORY_CLASS_NAME       "memory"

#define to_memory_block(dev) container_of(dev, struct memory_block, dev)

static int sections_per_block;

static inline unsigned long base_memory_block_id(unsigned long section_nr)
{
        return section_nr / sections_per_block;
}

static inline unsigned long pfn_to_block_id(unsigned long pfn)
{
        return base_memory_block_id(pfn_to_section_nr(pfn));
}

static inline unsigned long phys_to_block_id(unsigned long phys)
{
        return pfn_to_block_id(PFN_DOWN(phys));
}
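
/*
 * Worked example (editor's addition, assuming x86-64 defaults of 128 MiB
 * sections and a 128 MiB block size, i.e. sections_per_block == 1): for
 * phys == 0x180000000 (6 GiB), PFN_DOWN() yields pfn 0x180000,
 * pfn_to_section_nr() yields section 48 (0x180000 pfns / 32768 pages per
 * section), and base_memory_block_id() maps that to block 48, which shows
 * up as /sys/devices/system/memory/memory48.
 */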

static int memory_subsys_online(struct device *dev);
static int memory_subsys_offline(struct device *dev);

static struct bus_type memory_subsys = {
        .name = MEMORY_CLASS_NAME,
        .dev_name = MEMORY_CLASS_NAME,
        .online = memory_subsys_online,
        .offline = memory_subsys_offline,
};

static BLOCKING_NOTIFIER_HEAD(memory_chain);

int register_memory_notifier(struct notifier_block *nb)
{
        return blocking_notifier_chain_register(&memory_chain, nb);
}
EXPORT_SYMBOL(register_memory_notifier);

void unregister_memory_notifier(struct notifier_block *nb)
{
        blocking_notifier_chain_unregister(&memory_chain, nb);
}
EXPORT_SYMBOL(unregister_memory_notifier);

static ATOMIC_NOTIFIER_HEAD(memory_isolate_chain);

int register_memory_isolate_notifier(struct notifier_block *nb)
{
        return atomic_notifier_chain_register(&memory_isolate_chain, nb);
}
EXPORT_SYMBOL(register_memory_isolate_notifier);

void unregister_memory_isolate_notifier(struct notifier_block *nb)
{
        atomic_notifier_chain_unregister(&memory_isolate_chain, nb);
}
EXPORT_SYMBOL(unregister_memory_isolate_notifier);
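
/*
 * Illustrative sketch (editor's addition, not part of this file): how a
 * subsystem might consume the memory notifier chain above. The callback,
 * its registration, and example_range_is_pinned() are hypothetical; the
 * events and return conventions are the real ones from <linux/memory.h>
 * and <linux/notifier.h>.
 */
#if 0
static int example_memory_callback(struct notifier_block *nb,
                                   unsigned long action, void *arg)
{
        struct memory_notify *mn = arg;         /* start_pfn, nr_pages, ... */

        switch (action) {
        case MEM_GOING_OFFLINE:
                /* A nonzero errno here vetoes the offline request. */
                if (example_range_is_pinned(mn->start_pfn, mn->nr_pages))
                        return notifier_from_errno(-EBUSY);
                break;
        case MEM_ONLINE:
        case MEM_OFFLINE:
                /* React to a completed state transition. */
                break;
        }
        return NOTIFY_OK;
}

static struct notifier_block example_memory_nb = {
        .notifier_call = example_memory_callback,
};

/* In init code: register_memory_notifier(&example_memory_nb); */
#endif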

static void memory_block_release(struct device *dev)
{
        struct memory_block *mem = to_memory_block(dev);

        kfree(mem);
}

unsigned long __weak memory_block_size_bytes(void)
{
        return MIN_MEMORY_BLOCK_SIZE;
}
EXPORT_SYMBOL_GPL(memory_block_size_bytes);
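
/*
 * Illustrative sketch (editor's addition, hypothetical): an architecture
 * may override the __weak default above to group several sections into one
 * block. The value must stay a power-of-two multiple of
 * MIN_MEMORY_BLOCK_SIZE, or memory_dev_init() below panics.
 */
#if 0
unsigned long memory_block_size_bytes(void)
{
        return 4UL * MIN_MEMORY_BLOCK_SIZE;     /* four sections per block */
}
#endif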

/*
 * Show the first physical section index (number) of this memory block.
 */
static ssize_t phys_index_show(struct device *dev,
                               struct device_attribute *attr, char *buf)
{
        struct memory_block *mem = to_memory_block(dev);
        unsigned long phys_index;

        phys_index = mem->start_section_nr / sections_per_block;
        return sprintf(buf, "%08lx\n", phys_index);
}

/*
 * Legacy interface that we cannot remove. Always indicate "removable"
 * with CONFIG_MEMORY_HOTREMOVE - bad heuristic.
 */
static ssize_t removable_show(struct device *dev, struct device_attribute *attr,
                              char *buf)
{
        return sprintf(buf, "%d\n", (int)IS_ENABLED(CONFIG_MEMORY_HOTREMOVE));
}

/*
 * online, offline, going offline, etc.
 */
static ssize_t state_show(struct device *dev, struct device_attribute *attr,
                          char *buf)
{
        struct memory_block *mem = to_memory_block(dev);
        ssize_t len = 0;

        /*
         * We can probably put these states in a nice little array
         * so that they're not open-coded
         */
        switch (mem->state) {
        case MEM_ONLINE:
                len = sprintf(buf, "online\n");
                break;
        case MEM_OFFLINE:
                len = sprintf(buf, "offline\n");
                break;
        case MEM_GOING_OFFLINE:
                len = sprintf(buf, "going-offline\n");
                break;
        default:
                len = sprintf(buf, "ERROR-UNKNOWN-%ld\n",
                                mem->state);
                WARN_ON(1);
                break;
        }

        return len;
}

int memory_notify(unsigned long val, void *v)
{
        return blocking_notifier_call_chain(&memory_chain, val, v);
}

int memory_isolate_notify(unsigned long val, void *v)
{
        return atomic_notifier_call_chain(&memory_isolate_chain, val, v);
}

/*
 * The probe routines leave the pages uninitialized, just as the bootmem code
 * does. Make sure we do not access them, but instead use only information from
 * within sections.
 */
static bool pages_correctly_probed(unsigned long start_pfn)
{
        unsigned long section_nr = pfn_to_section_nr(start_pfn);
        unsigned long section_nr_end = section_nr + sections_per_block;
        unsigned long pfn = start_pfn;

        /*
         * memmap between sections is not contiguous except with
         * SPARSEMEM_VMEMMAP. We look up the page once per section
         * and assume memmap is contiguous within each section.
         */
        for (; section_nr < section_nr_end; section_nr++) {
                if (WARN_ON_ONCE(!pfn_valid(pfn)))
                        return false;

                if (!present_section_nr(section_nr)) {
                        pr_warn("section %ld pfn[%lx, %lx) not present\n",
                                section_nr, pfn, pfn + PAGES_PER_SECTION);
                        return false;
                } else if (!valid_section_nr(section_nr)) {
                        pr_warn("section %ld pfn[%lx, %lx) no valid memmap\n",
                                section_nr, pfn, pfn + PAGES_PER_SECTION);
                        return false;
                } else if (online_section_nr(section_nr)) {
                        pr_warn("section %ld pfn[%lx, %lx) is already online\n",
                                section_nr, pfn, pfn + PAGES_PER_SECTION);
                        return false;
                }
                pfn += PAGES_PER_SECTION;
        }

        return true;
}

/*
 * MEMORY_HOTPLUG depends on SPARSEMEM in mm/Kconfig, so it is
 * OK to have direct references to sparsemem variables in here.
 */
static int
memory_block_action(unsigned long start_section_nr, unsigned long action,
                    int online_type)
{
        unsigned long start_pfn;
        unsigned long nr_pages = PAGES_PER_SECTION * sections_per_block;
        int ret;

        start_pfn = section_nr_to_pfn(start_section_nr);

        switch (action) {
        case MEM_ONLINE:
                if (!pages_correctly_probed(start_pfn))
                        return -EBUSY;

                ret = online_pages(start_pfn, nr_pages, online_type);
                break;
        case MEM_OFFLINE:
                ret = offline_pages(start_pfn, nr_pages);
                break;
        default:
                WARN(1, KERN_WARNING "%s(%ld, %ld) unknown action: "
                     "%ld\n", __func__, start_section_nr, action, action);
                ret = -EINVAL;
        }

        return ret;
}

static int memory_block_change_state(struct memory_block *mem,
                unsigned long to_state, unsigned long from_state_req)
{
        int ret = 0;

        if (mem->state != from_state_req)
                return -EINVAL;

        if (to_state == MEM_OFFLINE)
                mem->state = MEM_GOING_OFFLINE;

        ret = memory_block_action(mem->start_section_nr, to_state,
                                mem->online_type);

        mem->state = ret ? from_state_req : to_state;

        return ret;
}

/* The device lock serializes operations on memory_subsys_[online|offline] */
static int memory_subsys_online(struct device *dev)
{
        struct memory_block *mem = to_memory_block(dev);
        int ret;

        if (mem->state == MEM_ONLINE)
                return 0;

        /*
         * If we are called from state_store(), online_type will be
         * set >= 0. Otherwise we were called from the device online
         * attribute and need to set the online_type.
         */
        if (mem->online_type < 0)
                mem->online_type = MMOP_ONLINE_KEEP;

        ret = memory_block_change_state(mem, MEM_ONLINE, MEM_OFFLINE);

        /* clear online_type */
        mem->online_type = -1;

        return ret;
}

static int memory_subsys_offline(struct device *dev)
{
        struct memory_block *mem = to_memory_block(dev);

        if (mem->state == MEM_OFFLINE)
                return 0;

        /* Can't offline block with non-present sections */
        if (mem->section_count != sections_per_block)
                return -EINVAL;

        return memory_block_change_state(mem, MEM_OFFLINE, MEM_ONLINE);
}

static ssize_t state_store(struct device *dev, struct device_attribute *attr,
                           const char *buf, size_t count)
{
        struct memory_block *mem = to_memory_block(dev);
        int ret, online_type;

        ret = lock_device_hotplug_sysfs();
        if (ret)
                return ret;

        if (sysfs_streq(buf, "online_kernel"))
                online_type = MMOP_ONLINE_KERNEL;
        else if (sysfs_streq(buf, "online_movable"))
                online_type = MMOP_ONLINE_MOVABLE;
        else if (sysfs_streq(buf, "online"))
                online_type = MMOP_ONLINE_KEEP;
        else if (sysfs_streq(buf, "offline"))
                online_type = MMOP_OFFLINE;
        else {
                ret = -EINVAL;
                goto err;
        }

        switch (online_type) {
        case MMOP_ONLINE_KERNEL:
        case MMOP_ONLINE_MOVABLE:
        case MMOP_ONLINE_KEEP:
                /* mem->online_type is protected by device_hotplug_lock */
                mem->online_type = online_type;
                ret = device_online(&mem->dev);
                break;
        case MMOP_OFFLINE:
                ret = device_offline(&mem->dev);
                break;
        default:
                ret = -EINVAL; /* should never happen */
        }

err:
        unlock_device_hotplug();

        if (ret < 0)
                return ret;
        if (ret)
                return -EINVAL;

        return count;
}
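
/*
 * Usage note (editor's addition): from user space the state attribute is
 * driven with plain string writes, e.g.
 *
 *   # cat /sys/devices/system/memory/memory48/state
 *   # echo offline        > /sys/devices/system/memory/memory48/state
 *   # echo online_movable > /sys/devices/system/memory/memory48/state
 */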

/*
 * phys_device is a bad name for this.  What I really want
 * is a way to differentiate between memory ranges that
 * are part of physical devices that constitute
 * a complete removable unit or fru.
 * i.e. do these ranges belong to the same physical device,
 * s.t. if I offline all of these sections I can then
 * remove the physical device?
 */
static ssize_t phys_device_show(struct device *dev,
                                struct device_attribute *attr, char *buf)
{
        struct memory_block *mem = to_memory_block(dev);
        return sprintf(buf, "%d\n", mem->phys_device);
}

#ifdef CONFIG_MEMORY_HOTREMOVE
static void print_allowed_zone(char *buf, int nid, unsigned long start_pfn,
                unsigned long nr_pages, int online_type,
                struct zone *default_zone)
{
        struct zone *zone;

        zone = zone_for_pfn_range(online_type, nid, start_pfn, nr_pages);
        if (zone != default_zone) {
                strcat(buf, " ");
                strcat(buf, zone->name);
        }
}

static ssize_t valid_zones_show(struct device *dev,
                                struct device_attribute *attr, char *buf)
{
        struct memory_block *mem = to_memory_block(dev);
        unsigned long start_pfn = section_nr_to_pfn(mem->start_section_nr);
        unsigned long nr_pages = PAGES_PER_SECTION * sections_per_block;
        unsigned long valid_start_pfn, valid_end_pfn;
        struct zone *default_zone;
        int nid;

        /*
         * Check the existing zone. Make sure that we do that only on the
         * online nodes otherwise the page_zone is not reliable
         */
        if (mem->state == MEM_ONLINE) {
                /*
                 * A block that contains more than one zone cannot be
                 * offlined. This can happen e.g. for ZONE_DMA and ZONE_DMA32.
                 */
                if (!test_pages_in_a_zone(start_pfn, start_pfn + nr_pages,
                                          &valid_start_pfn, &valid_end_pfn))
                        return sprintf(buf, "none\n");
                start_pfn = valid_start_pfn;
                strcat(buf, page_zone(pfn_to_page(start_pfn))->name);
                goto out;
        }

        nid = mem->nid;
        default_zone = zone_for_pfn_range(MMOP_ONLINE_KEEP, nid, start_pfn, nr_pages);
        strcat(buf, default_zone->name);

        print_allowed_zone(buf, nid, start_pfn, nr_pages, MMOP_ONLINE_KERNEL,
                        default_zone);
        print_allowed_zone(buf, nid, start_pfn, nr_pages, MMOP_ONLINE_MOVABLE,
                        default_zone);
out:
        strcat(buf, "\n");

        return strlen(buf);
}
static DEVICE_ATTR_RO(valid_zones);
#endif

static DEVICE_ATTR_RO(phys_index);
static DEVICE_ATTR_RW(state);
static DEVICE_ATTR_RO(phys_device);
static DEVICE_ATTR_RO(removable);

/*
 * Show the memory block size (shared by all memory blocks).
 */
static ssize_t block_size_bytes_show(struct device *dev,
                                     struct device_attribute *attr, char *buf)
{
        return sprintf(buf, "%lx\n", memory_block_size_bytes());
}

static DEVICE_ATTR_RO(block_size_bytes);

/*
 * Memory auto online policy.
 */

static ssize_t auto_online_blocks_show(struct device *dev,
                                       struct device_attribute *attr, char *buf)
{
        if (memhp_auto_online)
                return sprintf(buf, "online\n");
        else
                return sprintf(buf, "offline\n");
}

static ssize_t auto_online_blocks_store(struct device *dev,
                                        struct device_attribute *attr,
                                        const char *buf, size_t count)
{
        if (sysfs_streq(buf, "online"))
                memhp_auto_online = true;
        else if (sysfs_streq(buf, "offline"))
                memhp_auto_online = false;
        else
                return -EINVAL;

        return count;
}

static DEVICE_ATTR_RW(auto_online_blocks);

/*
 * Some architectures will have custom drivers to do this, and
 * will not need to do it from userspace.  The fake hot-add code
 * as well as ppc64 will do all of their discovery in userspace
 * and will require this interface.
 */
#ifdef CONFIG_ARCH_MEMORY_PROBE
static ssize_t probe_store(struct device *dev, struct device_attribute *attr,
                           const char *buf, size_t count)
{
        u64 phys_addr;
        int nid, ret;
        unsigned long pages_per_block = PAGES_PER_SECTION * sections_per_block;

        ret = kstrtoull(buf, 0, &phys_addr);
        if (ret)
                return ret;

        if (phys_addr & ((pages_per_block << PAGE_SHIFT) - 1))
                return -EINVAL;

        ret = lock_device_hotplug_sysfs();
        if (ret)
                return ret;

        nid = memory_add_physaddr_to_nid(phys_addr);
        ret = __add_memory(nid, phys_addr,
                           MIN_MEMORY_BLOCK_SIZE * sections_per_block);

        if (ret)
                goto out;

        ret = count;
out:
        unlock_device_hotplug();
        return ret;
}

static DEVICE_ATTR_WO(probe);
#endif
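
/*
 * Usage note (editor's addition, hypothetical address): with
 * CONFIG_ARCH_MEMORY_PROBE, writing a block-aligned physical address to the
 * root probe attribute hot-adds one memory block, e.g.
 *
 *   # echo 0x40000000 > /sys/devices/system/memory/probe
 */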

#ifdef CONFIG_MEMORY_FAILURE
/*
 * Support for offlining pages of memory
 */

/* Soft offline a page */
static ssize_t soft_offline_page_store(struct device *dev,
                                       struct device_attribute *attr,
                                       const char *buf, size_t count)
{
        int ret;
        u64 pfn;
        if (!capable(CAP_SYS_ADMIN))
                return -EPERM;
        if (kstrtoull(buf, 0, &pfn) < 0)
                return -EINVAL;
        pfn >>= PAGE_SHIFT;
        if (!pfn_valid(pfn))
                return -ENXIO;
        /* Only online pages can be soft-offlined (esp., not ZONE_DEVICE). */
        if (!pfn_to_online_page(pfn))
                return -EIO;
        ret = soft_offline_page(pfn_to_page(pfn), 0);
        return ret == 0 ? count : ret;
}

/* Forcibly offline a page, including killing processes. */
static ssize_t hard_offline_page_store(struct device *dev,
                                       struct device_attribute *attr,
                                       const char *buf, size_t count)
{
        int ret;
        u64 pfn;
        if (!capable(CAP_SYS_ADMIN))
                return -EPERM;
        if (kstrtoull(buf, 0, &pfn) < 0)
                return -EINVAL;
        pfn >>= PAGE_SHIFT;
        ret = memory_failure(pfn, 0);
        return ret ? ret : count;
}

static DEVICE_ATTR_WO(soft_offline_page);
static DEVICE_ATTR_WO(hard_offline_page);
#endif
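
/*
 * Usage note (editor's addition, hypothetical address): despite the "page"
 * naming, both attributes above parse a physical byte address and shift it
 * down to a PFN themselves, e.g.
 *
 *   # echo 0x123456000 > /sys/devices/system/memory/soft_offline_page
 */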

/*
 * Note that phys_device is optional.  It is here to allow for
 * differentiation between which *physical* devices each
 * section belongs to...
 */
int __weak arch_get_memory_phys_device(unsigned long start_pfn)
{
        return 0;
}

/* A reference for the returned memory block device is acquired. */
static struct memory_block *find_memory_block_by_id(unsigned long block_id)
{
        struct device *dev;

        dev = subsys_find_device_by_id(&memory_subsys, block_id, NULL);
        return dev ? to_memory_block(dev) : NULL;
}

/*
 * For now, we have a linear search to go find the appropriate
 * memory_block corresponding to a particular phys_index. If
 * this gets to be a real problem, we can always use a radix
 * tree or something here.
 *
 * This could be made generic for all device subsystems.
 */
struct memory_block *find_memory_block(struct mem_section *section)
{
        unsigned long block_id = base_memory_block_id(__section_nr(section));

        return find_memory_block_by_id(block_id);
}

static struct attribute *memory_memblk_attrs[] = {
        &dev_attr_phys_index.attr,
        &dev_attr_state.attr,
        &dev_attr_phys_device.attr,
        &dev_attr_removable.attr,
#ifdef CONFIG_MEMORY_HOTREMOVE
        &dev_attr_valid_zones.attr,
#endif
        NULL
};

static struct attribute_group memory_memblk_attr_group = {
        .attrs = memory_memblk_attrs,
};

static const struct attribute_group *memory_memblk_attr_groups[] = {
        &memory_memblk_attr_group,
        NULL,
};

/*
 * register_memory - Setup a sysfs device for a memory block
 */
static
int register_memory(struct memory_block *memory)
{
        int ret;

        memory->dev.bus = &memory_subsys;
        memory->dev.id = memory->start_section_nr / sections_per_block;
        memory->dev.release = memory_block_release;
        memory->dev.groups = memory_memblk_attr_groups;
        memory->dev.offline = memory->state == MEM_OFFLINE;

        ret = device_register(&memory->dev);
        if (ret)
                put_device(&memory->dev);

        return ret;
}

static int init_memory_block(struct memory_block **memory,
                             unsigned long block_id, unsigned long state)
{
        struct memory_block *mem;
        unsigned long start_pfn;
        int ret = 0;

        mem = find_memory_block_by_id(block_id);
        if (mem) {
                put_device(&mem->dev);
                return -EEXIST;
        }
        mem = kzalloc(sizeof(*mem), GFP_KERNEL);
        if (!mem)
                return -ENOMEM;

        mem->start_section_nr = block_id * sections_per_block;
        mem->state = state;
        start_pfn = section_nr_to_pfn(mem->start_section_nr);
        mem->phys_device = arch_get_memory_phys_device(start_pfn);
        mem->nid = NUMA_NO_NODE;

        ret = register_memory(mem);

        *memory = mem;
        return ret;
}

static int add_memory_block(unsigned long base_section_nr)
{
        int ret, section_count = 0;
        struct memory_block *mem;
        unsigned long nr;

        for (nr = base_section_nr; nr < base_section_nr + sections_per_block;
             nr++)
                if (present_section_nr(nr))
                        section_count++;

        if (section_count == 0)
                return 0;
        ret = init_memory_block(&mem, base_memory_block_id(base_section_nr),
                                MEM_ONLINE);
        if (ret)
                return ret;
        mem->section_count = section_count;
        return 0;
}

static void unregister_memory(struct memory_block *memory)
{
        if (WARN_ON_ONCE(memory->dev.bus != &memory_subsys))
                return;

        /* drop the ref. we got via find_memory_block() */
        put_device(&memory->dev);
        device_unregister(&memory->dev);
}

/*
 * Create memory block devices for the given memory area. Start and size
 * have to be aligned to memory block granularity. Memory block devices
 * will be initialized as offline.
 */
int create_memory_block_devices(unsigned long start, unsigned long size)
{
        const unsigned long start_block_id = pfn_to_block_id(PFN_DOWN(start));
        unsigned long end_block_id = pfn_to_block_id(PFN_DOWN(start + size));
        struct memory_block *mem;
        unsigned long block_id;
        int ret = 0;

        if (WARN_ON_ONCE(!IS_ALIGNED(start, memory_block_size_bytes()) ||
                         !IS_ALIGNED(size, memory_block_size_bytes())))
                return -EINVAL;

        mutex_lock(&mem_sysfs_mutex);
        for (block_id = start_block_id; block_id != end_block_id; block_id++) {
                ret = init_memory_block(&mem, block_id, MEM_OFFLINE);
                if (ret)
                        break;
                mem->section_count = sections_per_block;
        }
        if (ret) {
                end_block_id = block_id;
                for (block_id = start_block_id; block_id != end_block_id;
                     block_id++) {
                        mem = find_memory_block_by_id(block_id);
                        mem->section_count = 0;
                        unregister_memory(mem);
                }
        }
        mutex_unlock(&mem_sysfs_mutex);
        return ret;
}

/*
 * Remove memory block devices for the given memory area. Start and size
 * have to be aligned to memory block granularity. Memory block devices
 * have to be offline.
 */
void remove_memory_block_devices(unsigned long start, unsigned long size)
{
        const unsigned long start_block_id = pfn_to_block_id(PFN_DOWN(start));
        const unsigned long end_block_id = pfn_to_block_id(PFN_DOWN(start + size));
        struct memory_block *mem;
        unsigned long block_id;

        if (WARN_ON_ONCE(!IS_ALIGNED(start, memory_block_size_bytes()) ||
                         !IS_ALIGNED(size, memory_block_size_bytes())))
                return;

        mutex_lock(&mem_sysfs_mutex);
        for (block_id = start_block_id; block_id != end_block_id; block_id++) {
                mem = find_memory_block_by_id(block_id);
                if (WARN_ON_ONCE(!mem))
                        continue;
                mem->section_count = 0;
                unregister_memory_block_under_nodes(mem);
                unregister_memory(mem);
        }
        mutex_unlock(&mem_sysfs_mutex);
}

/* return true if the memory block is offlined, otherwise, return false */
bool is_memblock_offlined(struct memory_block *mem)
{
        return mem->state == MEM_OFFLINE;
}

static struct attribute *memory_root_attrs[] = {
#ifdef CONFIG_ARCH_MEMORY_PROBE
        &dev_attr_probe.attr,
#endif

#ifdef CONFIG_MEMORY_FAILURE
        &dev_attr_soft_offline_page.attr,
        &dev_attr_hard_offline_page.attr,
#endif

        &dev_attr_block_size_bytes.attr,
        &dev_attr_auto_online_blocks.attr,
        NULL
};

static struct attribute_group memory_root_attr_group = {
        .attrs = memory_root_attrs,
};

static const struct attribute_group *memory_root_attr_groups[] = {
        &memory_root_attr_group,
        NULL,
};

/*
 * Initialize the sysfs support for memory devices...
 */
void __init memory_dev_init(void)
{
        int ret;
        int err;
        unsigned long block_sz, nr;

        /* Validate the configured memory block size */
        block_sz = memory_block_size_bytes();
        if (!is_power_of_2(block_sz) || block_sz < MIN_MEMORY_BLOCK_SIZE)
                panic("Memory block size not suitable: 0x%lx\n", block_sz);
        sections_per_block = block_sz / MIN_MEMORY_BLOCK_SIZE;

        ret = subsys_system_register(&memory_subsys, memory_root_attr_groups);
        if (ret)
                goto out;

        /*
         * Create entries for memory sections that were found
         * during boot and have been initialized
         */
        mutex_lock(&mem_sysfs_mutex);
        for (nr = 0; nr <= __highest_present_section_nr;
             nr += sections_per_block) {
                err = add_memory_block(nr);
                if (!ret)
                        ret = err;
        }
        mutex_unlock(&mem_sysfs_mutex);

out:
        if (ret)
                panic("%s() failed: %d\n", __func__, ret);
}

/**
 * walk_memory_blocks - walk through all present memory blocks overlapped
 *                      by the range [start, start + size)
 *
 * @start: start address of the memory range
 * @size: size of the memory range
 * @arg: argument passed to func
 * @func: callback for each memory block walked
 *
 * This function walks through all present memory blocks overlapped by the
 * range [start, start + size), calling func on each memory block.
 *
 * In case func() returns an error, walking is aborted and the error is
 * returned.
 */
int walk_memory_blocks(unsigned long start, unsigned long size,
                       void *arg, walk_memory_blocks_func_t func)
{
        const unsigned long start_block_id = phys_to_block_id(start);
        const unsigned long end_block_id = phys_to_block_id(start + size - 1);
        struct memory_block *mem;
        unsigned long block_id;
        int ret = 0;

        if (!size)
                return 0;

        for (block_id = start_block_id; block_id <= end_block_id; block_id++) {
                mem = find_memory_block_by_id(block_id);
                if (!mem)
                        continue;

                ret = func(mem, arg);
                put_device(&mem->dev);
                if (ret)
                        break;
        }
        return ret;
}
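
/*
 * Illustrative sketch (editor's addition, hypothetical callback): counting
 * the offline blocks in a range with walk_memory_blocks(). Returning
 * nonzero from the callback would abort the walk.
 */
#if 0
static int example_count_offline(struct memory_block *mem, void *arg)
{
        unsigned long *count = arg;

        if (mem->state == MEM_OFFLINE)
                (*count)++;
        return 0;                               /* keep walking */
}

/* walk_memory_blocks(start, size, &count, example_count_offline); */
#endif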

struct for_each_memory_block_cb_data {
        walk_memory_blocks_func_t func;
        void *arg;
};

static int for_each_memory_block_cb(struct device *dev, void *data)
{
        struct memory_block *mem = to_memory_block(dev);
        struct for_each_memory_block_cb_data *cb_data = data;

        return cb_data->func(mem, cb_data->arg);
}

/**
 * for_each_memory_block - walk through all present memory blocks
 *
 * @arg: argument passed to func
 * @func: callback for each memory block walked
 *
 * This function walks through all present memory blocks, calling func on
 * each memory block.
 *
 * In case func() returns an error, walking is aborted and the error is
 * returned.
 */
int for_each_memory_block(void *arg, walk_memory_blocks_func_t func)
{
        struct for_each_memory_block_cb_data cb_data = {
                .func = func,
                .arg = arg,
        };

        return bus_for_each_dev(&memory_subsys, NULL, &cb_data,
                                for_each_memory_block_cb);
}
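
/*
 * Design note (editor's addition): unlike walk_memory_blocks(), which looks
 * up blocks by id and takes/drops a device reference per block,
 * for_each_memory_block() simply iterates every device on the memory bus,
 * so the same walk_memory_blocks_func_t callback (such as the sketch above)
 * works with both entry points.
 */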
