arch/powerpc/platforms/pseries/iommu.c


DEFINITIONS

This source file includes the following definitions:
  1. iommu_pseries_alloc_group
  2. iommu_pseries_free_group
  3. tce_build_pSeries
  4. tce_free_pSeries
  5. tce_get_pseries
  6. tce_build_pSeriesLP
  7. tce_buildmulti_pSeriesLP
  8. tce_free_pSeriesLP
  9. tce_freemulti_pSeriesLP
  10. tce_get_pSeriesLP
  11. tce_clearrange_multi_pSeriesLP
  12. tce_setrange_multi_pSeriesLP
  13. tce_setrange_multi_pSeriesLP_walk
  14. iommu_table_setparms
  15. iommu_table_setparms_lpar
  16. pci_dma_bus_setup_pSeries
  17. tce_exchange_pseries
  18. pci_dma_bus_setup_pSeriesLP
  19. pci_dma_dev_setup_pSeries
  20. disable_ddw_setup
  21. remove_ddw
  22. find_existing_ddw
  23. find_existing_ddw_windows
  24. query_ddw
  25. create_ddw
  26. ddw_memory_hotplug_max
  27. enable_ddw
  28. pci_dma_dev_setup_pSeriesLP
  29. iommu_bypass_supported_pSeriesLP
  30. iommu_mem_notifier
  31. iommu_reconfig_notifier
  32. iommu_init_early_pSeries
  33. disable_multitce
  34. tce_iommu_bus_notifier
  35. tce_iommu_bus_notifier_init

// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * Copyright (C) 2001 Mike Corrigan & Dave Engebretsen, IBM Corporation
 *
 * Rewrite, cleanup:
 *
 * Copyright (C) 2004 Olof Johansson <olof@lixom.net>, IBM Corporation
 * Copyright (C) 2006 Olof Johansson <olof@lixom.net>
 *
 * Dynamic DMA mapping support, pSeries-specific parts, both SMP and LPAR.
 */

#include <linux/init.h>
#include <linux/types.h>
#include <linux/slab.h>
#include <linux/mm.h>
#include <linux/memblock.h>
#include <linux/spinlock.h>
#include <linux/string.h>
#include <linux/pci.h>
#include <linux/dma-mapping.h>
#include <linux/crash_dump.h>
#include <linux/memory.h>
#include <linux/of.h>
#include <linux/iommu.h>
#include <linux/rculist.h>
#include <asm/io.h>
#include <asm/prom.h>
#include <asm/rtas.h>
#include <asm/iommu.h>
#include <asm/pci-bridge.h>
#include <asm/machdep.h>
#include <asm/firmware.h>
#include <asm/tce.h>
#include <asm/ppc-pci.h>
#include <asm/udbg.h>
#include <asm/mmzone.h>
#include <asm/plpar_wrappers.h>

#include "pseries.h"

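/*
 * Allocate an iommu_table_group and its first iommu_table on the given
 * NUMA node. Returns NULL on allocation failure; the group is released
 * again with iommu_pseries_free_group().
 */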
static struct iommu_table_group *iommu_pseries_alloc_group(int node)
{
        struct iommu_table_group *table_group;
        struct iommu_table *tbl;

        table_group = kzalloc_node(sizeof(struct iommu_table_group), GFP_KERNEL,
                           node);
        if (!table_group)
                return NULL;

        tbl = kzalloc_node(sizeof(struct iommu_table), GFP_KERNEL, node);
        if (!tbl)
                goto free_group;

        INIT_LIST_HEAD_RCU(&tbl->it_group_list);
        kref_init(&tbl->it_kref);

        table_group->tables[0] = tbl;

        return table_group;

free_group:
        kfree(table_group);
        return NULL;
}

static void iommu_pseries_free_group(struct iommu_table_group *table_group,
                const char *node_name)
{
        struct iommu_table *tbl;

        if (!table_group)
                return;

        tbl = table_group->tables[0];
#ifdef CONFIG_IOMMU_API
        if (table_group->group) {
                iommu_group_put(table_group->group);
                BUG_ON(table_group->group);
        }
#endif
        iommu_tce_table_put(tbl);

        kfree(table_group);
}

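/*
 * Non-LPAR TCE build: the kernel owns the TCE table at tbl->it_base, so
 * entries are written directly to memory, no hypervisor call needed.
 * Each entry grants read access, plus write unless direction is
 * DMA_TO_DEVICE.
 */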
static int tce_build_pSeries(struct iommu_table *tbl, long index,
                              long npages, unsigned long uaddr,
                              enum dma_data_direction direction,
                              unsigned long attrs)
{
        u64 proto_tce;
        __be64 *tcep;
        u64 rpn;

        proto_tce = TCE_PCI_READ; // Read allowed

        if (direction != DMA_TO_DEVICE)
                proto_tce |= TCE_PCI_WRITE;

        tcep = ((__be64 *)tbl->it_base) + index;

        while (npages--) {
                /* can't move this out since we might cross MEMBLOCK boundary */
                rpn = __pa(uaddr) >> TCE_SHIFT;
                *tcep = cpu_to_be64(proto_tce | (rpn & TCE_RPN_MASK) << TCE_RPN_SHIFT);

                uaddr += TCE_PAGE_SIZE;
                tcep++;
        }
        return 0;
}

static void tce_free_pSeries(struct iommu_table *tbl, long index, long npages)
{
        __be64 *tcep;

        tcep = ((__be64 *)tbl->it_base) + index;

        while (npages--)
                *(tcep++) = 0;
}

static unsigned long tce_get_pseries(struct iommu_table *tbl, long index)
{
        __be64 *tcep;

        tcep = ((__be64 *)tbl->it_base) + index;

        return be64_to_cpu(*tcep);
}

static void tce_free_pSeriesLP(unsigned long liobn, long, long);
static void tce_freemulti_pSeriesLP(struct iommu_table*, long, long);

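/*
 * LPAR TCE build: map one TCE at a time through the H_PUT_TCE hypercall
 * (plpar_tce_put). On H_NOT_ENOUGH_RESOURCES the entries created so far
 * are torn down again so the caller sees an all-or-nothing result.
 */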
static int tce_build_pSeriesLP(unsigned long liobn, long tcenum, long tceshift,
                                long npages, unsigned long uaddr,
                                enum dma_data_direction direction,
                                unsigned long attrs)
{
        u64 rc = 0;
        u64 proto_tce, tce;
        u64 rpn;
        int ret = 0;
        long tcenum_start = tcenum, npages_start = npages;

        rpn = __pa(uaddr) >> tceshift;
        proto_tce = TCE_PCI_READ;
        if (direction != DMA_TO_DEVICE)
                proto_tce |= TCE_PCI_WRITE;

        while (npages--) {
                tce = proto_tce | (rpn & TCE_RPN_MASK) << tceshift;
                rc = plpar_tce_put((u64)liobn, (u64)tcenum << tceshift, tce);

                if (unlikely(rc == H_NOT_ENOUGH_RESOURCES)) {
                        ret = (int)rc;
                        tce_free_pSeriesLP(liobn, tcenum_start,
                                           (npages_start - (npages + 1)));
                        break;
                }

                if (rc && printk_ratelimit()) {
                        printk("tce_build_pSeriesLP: plpar_tce_put failed. rc=%lld\n", rc);
                        printk("\tindex   = 0x%llx\n", (u64)liobn);
                        printk("\ttcenum  = 0x%llx\n", (u64)tcenum);
                        printk("\ttce val = 0x%llx\n", tce);
                        dump_stack();
                }

                tcenum++;
                rpn++;
        }
        return ret;
}

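/* Per-CPU page used to batch TCEs for H_PUT_TCE_INDIRECT; allocated lazily */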
static DEFINE_PER_CPU(__be64 *, tce_page);

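/*
 * LPAR multi-TCE build: fill the per-CPU tce_page with up to one page
 * worth of TCEs and hand the whole batch to the hypervisor with
 * H_PUT_TCE_INDIRECT. Falls back to tce_build_pSeriesLP() for a single
 * page, when the MULTITCE firmware feature is absent, or when the batch
 * page cannot be allocated.
 */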
static int tce_buildmulti_pSeriesLP(struct iommu_table *tbl, long tcenum,
                                     long npages, unsigned long uaddr,
                                     enum dma_data_direction direction,
                                     unsigned long attrs)
{
        u64 rc = 0;
        u64 proto_tce;
        __be64 *tcep;
        u64 rpn;
        long l, limit;
        long tcenum_start = tcenum, npages_start = npages;
        int ret = 0;
        unsigned long flags;

        if ((npages == 1) || !firmware_has_feature(FW_FEATURE_MULTITCE)) {
                return tce_build_pSeriesLP(tbl->it_index, tcenum,
                                           tbl->it_page_shift, npages, uaddr,
                                           direction, attrs);
        }

        local_irq_save(flags);  /* to protect tcep and the page behind it */

        tcep = __this_cpu_read(tce_page);

        /* This is safe to do since interrupts are off when we're called
         * from iommu_alloc{,_sg}()
         */
        if (!tcep) {
                tcep = (__be64 *)__get_free_page(GFP_ATOMIC);
                /* If allocation fails, fall back to the loop implementation */
                if (!tcep) {
                        local_irq_restore(flags);
                        return tce_build_pSeriesLP(tbl->it_index, tcenum,
                                        tbl->it_page_shift,
                                        npages, uaddr, direction, attrs);
                }
                __this_cpu_write(tce_page, tcep);
        }

        rpn = __pa(uaddr) >> TCE_SHIFT;
        proto_tce = TCE_PCI_READ;
        if (direction != DMA_TO_DEVICE)
                proto_tce |= TCE_PCI_WRITE;

        /* We can map max one pageful of TCEs at a time */
        do {
                /*
                 * Set up the page with TCE data, looping through and setting
                 * the values.
                 */
                limit = min_t(long, npages, 4096/TCE_ENTRY_SIZE);

                for (l = 0; l < limit; l++) {
                        tcep[l] = cpu_to_be64(proto_tce | (rpn & TCE_RPN_MASK) << TCE_RPN_SHIFT);
                        rpn++;
                }

                rc = plpar_tce_put_indirect((u64)tbl->it_index,
                                            (u64)tcenum << 12,
                                            (u64)__pa(tcep),
                                            limit);

                npages -= limit;
                tcenum += limit;
        } while (npages > 0 && !rc);

        local_irq_restore(flags);

        if (unlikely(rc == H_NOT_ENOUGH_RESOURCES)) {
                ret = (int)rc;
                tce_freemulti_pSeriesLP(tbl, tcenum_start,
                                        (npages_start - (npages + limit)));
                return ret;
        }

        if (rc && printk_ratelimit()) {
                printk("tce_buildmulti_pSeriesLP: plpar_tce_put failed. rc=%lld\n", rc);
                printk("\tindex   = 0x%llx\n", (u64)tbl->it_index);
                printk("\tnpages  = 0x%llx\n", (u64)npages);
                printk("\ttce[0] val = 0x%llx\n", tcep[0]);
                dump_stack();
        }
        return ret;
}

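/* Clear TCEs one at a time by writing a zero entry with H_PUT_TCE. */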
static void tce_free_pSeriesLP(unsigned long liobn, long tcenum, long npages)
{
        u64 rc;

        while (npages--) {
                rc = plpar_tce_put((u64)liobn, (u64)tcenum << 12, 0);

                if (rc && printk_ratelimit()) {
                        printk("tce_free_pSeriesLP: plpar_tce_put failed. rc=%lld\n", rc);
                        printk("\tindex   = 0x%llx\n", (u64)liobn);
                        printk("\ttcenum  = 0x%llx\n", (u64)tcenum);
                        dump_stack();
                }

                tcenum++;
        }
}

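/*
 * Clear a whole range with a single H_STUFF_TCE hypercall
 * (plpar_tce_stuff), falling back to per-entry frees when the MULTITCE
 * firmware feature is not available.
 */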
static void tce_freemulti_pSeriesLP(struct iommu_table *tbl, long tcenum, long npages)
{
        u64 rc;

        if (!firmware_has_feature(FW_FEATURE_MULTITCE))
                return tce_free_pSeriesLP(tbl->it_index, tcenum, npages);

        rc = plpar_tce_stuff((u64)tbl->it_index, (u64)tcenum << 12, 0, npages);

        if (rc && printk_ratelimit()) {
                printk("tce_freemulti_pSeriesLP: plpar_tce_stuff failed\n");
                printk("\trc      = %lld\n", rc);
                printk("\tindex   = 0x%llx\n", (u64)tbl->it_index);
                printk("\tnpages  = 0x%llx\n", (u64)npages);
                dump_stack();
        }
}

static unsigned long tce_get_pSeriesLP(struct iommu_table *tbl, long tcenum)
{
        u64 rc;
        unsigned long tce_ret;

        rc = plpar_tce_get((u64)tbl->it_index, (u64)tcenum << 12, &tce_ret);

        if (rc && printk_ratelimit()) {
                printk("tce_get_pSeriesLP: plpar_tce_get failed. rc=%lld\n", rc);
                printk("\tindex   = 0x%llx\n", (u64)tbl->it_index);
                printk("\ttcenum  = 0x%llx\n", (u64)tcenum);
                dump_stack();
        }

        return tce_ret;
}

/* this is compatible with cells for the device tree property */
struct dynamic_dma_window_prop {
        __be32  liobn;          /* tce table number */
        __be64  dma_base;       /* address hi,lo */
        __be32  tce_shift;      /* ilog2(tce_page_size) */
        __be32  window_shift;   /* ilog2(tce_window_size) */
};

struct direct_window {
        struct device_node *device;
        const struct dynamic_dma_window_prop *prop;
        struct list_head list;
};

/* Dynamic DMA Window support */
struct ddw_query_response {
        u32 windows_available;
        u32 largest_available_block;
        u32 page_size;
        u32 migration_capable;
};

struct ddw_create_response {
        u32 liobn;
        u32 addr_hi;
        u32 addr_lo;
};

static LIST_HEAD(direct_window_list);
/* prevents races between memory on/offline and window creation */
static DEFINE_SPINLOCK(direct_window_list_lock);
/* protects initializing window twice for same device */
static DEFINE_MUTEX(direct_window_init_mutex);
#define DIRECT64_PROPNAME "linux,direct64-ddr-window-info"

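/*
 * Clear the TCEs backing a pfn range of a dynamic DMA window, e.g. when
 * memory goes offline or a window is removed. The range is first
 * aligned to the window's TCE page size, then cleared in chunks of up
 * to 512 entries via H_STUFF_TCE.
 */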
static int tce_clearrange_multi_pSeriesLP(unsigned long start_pfn,
                                        unsigned long num_pfn, const void *arg)
{
        const struct dynamic_dma_window_prop *maprange = arg;
        int rc;
        u64 tce_size, num_tce, dma_offset, next;
        u32 tce_shift;
        long limit;

        tce_shift = be32_to_cpu(maprange->tce_shift);
        tce_size = 1ULL << tce_shift;
        next = start_pfn << PAGE_SHIFT;
        num_tce = num_pfn << PAGE_SHIFT;

        /* round back to the beginning of the tce page size */
        num_tce += next & (tce_size - 1);
        next &= ~(tce_size - 1);

        /* convert to number of tces */
        num_tce |= tce_size - 1;
        num_tce >>= tce_shift;

        do {
                /*
                 * Set up the page with TCE data, looping through and setting
                 * the values.
                 */
                limit = min_t(long, num_tce, 512);
                dma_offset = next + be64_to_cpu(maprange->dma_base);

                rc = plpar_tce_stuff((u64)be32_to_cpu(maprange->liobn),
                                             dma_offset,
                                             0, limit);
                next += limit * tce_size;
                num_tce -= limit;
        } while (num_tce > 0 && !rc);

        return rc;
}

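/*
 * Linearly map a pfn range into a dynamic DMA window, batching entries
 * through the per-CPU tce_page and H_PUT_TCE_INDIRECT. Without the
 * MULTITCE feature the range is mapped one TCE at a time instead. On
 * error the caller is expected to clear the whole range.
 */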
static int tce_setrange_multi_pSeriesLP(unsigned long start_pfn,
                                        unsigned long num_pfn, const void *arg)
{
        const struct dynamic_dma_window_prop *maprange = arg;
        u64 tce_size, num_tce, dma_offset, next, proto_tce, liobn;
        __be64 *tcep;
        u32 tce_shift;
        u64 rc = 0;
        long l, limit;

        if (!firmware_has_feature(FW_FEATURE_MULTITCE)) {
                unsigned long tceshift = be32_to_cpu(maprange->tce_shift);
                unsigned long dmastart = (start_pfn << PAGE_SHIFT) +
                                be64_to_cpu(maprange->dma_base);
                unsigned long tcenum = dmastart >> tceshift;
                unsigned long npages = num_pfn << PAGE_SHIFT >> tceshift;
                void *uaddr = __va(start_pfn << PAGE_SHIFT);

                return tce_build_pSeriesLP(be32_to_cpu(maprange->liobn),
                                tcenum, tceshift, npages, (unsigned long) uaddr,
                                DMA_BIDIRECTIONAL, 0);
        }

        local_irq_disable();    /* to protect tcep and the page behind it */
        tcep = __this_cpu_read(tce_page);

        if (!tcep) {
                tcep = (__be64 *)__get_free_page(GFP_ATOMIC);
                if (!tcep) {
                        local_irq_enable();
                        return -ENOMEM;
                }
                __this_cpu_write(tce_page, tcep);
        }

        proto_tce = TCE_PCI_READ | TCE_PCI_WRITE;

        liobn = (u64)be32_to_cpu(maprange->liobn);
        tce_shift = be32_to_cpu(maprange->tce_shift);
        tce_size = 1ULL << tce_shift;
        next = start_pfn << PAGE_SHIFT;
        num_tce = num_pfn << PAGE_SHIFT;

        /* round back to the beginning of the tce page size */
        num_tce += next & (tce_size - 1);
        next &= ~(tce_size - 1);

        /* convert to number of tces */
        num_tce |= tce_size - 1;
        num_tce >>= tce_shift;

        /* We can map max one pageful of TCEs at a time */
        do {
                /*
                 * Set up the page with TCE data, looping through and setting
                 * the values.
                 */
                limit = min_t(long, num_tce, 4096/TCE_ENTRY_SIZE);
                dma_offset = next + be64_to_cpu(maprange->dma_base);

                for (l = 0; l < limit; l++) {
                        tcep[l] = cpu_to_be64(proto_tce | next);
                        next += tce_size;
                }

                rc = plpar_tce_put_indirect(liobn,
                                            dma_offset,
                                            (u64)__pa(tcep),
                                            limit);

                num_tce -= limit;
        } while (num_tce > 0 && !rc);

        /* error cleanup: caller will clear whole range */

        local_irq_enable();
        return rc;
}

static int tce_setrange_multi_pSeriesLP_walk(unsigned long start_pfn,
                unsigned long num_pfn, void *arg)
{
        return tce_setrange_multi_pSeriesLP(start_pfn, num_pfn, arg);
}

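/*
 * Fill in an iommu_table for a non-LPAR PHB from the linux,tce-base and
 * linux,tce-size properties, carving the next slice of window space out
 * of the PHB's 2GB DMA budget.
 */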
static void iommu_table_setparms(struct pci_controller *phb,
                                 struct device_node *dn,
                                 struct iommu_table *tbl)
{
        struct device_node *node;
        const unsigned long *basep;
        const u32 *sizep;

        node = phb->dn;

        basep = of_get_property(node, "linux,tce-base", NULL);
        sizep = of_get_property(node, "linux,tce-size", NULL);
        if (basep == NULL || sizep == NULL) {
                printk(KERN_ERR "PCI_DMA: iommu_table_setparms: %pOF has "
                                "missing tce entries!\n", dn);
                return;
        }

        tbl->it_base = (unsigned long)__va(*basep);

        if (!is_kdump_kernel())
                memset((void *)tbl->it_base, 0, *sizep);

        tbl->it_busno = phb->bus->number;
        tbl->it_page_shift = IOMMU_PAGE_SHIFT_4K;

        /* Units of tce entries */
        tbl->it_offset = phb->dma_window_base_cur >> tbl->it_page_shift;

        /* Test if we are going over 2GB of DMA space */
        if (phb->dma_window_base_cur + phb->dma_window_size > 0x80000000ul) {
                udbg_printf("PCI_DMA: Unexpected number of IOAs under this PHB.\n");
                panic("PCI_DMA: Unexpected number of IOAs under this PHB.\n");
        }

        phb->dma_window_base_cur += phb->dma_window_size;

        /* Set the tce table size - measured in entries */
        tbl->it_size = phb->dma_window_size >> tbl->it_page_shift;

        tbl->it_index = 0;
        tbl->it_blocksize = 16;
        tbl->it_type = TCE_PCI;
}

/*
 * iommu_table_setparms_lpar
 *
 * Function: On pSeries LPAR systems, return TCE table info, given a pci bus.
 */
static void iommu_table_setparms_lpar(struct pci_controller *phb,
                                      struct device_node *dn,
                                      struct iommu_table *tbl,
                                      struct iommu_table_group *table_group,
                                      const __be32 *dma_window)
{
        unsigned long offset, size;

        of_parse_dma_window(dn, dma_window, &tbl->it_index, &offset, &size);

        tbl->it_busno = phb->bus->number;
        tbl->it_page_shift = IOMMU_PAGE_SHIFT_4K;
        tbl->it_base   = 0;
        tbl->it_blocksize  = 16;
        tbl->it_type = TCE_PCI;
        tbl->it_offset = offset >> tbl->it_page_shift;
        tbl->it_size = size >> tbl->it_page_shift;

        table_group->tce32_start = offset;
        table_group->tce32_size = size;
}

struct iommu_table_ops iommu_table_pseries_ops = {
        .set = tce_build_pSeries,
        .clear = tce_free_pSeries,
        .get = tce_get_pseries
};

static void pci_dma_bus_setup_pSeries(struct pci_bus *bus)
{
        struct device_node *dn;
        struct iommu_table *tbl;
        struct device_node *isa_dn, *isa_dn_orig;
        struct device_node *tmp;
        struct pci_dn *pci;
        int children;

        dn = pci_bus_to_OF_node(bus);

        pr_debug("pci_dma_bus_setup_pSeries: setting up bus %pOF\n", dn);

        if (bus->self) {
                /* This is not a root bus, any setup will be done for the
                 * device-side of the bridge in iommu_dev_setup_pSeries().
                 */
                return;
        }
        pci = PCI_DN(dn);

        /* Check if the ISA bus on the system is under
         * this PHB.
         */
        isa_dn = isa_dn_orig = of_find_node_by_type(NULL, "isa");

        while (isa_dn && isa_dn != dn)
                isa_dn = isa_dn->parent;

        of_node_put(isa_dn_orig);

        /* Count number of direct PCI children of the PHB. */
        for (children = 0, tmp = dn->child; tmp; tmp = tmp->sibling)
                children++;

        pr_debug("Children: %d\n", children);

        /* Calculate amount of DMA window per slot. Each window must be
         * a power of two (due to pci_alloc_consistent requirements).
         *
         * Keep 256MB aside for PHBs with ISA.
         */

        if (!isa_dn) {
                /* No ISA/IDE - just set window size and return */
                pci->phb->dma_window_size = 0x80000000ul; /* To be divided */

                while (pci->phb->dma_window_size * children > 0x80000000ul)
                        pci->phb->dma_window_size >>= 1;
                pr_debug("No ISA/IDE, window size is 0x%llx\n",
                         pci->phb->dma_window_size);
                pci->phb->dma_window_base_cur = 0;

                return;
        }

        /* If we have ISA, then we probably have an IDE
         * controller too. Allocate a 128MB table but
         * skip the first 128MB to avoid stepping on ISA
         * space.
         */
        pci->phb->dma_window_size = 0x8000000ul;
        pci->phb->dma_window_base_cur = 0x8000000ul;

        pci->table_group = iommu_pseries_alloc_group(pci->phb->node);
        tbl = pci->table_group->tables[0];

        iommu_table_setparms(pci->phb, dn, tbl);
        tbl->it_ops = &iommu_table_pseries_ops;
        iommu_init_table(tbl, pci->phb->node, 0, 0);

        /* Divide the rest (1.75GB) among the children */
        pci->phb->dma_window_size = 0x80000000ul;
        while (pci->phb->dma_window_size * children > 0x70000000ul)
                pci->phb->dma_window_size >>= 1;

        pr_debug("ISA/IDE, window size is 0x%llx\n", pci->phb->dma_window_size);
}

#ifdef CONFIG_IOMMU_API
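/*
 * Atomically exchange a TCE (under the large pool lock): read the old
 * entry with H_GET_TCE, then install the new one with H_PUT_TCE. Wired
 * up as the .xchg_no_kill callback for the IOMMU API (VFIO) path.
 */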
static int tce_exchange_pseries(struct iommu_table *tbl, long index, unsigned
                                long *tce, enum dma_data_direction *direction,
                                bool realmode)
{
        long rc;
        unsigned long ioba = (unsigned long) index << tbl->it_page_shift;
        unsigned long flags, oldtce = 0;
        u64 proto_tce = iommu_direction_to_tce_perm(*direction);
        unsigned long newtce = *tce | proto_tce;

        spin_lock_irqsave(&tbl->large_pool.lock, flags);

        rc = plpar_tce_get((u64)tbl->it_index, ioba, &oldtce);
        if (!rc)
                rc = plpar_tce_put((u64)tbl->it_index, ioba, newtce);

        if (!rc) {
                *direction = iommu_tce_direction(oldtce);
                *tce = oldtce & ~(TCE_PCI_READ | TCE_PCI_WRITE);
        }

        spin_unlock_irqrestore(&tbl->large_pool.lock, flags);

        return rc;
}
#endif

struct iommu_table_ops iommu_table_lpar_multi_ops = {
        .set = tce_buildmulti_pSeriesLP,
#ifdef CONFIG_IOMMU_API
        .xchg_no_kill = tce_exchange_pseries,
#endif
        .clear = tce_freemulti_pSeriesLP,
        .get = tce_get_pSeriesLP
};

static void pci_dma_bus_setup_pSeriesLP(struct pci_bus *bus)
{
        struct iommu_table *tbl;
        struct device_node *dn, *pdn;
        struct pci_dn *ppci;
        const __be32 *dma_window = NULL;

        dn = pci_bus_to_OF_node(bus);

        pr_debug("pci_dma_bus_setup_pSeriesLP: setting up bus %pOF\n",
                 dn);

        /* Find nearest ibm,dma-window, walking up the device tree */
        for (pdn = dn; pdn != NULL; pdn = pdn->parent) {
                dma_window = of_get_property(pdn, "ibm,dma-window", NULL);
                if (dma_window != NULL)
                        break;
        }

        if (dma_window == NULL) {
                pr_debug("  no ibm,dma-window property!\n");
                return;
        }

        ppci = PCI_DN(pdn);

        pr_debug("  parent is %pOF, iommu_table: 0x%p\n",
                 pdn, ppci->table_group);

        if (!ppci->table_group) {
                ppci->table_group = iommu_pseries_alloc_group(ppci->phb->node);
                tbl = ppci->table_group->tables[0];
                iommu_table_setparms_lpar(ppci->phb, pdn, tbl,
                                ppci->table_group, dma_window);
                tbl->it_ops = &iommu_table_lpar_multi_ops;
                iommu_init_table(tbl, ppci->phb->node, 0, 0);
                iommu_register_group(ppci->table_group,
                                pci_domain_nr(bus), 0);
                pr_debug("  created table: %p\n", ppci->table_group);
        }
}

static void pci_dma_dev_setup_pSeries(struct pci_dev *dev)
{
        struct device_node *dn;
        struct iommu_table *tbl;

        pr_debug("pci_dma_dev_setup_pSeries: %s\n", pci_name(dev));

        dn = dev->dev.of_node;

        /* If we're the direct child of a root bus, then we need to allocate
         * an iommu table ourselves. The bus setup code should have setup
         * the window sizes already.
         */
        if (!dev->bus->self) {
                struct pci_controller *phb = PCI_DN(dn)->phb;

                pr_debug(" --> first child, no bridge. Allocating iommu table.\n");
                PCI_DN(dn)->table_group = iommu_pseries_alloc_group(phb->node);
                tbl = PCI_DN(dn)->table_group->tables[0];
                iommu_table_setparms(phb, dn, tbl);
                tbl->it_ops = &iommu_table_pseries_ops;
                iommu_init_table(tbl, phb->node, 0, 0);
                set_iommu_table_base(&dev->dev, tbl);
                return;
        }

        /* If this device is further down the bus tree, search upwards until
         * an already allocated iommu table is found and use that.
         */

        while (dn && PCI_DN(dn) && PCI_DN(dn)->table_group == NULL)
                dn = dn->parent;

        if (dn && PCI_DN(dn))
                set_iommu_table_base(&dev->dev,
                                PCI_DN(dn)->table_group->tables[0]);
        else
                printk(KERN_WARNING "iommu: Device %s has no iommu table\n",
                       pci_name(dev));
}

static int __read_mostly disable_ddw;

static int __init disable_ddw_setup(char *str)
{
        disable_ddw = 1;
        printk(KERN_INFO "ppc iommu: disabling ddw.\n");

        return 0;
}

early_param("disable_ddw", disable_ddw_setup);

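/*
 * Tear down a dynamic DMA window: clear all of its TCEs, release the
 * window with the ibm,remove-pe-dma-window RTAS call, and optionally
 * delete the DIRECT64_PROPNAME property that described it.
 */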
static void remove_ddw(struct device_node *np, bool remove_prop)
{
        struct dynamic_dma_window_prop *dwp;
        struct property *win64;
        u32 ddw_avail[3];
        u64 liobn;
        int ret = 0;

        ret = of_property_read_u32_array(np, "ibm,ddw-applicable",
                                         &ddw_avail[0], 3);

        win64 = of_find_property(np, DIRECT64_PROPNAME, NULL);
        if (!win64)
                return;

        if (ret || win64->length < sizeof(*dwp))
                goto delprop;

        dwp = win64->value;
        liobn = (u64)be32_to_cpu(dwp->liobn);

        /* clear the whole window, note the arg is in kernel pages */
        ret = tce_clearrange_multi_pSeriesLP(0,
                1ULL << (be32_to_cpu(dwp->window_shift) - PAGE_SHIFT), dwp);
        if (ret)
                pr_warn("%pOF failed to clear tces in window.\n",
                        np);
        else
                pr_debug("%pOF successfully cleared tces in window.\n",
                         np);

        ret = rtas_call(ddw_avail[2], 1, 1, NULL, liobn);
        if (ret)
                pr_warn("%pOF: failed to remove direct window: rtas returned "
                        "%d to ibm,remove-pe-dma-window(%x) %llx\n",
                        np, ret, ddw_avail[2], liobn);
        else
                pr_debug("%pOF: successfully removed direct window: rtas returned "
                        "%d to ibm,remove-pe-dma-window(%x) %llx\n",
                        np, ret, ddw_avail[2], liobn);

delprop:
        if (remove_prop)
                ret = of_remove_property(np, win64);
        if (ret)
                pr_warn("%pOF: failed to remove direct window property: %d\n",
                        np, ret);
}

static u64 find_existing_ddw(struct device_node *pdn)
{
        struct direct_window *window;
        const struct dynamic_dma_window_prop *direct64;
        u64 dma_addr = 0;

        spin_lock(&direct_window_list_lock);
        /* check if we already created a window and dupe that config if so */
        list_for_each_entry(window, &direct_window_list, list) {
                if (window->device == pdn) {
                        direct64 = window->prop;
                        dma_addr = be64_to_cpu(direct64->dma_base);
                        break;
                }
        }
        spin_unlock(&direct_window_list_lock);

        return dma_addr;
}

static int find_existing_ddw_windows(void)
{
        int len;
        struct device_node *pdn;
        struct direct_window *window;
        const struct dynamic_dma_window_prop *direct64;

        if (!firmware_has_feature(FW_FEATURE_LPAR))
                return 0;

        for_each_node_with_property(pdn, DIRECT64_PROPNAME) {
                direct64 = of_get_property(pdn, DIRECT64_PROPNAME, &len);
                if (!direct64)
                        continue;

                window = kzalloc(sizeof(*window), GFP_KERNEL);
                if (!window || len < sizeof(struct dynamic_dma_window_prop)) {
                        kfree(window);
                        remove_ddw(pdn, true);
                        continue;
                }

                window->device = pdn;
                window->prop = direct64;
                spin_lock(&direct_window_list_lock);
                list_add(&window->list, &direct_window_list);
                spin_unlock(&direct_window_list_lock);
        }

        return 0;
}
machine_arch_initcall(pseries, find_existing_ddw_windows);

static int query_ddw(struct pci_dev *dev, const u32 *ddw_avail,
                        struct ddw_query_response *query)
{
        struct device_node *dn;
        struct pci_dn *pdn;
        u32 cfg_addr;
        u64 buid;
        int ret;

        /*
         * Get the config address and phb buid of the PE window.
         * Rely on eeh to retrieve this for us.
         * Retrieve them from the pci device, not the node with the
         * dma-window property
         */
        dn = pci_device_to_OF_node(dev);
        pdn = PCI_DN(dn);
        buid = pdn->phb->buid;
        cfg_addr = ((pdn->busno << 16) | (pdn->devfn << 8));

        ret = rtas_call(ddw_avail[0], 3, 5, (u32 *)query,
                  cfg_addr, BUID_HI(buid), BUID_LO(buid));
        dev_info(&dev->dev, "ibm,query-pe-dma-windows(%x) %x %x %x"
                " returned %d\n", ddw_avail[0], cfg_addr, BUID_HI(buid),
                BUID_LO(buid), ret);
        return ret;
}

static int create_ddw(struct pci_dev *dev, const u32 *ddw_avail,
                        struct ddw_create_response *create, int page_shift,
                        int window_shift)
{
        struct device_node *dn;
        struct pci_dn *pdn;
        u32 cfg_addr;
        u64 buid;
        int ret;

        /*
         * Get the config address and phb buid of the PE window.
         * Rely on eeh to retrieve this for us.
         * Retrieve them from the pci device, not the node with the
         * dma-window property
         */
        dn = pci_device_to_OF_node(dev);
        pdn = PCI_DN(dn);
        buid = pdn->phb->buid;
        cfg_addr = ((pdn->busno << 16) | (pdn->devfn << 8));

        do {
                /* extra outputs are LIOBN and dma-addr (hi, lo) */
                ret = rtas_call(ddw_avail[1], 5, 4, (u32 *)create,
                                cfg_addr, BUID_HI(buid), BUID_LO(buid),
                                page_shift, window_shift);
        } while (rtas_busy_delay(ret));
        dev_info(&dev->dev,
                "ibm,create-pe-dma-window(%x) %x %x %x %x %x returned %d "
                "(liobn = 0x%x starting addr = %x %x)\n", ddw_avail[1],
                 cfg_addr, BUID_HI(buid), BUID_LO(buid), page_shift,
                 window_shift, ret, create->liobn, create->addr_hi, create->addr_lo);

        return ret;
}

struct failed_ddw_pdn {
        struct device_node *pdn;
        struct list_head list;
};

static LIST_HEAD(failed_ddw_pdn_list);

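/*
 * Highest address the partition may see: the memory hotplug ceiling or
 * the top of the "memory" nodes currently in the device tree, whichever
 * is larger. A direct window must be able to map this much.
 */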
static phys_addr_t ddw_memory_hotplug_max(void)
{
        phys_addr_t max_addr = memory_hotplug_max();
        struct device_node *memory;

        for_each_node_by_type(memory, "memory") {
                unsigned long start, size;
                int n_mem_addr_cells, n_mem_size_cells, len;
                const __be32 *memcell_buf;

                memcell_buf = of_get_property(memory, "reg", &len);
                if (!memcell_buf || len <= 0)
                        continue;

                n_mem_addr_cells = of_n_addr_cells(memory);
                n_mem_size_cells = of_n_size_cells(memory);

                start = of_read_number(memcell_buf, n_mem_addr_cells);
                memcell_buf += n_mem_addr_cells;
                size = of_read_number(memcell_buf, n_mem_size_cells);
                memcell_buf += n_mem_size_cells;

                max_addr = max_t(phys_addr_t, max_addr, start + size);
        }

        return max_addr;
}

/*
 * If the PE supports dynamic dma windows, and there is space for a table
 * that can map all pages in a linear offset, then setup such a table,
 * and record the dma-offset in the struct device.
 *
 * dev: the pci device we are checking
 * pdn: the parent pe node with the ibm,dma_window property
 * Future: also check if we can remap the base window for our base page size
 *
 * returns the dma offset for use by the direct mapped DMA code.
 */
static u64 enable_ddw(struct pci_dev *dev, struct device_node *pdn)
{
        int len, ret;
        struct ddw_query_response query;
        struct ddw_create_response create;
        int page_shift;
        u64 dma_addr, max_addr;
        struct device_node *dn;
        u32 ddw_avail[3];
        struct direct_window *window;
        struct property *win64;
        struct dynamic_dma_window_prop *ddwprop;
        struct failed_ddw_pdn *fpdn;

        mutex_lock(&direct_window_init_mutex);

        dma_addr = find_existing_ddw(pdn);
        if (dma_addr != 0)
                goto out_unlock;

        /*
         * If we already went through this for a previous function of
         * the same device and failed, we don't want to muck with the
         * DMA window again, as it will race with in-flight operations
         * and can lead to EEHs. The above mutex protects access to the
         * list.
         */
        list_for_each_entry(fpdn, &failed_ddw_pdn_list, list) {
                if (fpdn->pdn == pdn)
                        goto out_unlock;
        }

        /*
         * the ibm,ddw-applicable property holds the tokens for:
         * ibm,query-pe-dma-window
         * ibm,create-pe-dma-window
         * ibm,remove-pe-dma-window
         * for the given node in that order.
         * the property is actually in the parent, not the PE
         */
        ret = of_property_read_u32_array(pdn, "ibm,ddw-applicable",
                                         &ddw_avail[0], 3);
        if (ret)
                goto out_failed;

        /*
         * Query if there is a second window of size to map the
         * whole partition.  Query returns number of windows, largest
         * block assigned to PE (partition endpoint), and two bitmasks
         * of page sizes: supported and supported for migrate-dma.
         */
        dn = pci_device_to_OF_node(dev);
        ret = query_ddw(dev, ddw_avail, &query);
        if (ret != 0)
                goto out_failed;

        if (query.windows_available == 0) {
                /*
                 * no additional windows are available for this device.
                 * We might be able to reallocate the existing window,
                 * trading in for a larger page size.
                 */
                dev_dbg(&dev->dev, "no free dynamic windows");
                goto out_failed;
        }
        if (query.page_size & 4) {
                page_shift = 24; /* 16MB */
        } else if (query.page_size & 2) {
                page_shift = 16; /* 64kB */
        } else if (query.page_size & 1) {
                page_shift = 12; /* 4kB */
        } else {
                dev_dbg(&dev->dev, "no supported direct page size in mask %x",
                          query.page_size);
                goto out_failed;
        }
        /* verify the window * number of ptes will map the partition */
        /* check largest block * page size > max memory hotplug addr */
        max_addr = ddw_memory_hotplug_max();
        if (query.largest_available_block < (max_addr >> page_shift)) {
                dev_dbg(&dev->dev, "can't map partition max 0x%llx with %u "
                          "%llu-sized pages\n", max_addr, query.largest_available_block,
                          1ULL << page_shift);
                goto out_failed;
        }
        len = order_base_2(max_addr);
        win64 = kzalloc(sizeof(struct property), GFP_KERNEL);
        if (!win64) {
                dev_info(&dev->dev,
                        "couldn't allocate property for 64bit dma window\n");
                goto out_failed;
        }
        win64->name = kstrdup(DIRECT64_PROPNAME, GFP_KERNEL);
        win64->value = ddwprop = kmalloc(sizeof(*ddwprop), GFP_KERNEL);
        win64->length = sizeof(*ddwprop);
        if (!win64->name || !win64->value) {
                dev_info(&dev->dev,
                        "couldn't allocate property name and value\n");
                goto out_free_prop;
        }

        ret = create_ddw(dev, ddw_avail, &create, page_shift, len);
        if (ret != 0)
                goto out_free_prop;

        ddwprop->liobn = cpu_to_be32(create.liobn);
        ddwprop->dma_base = cpu_to_be64(((u64)create.addr_hi << 32) |
                        create.addr_lo);
        ddwprop->tce_shift = cpu_to_be32(page_shift);
        ddwprop->window_shift = cpu_to_be32(len);

        dev_dbg(&dev->dev, "created tce table LIOBN 0x%x for %pOF\n",
                  create.liobn, dn);

        window = kzalloc(sizeof(*window), GFP_KERNEL);
        if (!window)
                goto out_clear_window;

        ret = walk_system_ram_range(0, memblock_end_of_DRAM() >> PAGE_SHIFT,
                        win64->value, tce_setrange_multi_pSeriesLP_walk);
        if (ret) {
                dev_info(&dev->dev, "failed to map direct window for %pOF: %d\n",
                         dn, ret);
                goto out_free_window;
        }

        ret = of_add_property(pdn, win64);
        if (ret) {
                dev_err(&dev->dev, "unable to add dma window property for %pOF: %d",
                         pdn, ret);
                goto out_free_window;
        }

        window->device = pdn;
        window->prop = ddwprop;
        spin_lock(&direct_window_list_lock);
        list_add(&window->list, &direct_window_list);
        spin_unlock(&direct_window_list_lock);

        dma_addr = be64_to_cpu(ddwprop->dma_base);
        goto out_unlock;

out_free_window:
        kfree(window);

out_clear_window:
        remove_ddw(pdn, true);

out_free_prop:
        kfree(win64->name);
        kfree(win64->value);
        kfree(win64);

out_failed:

        fpdn = kzalloc(sizeof(*fpdn), GFP_KERNEL);
        if (!fpdn)
                goto out_unlock;
        fpdn->pdn = pdn;
        list_add(&fpdn->list, &failed_ddw_pdn_list);

out_unlock:
        mutex_unlock(&direct_window_init_mutex);
        return dma_addr;
}

static void pci_dma_dev_setup_pSeriesLP(struct pci_dev *dev)
{
        struct device_node *pdn, *dn;
        struct iommu_table *tbl;
        const __be32 *dma_window = NULL;
        struct pci_dn *pci;

        pr_debug("pci_dma_dev_setup_pSeriesLP: %s\n", pci_name(dev));

        /* dev setup for LPAR is a little tricky, since the device tree might
         * contain the dma-window properties per-device and not necessarily
         * for the bus. So we need to search upwards in the tree until we
         * either hit a dma-window property, OR find a parent with a table
         * already allocated.
         */
        dn = pci_device_to_OF_node(dev);
        pr_debug("  node is %pOF\n", dn);

        for (pdn = dn; pdn && PCI_DN(pdn) && !PCI_DN(pdn)->table_group;
             pdn = pdn->parent) {
                dma_window = of_get_property(pdn, "ibm,dma-window", NULL);
                if (dma_window)
                        break;
        }

        if (!pdn || !PCI_DN(pdn)) {
                printk(KERN_WARNING "pci_dma_dev_setup_pSeriesLP: "
                       "no DMA window found for pci dev=%s dn=%pOF\n",
                                 pci_name(dev), dn);
                return;
        }
        pr_debug("  parent is %pOF\n", pdn);

        pci = PCI_DN(pdn);
        if (!pci->table_group) {
                pci->table_group = iommu_pseries_alloc_group(pci->phb->node);
                tbl = pci->table_group->tables[0];
                iommu_table_setparms_lpar(pci->phb, pdn, tbl,
                                pci->table_group, dma_window);
                tbl->it_ops = &iommu_table_lpar_multi_ops;
                iommu_init_table(tbl, pci->phb->node, 0, 0);
                iommu_register_group(pci->table_group,
                                pci_domain_nr(pci->phb->bus), 0);
                pr_debug("  created table: %p\n", pci->table_group);
        } else {
                pr_debug("  found DMA window, table: %p\n", pci->table_group);
        }

        set_iommu_table_base(&dev->dev, pci->table_group->tables[0]);
        iommu_add_device(pci->table_group, &dev->dev);
}

static bool iommu_bypass_supported_pSeriesLP(struct pci_dev *pdev, u64 dma_mask)
{
        struct device_node *dn = pci_device_to_OF_node(pdev), *pdn;
        const __be32 *dma_window = NULL;

        /* only attempt to use a new window if 64-bit DMA is requested */
        if (dma_mask < DMA_BIT_MASK(64))
                return false;

        dev_dbg(&pdev->dev, "node is %pOF\n", dn);

        /*
         * the device tree might contain the dma-window properties
         * per-device and not necessarily for the bus. So we need to
         * search upwards in the tree until we either hit a dma-window
         * property, OR find a parent with a table already allocated.
         */
        for (pdn = dn; pdn && PCI_DN(pdn) && !PCI_DN(pdn)->table_group;
                        pdn = pdn->parent) {
                dma_window = of_get_property(pdn, "ibm,dma-window", NULL);
                if (dma_window)
                        break;
        }

        if (pdn && PCI_DN(pdn)) {
                pdev->dev.archdata.dma_offset = enable_ddw(pdev, pdn);
                if (pdev->dev.archdata.dma_offset)
                        return true;
        }

        return false;
}

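/*
 * Memory hotplug notifier: map newly onlined memory into every direct
 * window, and clear the corresponding TCEs again when memory goes
 * offline or onlining is cancelled.
 */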
static int iommu_mem_notifier(struct notifier_block *nb, unsigned long action,
                void *data)
{
        struct direct_window *window;
        struct memory_notify *arg = data;
        int ret = 0;

        switch (action) {
        case MEM_GOING_ONLINE:
                spin_lock(&direct_window_list_lock);
                list_for_each_entry(window, &direct_window_list, list) {
                        ret |= tce_setrange_multi_pSeriesLP(arg->start_pfn,
                                        arg->nr_pages, window->prop);
                        /* XXX log error */
                }
                spin_unlock(&direct_window_list_lock);
                break;
        case MEM_CANCEL_ONLINE:
        case MEM_OFFLINE:
                spin_lock(&direct_window_list_lock);
                list_for_each_entry(window, &direct_window_list, list) {
                        ret |= tce_clearrange_multi_pSeriesLP(arg->start_pfn,
                                        arg->nr_pages, window->prop);
                        /* XXX log error */
                }
                spin_unlock(&direct_window_list_lock);
                break;
        default:
                break;
        }
        if (ret && action != MEM_CANCEL_ONLINE)
                return NOTIFY_BAD;

        return NOTIFY_OK;
}

static struct notifier_block iommu_mem_nb = {
        .notifier_call = iommu_mem_notifier,
};

static int iommu_reconfig_notifier(struct notifier_block *nb, unsigned long action, void *data)
{
        int err = NOTIFY_OK;
        struct of_reconfig_data *rd = data;
        struct device_node *np = rd->dn;
        struct pci_dn *pci = PCI_DN(np);
        struct direct_window *window;

        switch (action) {
        case OF_RECONFIG_DETACH_NODE:
                /*
                 * Removing the property will invoke the reconfig
                 * notifier again, which causes dead-lock on the
                 * read-write semaphore of the notifier chain. So
                 * we have to remove the property when releasing
                 * the device node.
                 */
                remove_ddw(np, false);
                if (pci && pci->table_group)
                        iommu_pseries_free_group(pci->table_group,
                                        np->full_name);

                spin_lock(&direct_window_list_lock);
                list_for_each_entry(window, &direct_window_list, list) {
                        if (window->device == np) {
                                list_del(&window->list);
                                kfree(window);
                                break;
                        }
                }
                spin_unlock(&direct_window_list_lock);
                break;
        default:
                err = NOTIFY_DONE;
                break;
        }
        return err;
}

static struct notifier_block iommu_reconfig_nb = {
        .notifier_call = iommu_reconfig_notifier,
};

/* These are called very early. */
void iommu_init_early_pSeries(void)
{
        if (of_chosen && of_get_property(of_chosen, "linux,iommu-off", NULL))
                return;

        if (firmware_has_feature(FW_FEATURE_LPAR)) {
                pseries_pci_controller_ops.dma_bus_setup = pci_dma_bus_setup_pSeriesLP;
                pseries_pci_controller_ops.dma_dev_setup = pci_dma_dev_setup_pSeriesLP;
                if (!disable_ddw)
                        pseries_pci_controller_ops.iommu_bypass_supported =
                                iommu_bypass_supported_pSeriesLP;
        } else {
                pseries_pci_controller_ops.dma_bus_setup = pci_dma_bus_setup_pSeries;
                pseries_pci_controller_ops.dma_dev_setup = pci_dma_dev_setup_pSeries;
        }

        of_reconfig_notifier_register(&iommu_reconfig_nb);
        register_memory_notifier(&iommu_mem_nb);

        set_pci_dma_ops(&dma_iommu_ops);
}

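/* "multitce=off" on the kernel command line disables the MULTITCE feature */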
static int __init disable_multitce(char *str)
{
        if (strcmp(str, "off") == 0 &&
            firmware_has_feature(FW_FEATURE_LPAR) &&
            firmware_has_feature(FW_FEATURE_MULTITCE)) {
                printk(KERN_INFO "Disabling MULTITCE firmware feature\n");
                powerpc_firmware_features &= ~FW_FEATURE_MULTITCE;
        }
        return 1;
}

__setup("multitce=", disable_multitce);

static int tce_iommu_bus_notifier(struct notifier_block *nb,
                unsigned long action, void *data)
{
        struct device *dev = data;

        switch (action) {
        case BUS_NOTIFY_DEL_DEVICE:
                iommu_del_device(dev);
                return 0;
        default:
                return 0;
        }
}

static struct notifier_block tce_iommu_bus_nb = {
        .notifier_call = tce_iommu_bus_notifier,
};

static int __init tce_iommu_bus_notifier_init(void)
{
        bus_register_notifier(&pci_bus_type, &tce_iommu_bus_nb);
        return 0;
}
machine_subsys_initcall_sync(pseries, tce_iommu_bus_notifier_init);
