arch/x86/kernel/amd_gart_64.c


DEFINITIONS

This source file includes the following definitions:
  1. alloc_iommu
  2. free_iommu
  3. flush_gart
  4. dump_leak
  5. iommu_full
  6. need_iommu
  7. nonforced_iommu
  8. dma_map_area
  9. gart_map_page
  10. gart_unmap_page
  11. gart_unmap_sg
  12. dma_map_sg_nonforce
  13. __dma_map_cont
  14. dma_map_cont
  15. gart_map_sg
  16. gart_alloc_coherent
  17. gart_free_coherent
  18. check_iommu_size
  19. read_aperture
  20. enable_gart_translations
  21. set_up_gart_resume
  22. gart_fixup_northbridges
  23. gart_resume
  24. init_amd_gatt
  25. gart_iommu_shutdown
  26. gart_iommu_init
  27. gart_parse_options

   1 // SPDX-License-Identifier: GPL-2.0-only
   2 /*
   3  * Dynamic DMA mapping support for AMD Hammer.
   4  *
   5  * Use the integrated AGP GART in the Hammer northbridge as an IOMMU for PCI.
   6  * This allows PCI devices that only support 32-bit addresses to be used
   7  * on systems with more than 4GB of memory.
   8  *
   9  * See Documentation/DMA-API-HOWTO.txt for the interface specification.
  10  *
  11  * Copyright 2002 Andi Kleen, SuSE Labs.
  12  */
  13 
  14 #include <linux/types.h>
  15 #include <linux/ctype.h>
  16 #include <linux/agp_backend.h>
  17 #include <linux/init.h>
  18 #include <linux/mm.h>
  19 #include <linux/sched.h>
  20 #include <linux/sched/debug.h>
  21 #include <linux/string.h>
  22 #include <linux/spinlock.h>
  23 #include <linux/pci.h>
  24 #include <linux/topology.h>
  25 #include <linux/interrupt.h>
  26 #include <linux/bitmap.h>
  27 #include <linux/kdebug.h>
  28 #include <linux/scatterlist.h>
  29 #include <linux/iommu-helper.h>
  30 #include <linux/syscore_ops.h>
  31 #include <linux/io.h>
  32 #include <linux/gfp.h>
  33 #include <linux/atomic.h>
  34 #include <linux/dma-direct.h>
  35 #include <asm/mtrr.h>
  36 #include <asm/pgtable.h>
  37 #include <asm/proto.h>
  38 #include <asm/iommu.h>
  39 #include <asm/gart.h>
  40 #include <asm/set_memory.h>
  41 #include <asm/swiotlb.h>
  42 #include <asm/dma.h>
  43 #include <asm/amd_nb.h>
  44 #include <asm/x86_init.h>
  45 #include <asm/iommu_table.h>
  46 
  47 static unsigned long iommu_bus_base;    /* GART remapping area (physical) */
  48 static unsigned long iommu_size;        /* size of remapping area in bytes */
  49 static unsigned long iommu_pages;       /* .. and in pages */
  50 
  51 static u32 *iommu_gatt_base;            /* Remapping table */
  52 
  53 /*
  54  * If this is disabled, the IOMMU uses an optimized flushing strategy
  55  * that only flushes when a mapping is reused. With it enabled, the GART
  56  * is flushed for every mapping. The problem is that the lazy flush seems
  57  * to trigger bugs with some popular PCI cards, in particular 3ware (but
  58  * it has also been seen with QLogic at least).
  59  */
  60 static int iommu_fullflush = 1;
  61 
  62 /* Allocation bitmap for the remapping area: */
  63 static DEFINE_SPINLOCK(iommu_bitmap_lock);
  64 /* Guarded by iommu_bitmap_lock: */
  65 static unsigned long *iommu_gart_bitmap;
  66 
  67 static u32 gart_unmapped_entry;
  68 
  69 #define GPTE_VALID    1
  70 #define GPTE_COHERENT 2
  71 #define GPTE_ENCODE(x) \
  72         (((x) & 0xfffff000) | (((x) >> 32) << 4) | GPTE_VALID | GPTE_COHERENT)
  73 #define GPTE_DECODE(x) (((x) & 0xfffff000) | (((u64)(x) & 0xff0) << 28))
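      /*
       * Each 32-bit GATT entry packs a 40-bit physical page address: bits
       * 12-31 of the address are stored in place, bits 32-39 go into entry
       * bits 4-11, and bits 0-1 hold the valid/coherent flags.  For example,
       * a page at physical 0x3012345000 encodes to 0x12345303, and
       * GPTE_DECODE(0x12345303) yields 0x3012345000 again.
       */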
  74 
  75 #ifdef CONFIG_AGP
  76 #define AGPEXTERN extern
  77 #else
  78 #define AGPEXTERN
  79 #endif
  80 
  81 /* GART can only remap to physical addresses < 1TB */
  82 #define GART_MAX_PHYS_ADDR      (1ULL << 40)
  83 
  84 /* backdoor interface to AGP driver */
  85 AGPEXTERN int agp_memory_reserved;
  86 AGPEXTERN __u32 *agp_gatt_table;
  87 
  88 static unsigned long next_bit;  /* protected by iommu_bitmap_lock */
  89 static bool need_flush;         /* global flush state, set on each GART wrap */
  90 
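      /*
       * Allocate a run of 'size' GART pages with a next-fit search over the
       * allocation bitmap, starting at next_bit.  Whenever the search wraps
       * back to the start of the aperture, need_flush is set so that
       * flush_gart() invalidates the GART TLBs before reused entries are
       * handed out again (the lazy-flush strategy described above).
       */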
  91 static unsigned long alloc_iommu(struct device *dev, int size,
  92                                  unsigned long align_mask)
  93 {
  94         unsigned long offset, flags;
  95         unsigned long boundary_size;
  96         unsigned long base_index;
  97 
  98         base_index = ALIGN(iommu_bus_base & dma_get_seg_boundary(dev),
  99                            PAGE_SIZE) >> PAGE_SHIFT;
 100         boundary_size = ALIGN((u64)dma_get_seg_boundary(dev) + 1,
 101                               PAGE_SIZE) >> PAGE_SHIFT;
 102 
 103         spin_lock_irqsave(&iommu_bitmap_lock, flags);
 104         offset = iommu_area_alloc(iommu_gart_bitmap, iommu_pages, next_bit,
 105                                   size, base_index, boundary_size, align_mask);
 106         if (offset == -1) {
 107                 need_flush = true;
 108                 offset = iommu_area_alloc(iommu_gart_bitmap, iommu_pages, 0,
 109                                           size, base_index, boundary_size,
 110                                           align_mask);
 111         }
 112         if (offset != -1) {
 113                 next_bit = offset+size;
 114                 if (next_bit >= iommu_pages) {
 115                         next_bit = 0;
 116                         need_flush = true;
 117                 }
 118         }
 119         if (iommu_fullflush)
 120                 need_flush = true;
 121         spin_unlock_irqrestore(&iommu_bitmap_lock, flags);
 122 
 123         return offset;
 124 }
 125 
 126 static void free_iommu(unsigned long offset, int size)
 127 {
 128         unsigned long flags;
 129 
 130         spin_lock_irqsave(&iommu_bitmap_lock, flags);
 131         bitmap_clear(iommu_gart_bitmap, offset, size);
 132         if (offset >= next_bit)
 133                 next_bit = offset + size;
 134         spin_unlock_irqrestore(&iommu_bitmap_lock, flags);
 135 }
 136 
 137 /*
 138  * Use global flush state to avoid races with multiple flushers.
 139  */
 140 static void flush_gart(void)
 141 {
 142         unsigned long flags;
 143 
 144         spin_lock_irqsave(&iommu_bitmap_lock, flags);
 145         if (need_flush) {
 146                 amd_flush_garts();
 147                 need_flush = false;
 148         }
 149         spin_unlock_irqrestore(&iommu_bitmap_lock, flags);
 150 }
 151 
 152 #ifdef CONFIG_IOMMU_LEAK
 153 /* Debugging aid for drivers that don't free their IOMMU tables */
 154 static void dump_leak(void)
 155 {
 156         static int dump;
 157 
 158         if (dump)
 159                 return;
 160         dump = 1;
 161 
 162         show_stack(NULL, NULL);
 163         debug_dma_dump_mappings(NULL);
 164 }
 165 #endif
 166 
 167 static void iommu_full(struct device *dev, size_t size, int dir)
 168 {
 169         /*
  170          * Ran out of IOMMU space for this operation. This is very bad.
  171          * Unfortunately, the drivers cannot handle this failure properly.
  172          * Return some non-mapped, pre-reserved space in the aperture and
  173          * let the northbridge deal with it. This will result in garbage
  174          * in the I/O operation. When the size exceeds the pre-reserved
  175          * space, memory corruption will occur or random memory will be
  176          * DMAed out. Hopefully no network devices use single mappings that big.
 177          */
 178 
 179         dev_err(dev, "PCI-DMA: Out of IOMMU space for %lu bytes\n", size);
 180 #ifdef CONFIG_IOMMU_LEAK
 181         dump_leak();
 182 #endif
 183 }
 184 
 185 static inline int
 186 need_iommu(struct device *dev, unsigned long addr, size_t size)
 187 {
 188         return force_iommu || !dma_capable(dev, addr, size);
 189 }
 190 
 191 static inline int
 192 nonforced_iommu(struct device *dev, unsigned long addr, size_t size)
 193 {
 194         return !dma_capable(dev, addr, size);
 195 }
 196 
  197 /* Map a single contiguous physical area into the IOMMU.
  198  * The caller needs to check whether the IOMMU is needed, and to flush.
 199  */
 200 static dma_addr_t dma_map_area(struct device *dev, dma_addr_t phys_mem,
 201                                 size_t size, int dir, unsigned long align_mask)
 202 {
 203         unsigned long npages = iommu_num_pages(phys_mem, size, PAGE_SIZE);
 204         unsigned long iommu_page;
 205         int i;
 206 
 207         if (unlikely(phys_mem + size > GART_MAX_PHYS_ADDR))
 208                 return DMA_MAPPING_ERROR;
 209 
 210         iommu_page = alloc_iommu(dev, npages, align_mask);
 211         if (iommu_page == -1) {
 212                 if (!nonforced_iommu(dev, phys_mem, size))
 213                         return phys_mem;
 214                 if (panic_on_overflow)
 215                         panic("dma_map_area overflow %lu bytes\n", size);
 216                 iommu_full(dev, size, dir);
 217                 return DMA_MAPPING_ERROR;
 218         }
 219 
 220         for (i = 0; i < npages; i++) {
 221                 iommu_gatt_base[iommu_page + i] = GPTE_ENCODE(phys_mem);
 222                 phys_mem += PAGE_SIZE;
 223         }
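              /*
               * phys_mem was advanced by whole pages in the loop above, so
               * its low bits still carry the original sub-page offset.
               */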
 224         return iommu_bus_base + iommu_page*PAGE_SIZE + (phys_mem & ~PAGE_MASK);
 225 }
 226 
 227 /* Map a single area into the IOMMU */
 228 static dma_addr_t gart_map_page(struct device *dev, struct page *page,
 229                                 unsigned long offset, size_t size,
 230                                 enum dma_data_direction dir,
 231                                 unsigned long attrs)
 232 {
 233         unsigned long bus;
 234         phys_addr_t paddr = page_to_phys(page) + offset;
 235 
 236         if (!need_iommu(dev, paddr, size))
 237                 return paddr;
 238 
 239         bus = dma_map_area(dev, paddr, size, dir, 0);
 240         flush_gart();
 241 
 242         return bus;
 243 }
 244 
 245 /*
 246  * Free a DMA mapping.
 247  */
 248 static void gart_unmap_page(struct device *dev, dma_addr_t dma_addr,
 249                             size_t size, enum dma_data_direction dir,
 250                             unsigned long attrs)
 251 {
 252         unsigned long iommu_page;
 253         int npages;
 254         int i;
 255 
 256         if (WARN_ON_ONCE(dma_addr == DMA_MAPPING_ERROR))
 257                 return;
 258 
 259         /*
 260          * This driver will not always use a GART mapping, but might have
 261          * created a direct mapping instead.  If that is the case there is
 262          * nothing to unmap here.
 263          */
 264         if (dma_addr < iommu_bus_base ||
 265             dma_addr >= iommu_bus_base + iommu_size)
 266                 return;
 267 
 268         iommu_page = (dma_addr - iommu_bus_base)>>PAGE_SHIFT;
 269         npages = iommu_num_pages(dma_addr, size, PAGE_SIZE);
 270         for (i = 0; i < npages; i++) {
 271                 iommu_gatt_base[iommu_page + i] = gart_unmapped_entry;
 272         }
 273         free_iommu(iommu_page, npages);
 274 }
 275 
 276 /*
  277  * Wrapper for gart_unmap_page() working on scatterlists.
 278  */
 279 static void gart_unmap_sg(struct device *dev, struct scatterlist *sg, int nents,
 280                           enum dma_data_direction dir, unsigned long attrs)
 281 {
 282         struct scatterlist *s;
 283         int i;
 284 
 285         for_each_sg(sg, s, nents, i) {
 286                 if (!s->dma_length || !s->length)
 287                         break;
 288                 gart_unmap_page(dev, s->dma_address, s->dma_length, dir, 0);
 289         }
 290 }
 291 
 292 /* Fallback for dma_map_sg in case of overflow */
 293 static int dma_map_sg_nonforce(struct device *dev, struct scatterlist *sg,
 294                                int nents, int dir)
 295 {
 296         struct scatterlist *s;
 297         int i;
 298 
 299 #ifdef CONFIG_IOMMU_DEBUG
 300         pr_debug("dma_map_sg overflow\n");
 301 #endif
 302 
 303         for_each_sg(sg, s, nents, i) {
 304                 unsigned long addr = sg_phys(s);
 305 
 306                 if (nonforced_iommu(dev, addr, s->length)) {
 307                         addr = dma_map_area(dev, addr, s->length, dir, 0);
 308                         if (addr == DMA_MAPPING_ERROR) {
 309                                 if (i > 0)
 310                                         gart_unmap_sg(dev, sg, i, dir, 0);
 311                                 nents = 0;
 312                                 sg[0].dma_length = 0;
 313                                 break;
 314                         }
 315                 }
 316                 s->dma_address = addr;
 317                 s->dma_length = s->length;
 318         }
 319         flush_gart();
 320 
 321         return nents;
 322 }
 323 
  324 /* Map multiple scatterlist entries contiguously into the first. */
 325 static int __dma_map_cont(struct device *dev, struct scatterlist *start,
 326                           int nelems, struct scatterlist *sout,
 327                           unsigned long pages)
 328 {
 329         unsigned long iommu_start = alloc_iommu(dev, pages, 0);
 330         unsigned long iommu_page = iommu_start;
 331         struct scatterlist *s;
 332         int i;
 333 
 334         if (iommu_start == -1)
 335                 return -1;
 336 
 337         for_each_sg(start, s, nelems, i) {
 338                 unsigned long pages, addr;
 339                 unsigned long phys_addr = s->dma_address;
 340 
 341                 BUG_ON(s != start && s->offset);
 342                 if (s == start) {
 343                         sout->dma_address = iommu_bus_base;
 344                         sout->dma_address += iommu_page*PAGE_SIZE + s->offset;
 345                         sout->dma_length = s->length;
 346                 } else {
 347                         sout->dma_length += s->length;
 348                 }
 349 
 350                 addr = phys_addr;
 351                 pages = iommu_num_pages(s->offset, s->length, PAGE_SIZE);
 352                 while (pages--) {
 353                         iommu_gatt_base[iommu_page] = GPTE_ENCODE(addr);
 354                         addr += PAGE_SIZE;
 355                         iommu_page++;
 356                 }
 357         }
 358         BUG_ON(iommu_page - iommu_start != pages);
 359 
 360         return 0;
 361 }
 362 
 363 static inline int
 364 dma_map_cont(struct device *dev, struct scatterlist *start, int nelems,
 365              struct scatterlist *sout, unsigned long pages, int need)
 366 {
 367         if (!need) {
 368                 BUG_ON(nelems != 1);
 369                 sout->dma_address = start->dma_address;
 370                 sout->dma_length = start->length;
 371                 return 0;
 372         }
 373         return __dma_map_cont(dev, start, nelems, sout, pages);
 374 }
 375 
 376 /*
 377  * DMA map all entries in a scatterlist.
  378  * Merge chunks that have page-aligned sizes into a contiguous mapping.
 379  */
 380 static int gart_map_sg(struct device *dev, struct scatterlist *sg, int nents,
 381                        enum dma_data_direction dir, unsigned long attrs)
 382 {
 383         struct scatterlist *s, *ps, *start_sg, *sgmap;
 384         int need = 0, nextneed, i, out, start;
 385         unsigned long pages = 0;
 386         unsigned int seg_size;
 387         unsigned int max_seg_size;
 388 
 389         if (nents == 0)
 390                 return 0;
 391 
 392         out             = 0;
 393         start           = 0;
 394         start_sg        = sg;
 395         sgmap           = sg;
 396         seg_size        = 0;
 397         max_seg_size    = dma_get_max_seg_size(dev);
 398         ps              = NULL; /* shut up gcc */
 399 
 400         for_each_sg(sg, s, nents, i) {
 401                 dma_addr_t addr = sg_phys(s);
 402 
 403                 s->dma_address = addr;
 404                 BUG_ON(s->length == 0);
 405 
 406                 nextneed = need_iommu(dev, addr, s->length);
 407 
  408                 /* Handle the previous entries that have not been processed yet */
 409                 if (i > start) {
 410                         /*
 411                          * Can only merge when the last chunk ends on a
 412                          * page boundary and the new one doesn't have an
 413                          * offset.
 414                          */
 415                         if (!iommu_merge || !nextneed || !need || s->offset ||
 416                             (s->length + seg_size > max_seg_size) ||
 417                             (ps->offset + ps->length) % PAGE_SIZE) {
 418                                 if (dma_map_cont(dev, start_sg, i - start,
 419                                                  sgmap, pages, need) < 0)
 420                                         goto error;
 421                                 out++;
 422 
 423                                 seg_size        = 0;
 424                                 sgmap           = sg_next(sgmap);
 425                                 pages           = 0;
 426                                 start           = i;
 427                                 start_sg        = s;
 428                         }
 429                 }
 430 
 431                 seg_size += s->length;
 432                 need = nextneed;
 433                 pages += iommu_num_pages(s->offset, s->length, PAGE_SIZE);
 434                 ps = s;
 435         }
 436         if (dma_map_cont(dev, start_sg, i - start, sgmap, pages, need) < 0)
 437                 goto error;
 438         out++;
 439         flush_gart();
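              /*
               * Mark the end of the mapped list: gart_unmap_sg() stops at
               * the first entry with a zero dma_length.
               */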
 440         if (out < nents) {
 441                 sgmap = sg_next(sgmap);
 442                 sgmap->dma_length = 0;
 443         }
 444         return out;
 445 
 446 error:
 447         flush_gart();
 448         gart_unmap_sg(dev, sg, out, dir, 0);
 449 
  450         /* When it was forced or merged, try again in a dumb way */
 451         if (force_iommu || iommu_merge) {
 452                 out = dma_map_sg_nonforce(dev, sg, nents, dir);
 453                 if (out > 0)
 454                         return out;
 455         }
 456         if (panic_on_overflow)
 457                 panic("dma_map_sg: overflow on %lu pages\n", pages);
 458 
 459         iommu_full(dev, pages << PAGE_SHIFT, dir);
 460         for_each_sg(sg, s, nents, i)
 461                 s->dma_address = DMA_MAPPING_ERROR;
 462         return 0;
 463 }
 464 
  465 /* allocate and map a coherent DMA buffer */
 466 static void *
 467 gart_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_addr,
 468                     gfp_t flag, unsigned long attrs)
 469 {
 470         void *vaddr;
 471 
 472         vaddr = dma_direct_alloc_pages(dev, size, dma_addr, flag, attrs);
 473         if (!vaddr ||
 474             !force_iommu || dev->coherent_dma_mask <= DMA_BIT_MASK(24))
 475                 return vaddr;
 476 
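              /*
               * Remap through the GART; the align mask (in pages) makes the
               * returned DMA address naturally aligned for the allocation's
               * order, which matches the natural alignment of the buffer
               * returned by dma_direct_alloc_pages().
               */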
 477         *dma_addr = dma_map_area(dev, virt_to_phys(vaddr), size,
 478                         DMA_BIDIRECTIONAL, (1UL << get_order(size)) - 1);
 479         flush_gart();
 480         if (unlikely(*dma_addr == DMA_MAPPING_ERROR))
 481                 goto out_free;
 482         return vaddr;
 483 out_free:
 484         dma_direct_free_pages(dev, size, vaddr, *dma_addr, attrs);
 485         return NULL;
 486 }
 487 
 488 /* free a coherent mapping */
 489 static void
 490 gart_free_coherent(struct device *dev, size_t size, void *vaddr,
 491                    dma_addr_t dma_addr, unsigned long attrs)
 492 {
 493         gart_unmap_page(dev, dma_addr, size, DMA_BIDIRECTIONAL, 0);
 494         dma_direct_free_pages(dev, size, vaddr, dma_addr, attrs);
 495 }
 496 
 497 static int no_agp;
 498 
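      /*
       * Decide how much of the aperture to use for remapping.  Unless a size
       * was given on the command line, the whole aperture is used, or half of
       * it when the AGP driver may also need the aperture.  The size is then
       * reduced by the distance from (aper + iommu_size) to the next 2MB
       * (PMD_PAGE_SIZE) boundary.
       */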
 499 static __init unsigned long check_iommu_size(unsigned long aper, u64 aper_size)
 500 {
 501         unsigned long a;
 502 
 503         if (!iommu_size) {
 504                 iommu_size = aper_size;
 505                 if (!no_agp)
 506                         iommu_size /= 2;
 507         }
 508 
 509         a = aper + iommu_size;
 510         iommu_size -= round_up(a, PMD_PAGE_SIZE) - a;
 511 
 512         if (iommu_size < 64*1024*1024) {
 513                 pr_warning(
 514                         "PCI-DMA: Warning: Small IOMMU %luMB."
 515                         " Consider increasing the AGP aperture in BIOS\n",
  516                         iommu_size >> 20);
 517         }
 518 
 519         return iommu_size;
 520 }
 521 
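      /*
       * Read the aperture base and size back from the northbridge GART
       * registers: the size is 32MB shifted left by the 3-bit order field,
       * and the base register holds physical address bits 39:25 (hence the
       * shift by 25).  For example, an order of 1 and a base field of 0x40
       * describe a 64MB aperture at 0x80000000.
       */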
 522 static __init unsigned read_aperture(struct pci_dev *dev, u32 *size)
 523 {
 524         unsigned aper_size = 0, aper_base_32, aper_order;
 525         u64 aper_base;
 526 
 527         pci_read_config_dword(dev, AMD64_GARTAPERTUREBASE, &aper_base_32);
 528         pci_read_config_dword(dev, AMD64_GARTAPERTURECTL, &aper_order);
 529         aper_order = (aper_order >> 1) & 7;
 530 
 531         aper_base = aper_base_32 & 0x7fff;
 532         aper_base <<= 25;
 533 
 534         aper_size = (32 * 1024 * 1024) << aper_order;
 535         if (aper_base + aper_size > 0x100000000UL || !aper_size)
 536                 aper_base = 0;
 537 
 538         *size = aper_size;
 539         return aper_base;
 540 }
 541 
 542 static void enable_gart_translations(void)
 543 {
 544         int i;
 545 
 546         if (!amd_nb_has_feature(AMD_NB_GART))
 547                 return;
 548 
 549         for (i = 0; i < amd_nb_num(); i++) {
 550                 struct pci_dev *dev = node_to_amd_nb(i)->misc;
 551 
 552                 enable_gart_translation(dev, __pa(agp_gatt_table));
 553         }
 554 
 555         /* Flush the GART-TLB to remove stale entries */
 556         amd_flush_garts();
 557 }
 558 
 559 /*
 560  * If fix_up_north_bridges is set, the north bridges have to be fixed up on
 561  * resume in the same way as they are handled in gart_iommu_hole_init().
 562  */
 563 static bool fix_up_north_bridges;
 564 static u32 aperture_order;
 565 static u32 aperture_alloc;
 566 
 567 void set_up_gart_resume(u32 aper_order, u32 aper_alloc)
 568 {
 569         fix_up_north_bridges = true;
 570         aperture_order = aper_order;
 571         aperture_alloc = aper_alloc;
 572 }
 573 
 574 static void gart_fixup_northbridges(void)
 575 {
 576         int i;
 577 
 578         if (!fix_up_north_bridges)
 579                 return;
 580 
 581         if (!amd_nb_has_feature(AMD_NB_GART))
 582                 return;
 583 
 584         pr_info("PCI-DMA: Restoring GART aperture settings\n");
 585 
 586         for (i = 0; i < amd_nb_num(); i++) {
 587                 struct pci_dev *dev = node_to_amd_nb(i)->misc;
 588 
 589                 /*
 590                  * Don't enable translations just yet.  That is the next
 591                  * step.  Restore the pre-suspend aperture settings.
 592                  */
 593                 gart_set_size_and_enable(dev, aperture_order);
 594                 pci_write_config_dword(dev, AMD64_GARTAPERTUREBASE, aperture_alloc >> 25);
 595         }
 596 }
 597 
 598 static void gart_resume(void)
 599 {
 600         pr_info("PCI-DMA: Resuming GART IOMMU\n");
 601 
 602         gart_fixup_northbridges();
 603 
 604         enable_gart_translations();
 605 }
 606 
 607 static struct syscore_ops gart_syscore_ops = {
 608         .resume         = gart_resume,
 609 
 610 };
 611 
 612 /*
 613  * Private Northbridge GATT initialization in case we cannot use the
 614  * AGP driver for some reason.
 615  */
 616 static __init int init_amd_gatt(struct agp_kern_info *info)
 617 {
 618         unsigned aper_size, gatt_size, new_aper_size;
 619         unsigned aper_base, new_aper_base;
 620         struct pci_dev *dev;
 621         void *gatt;
 622         int i;
 623 
 624         pr_info("PCI-DMA: Disabling AGP.\n");
 625 
 626         aper_size = aper_base = info->aper_size = 0;
 627         dev = NULL;
 628         for (i = 0; i < amd_nb_num(); i++) {
 629                 dev = node_to_amd_nb(i)->misc;
 630                 new_aper_base = read_aperture(dev, &new_aper_size);
 631                 if (!new_aper_base)
 632                         goto nommu;
 633 
 634                 if (!aper_base) {
 635                         aper_size = new_aper_size;
 636                         aper_base = new_aper_base;
 637                 }
 638                 if (aper_size != new_aper_size || aper_base != new_aper_base)
 639                         goto nommu;
 640         }
 641         if (!aper_base)
 642                 goto nommu;
 643 
 644         info->aper_base = aper_base;
 645         info->aper_size = aper_size >> 20;
 646 
 647         gatt_size = (aper_size >> PAGE_SHIFT) * sizeof(u32);
 648         gatt = (void *)__get_free_pages(GFP_KERNEL | __GFP_ZERO,
 649                                         get_order(gatt_size));
 650         if (!gatt)
 651                 panic("Cannot allocate GATT table");
 652         if (set_memory_uc((unsigned long)gatt, gatt_size >> PAGE_SHIFT))
 653                 panic("Could not set GART PTEs to uncacheable pages");
 654 
 655         agp_gatt_table = gatt;
 656 
 657         register_syscore_ops(&gart_syscore_ops);
 658 
 659         flush_gart();
 660 
 661         pr_info("PCI-DMA: aperture base @ %x size %u KB\n",
 662                aper_base, aper_size>>10);
 663 
 664         return 0;
 665 
 666  nommu:
 667         /* Should not happen anymore */
 668         pr_warning("PCI-DMA: More than 4GB of RAM and no IOMMU\n"
  669                    "falling back to iommu=soft.\n");
 670         return -1;
 671 }
 672 
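      /*
       * Only the streaming map/unmap paths (and forced coherent mappings) go
       * through the GART; mmap, get_sgtable, dma_supported and the required
       * mask query are delegated to the generic dma-direct helpers.
       */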
 673 static const struct dma_map_ops gart_dma_ops = {
 674         .map_sg                         = gart_map_sg,
 675         .unmap_sg                       = gart_unmap_sg,
 676         .map_page                       = gart_map_page,
 677         .unmap_page                     = gart_unmap_page,
 678         .alloc                          = gart_alloc_coherent,
 679         .free                           = gart_free_coherent,
 680         .mmap                           = dma_common_mmap,
 681         .get_sgtable                    = dma_common_get_sgtable,
 682         .dma_supported                  = dma_direct_supported,
 683         .get_required_mask              = dma_direct_get_required_mask,
 684 };
 685 
 686 static void gart_iommu_shutdown(void)
 687 {
 688         struct pci_dev *dev;
 689         int i;
 690 
  691         /* don't shut it down if AGP is installed */
 692         if (!no_agp)
 693                 return;
 694 
 695         if (!amd_nb_has_feature(AMD_NB_GART))
 696                 return;
 697 
 698         for (i = 0; i < amd_nb_num(); i++) {
 699                 u32 ctl;
 700 
 701                 dev = node_to_amd_nb(i)->misc;
 702                 pci_read_config_dword(dev, AMD64_GARTAPERTURECTL, &ctl);
 703 
 704                 ctl &= ~GARTEN;
 705 
 706                 pci_write_config_dword(dev, AMD64_GARTAPERTURECTL, ctl);
 707         }
 708 }
 709 
 710 int __init gart_iommu_init(void)
 711 {
 712         struct agp_kern_info info;
 713         unsigned long iommu_start;
 714         unsigned long aper_base, aper_size;
 715         unsigned long start_pfn, end_pfn;
 716         unsigned long scratch;
 717 
 718         if (!amd_nb_has_feature(AMD_NB_GART))
 719                 return 0;
 720 
 721 #ifndef CONFIG_AGP_AMD64
 722         no_agp = 1;
 723 #else
 724         /* Makefile puts PCI initialization via subsys_initcall first. */
 725         /* Add other AMD AGP bridge drivers here */
 726         no_agp = no_agp ||
 727                 (agp_amd64_init() < 0) ||
 728                 (agp_copy_info(agp_bridge, &info) < 0);
 729 #endif
 730 
 731         if (no_iommu ||
 732             (!force_iommu && max_pfn <= MAX_DMA32_PFN) ||
 733             !gart_iommu_aperture ||
 734             (no_agp && init_amd_gatt(&info) < 0)) {
 735                 if (max_pfn > MAX_DMA32_PFN) {
 736                         pr_warning("More than 4GB of memory but GART IOMMU not available.\n");
 737                         pr_warning("falling back to iommu=soft.\n");
 738                 }
 739                 return 0;
 740         }
 741 
 742         /* need to map that range */
 743         aper_size       = info.aper_size << 20;
 744         aper_base       = info.aper_base;
 745         end_pfn         = (aper_base>>PAGE_SHIFT) + (aper_size>>PAGE_SHIFT);
 746 
 747         start_pfn = PFN_DOWN(aper_base);
 748         if (!pfn_range_is_mapped(start_pfn, end_pfn))
 749                 init_memory_mapping(start_pfn<<PAGE_SHIFT, end_pfn<<PAGE_SHIFT);
 750 
 751         pr_info("PCI-DMA: using GART IOMMU.\n");
 752         iommu_size = check_iommu_size(info.aper_base, aper_size);
 753         iommu_pages = iommu_size >> PAGE_SHIFT;
 754 
 755         iommu_gart_bitmap = (void *) __get_free_pages(GFP_KERNEL | __GFP_ZERO,
 756                                                       get_order(iommu_pages/8));
 757         if (!iommu_gart_bitmap)
 758                 panic("Cannot allocate iommu bitmap\n");
 759 
 760         pr_info("PCI-DMA: Reserving %luMB of IOMMU area in the AGP aperture\n",
 761                iommu_size >> 20);
 762 
 763         agp_memory_reserved     = iommu_size;
 764         iommu_start             = aper_size - iommu_size;
 765         iommu_bus_base          = info.aper_base + iommu_start;
 766         iommu_gatt_base         = agp_gatt_table + (iommu_start>>PAGE_SHIFT);
 767 
 768         /*
 769          * Unmap the IOMMU part of the GART. The alias of the page is
 770          * always mapped with cache enabled and there is no full cache
 771          * coherency across the GART remapping. The unmapping avoids
 772          * automatic prefetches from the CPU allocating cache lines in
 773          * there. All CPU accesses are done via the direct mapping to
 774          * the backing memory. The GART address is only used by PCI
 775          * devices.
 776          */
 777         set_memory_np((unsigned long)__va(iommu_bus_base),
 778                                 iommu_size >> PAGE_SHIFT);
 779         /*
 780          * Tricky. The GART table remaps the physical memory range,
 781          * so the CPU wont notice potential aliases and if the memory
 782          * is remapped to UC later on, we might surprise the PCI devices
 783          * with a stray writeout of a cacheline. So play it sure and
 784          * do an explicit, full-scale wbinvd() _after_ having marked all
 785          * the pages as Not-Present:
 786          */
 787         wbinvd();
 788 
 789         /*
 790          * Now all caches are flushed and we can safely enable
 791          * GART hardware.  Doing it early leaves the possibility
 792          * of stale cache entries that can lead to GART PTE
 793          * errors.
 794          */
 795         enable_gart_translations();
 796 
 797         /*
  798          * Try to work around a bug (thanks to BenH):
  799          * Set unmapped entries to a scratch page instead of 0.
  800          * Any prefetches that hit unmapped entries then won't cause a bus
  801          * abort. (A P2P bridge may be prefetching on DMA reads.)
 802          */
 803         scratch = get_zeroed_page(GFP_KERNEL);
 804         if (!scratch)
 805                 panic("Cannot allocate iommu scratch page");
 806         gart_unmapped_entry = GPTE_ENCODE(__pa(scratch));
 807 
 808         flush_gart();
 809         dma_ops = &gart_dma_ops;
 810         x86_platform.iommu_shutdown = gart_iommu_shutdown;
 811         swiotlb = 0;
 812 
 813         return 0;
 814 }
 815 
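      /*
       * Parse the GART-specific kernel parameters: a leading number sets
       * iommu_size, "fullflush"/"nofullflush" toggle flushing on every
       * mapping, "noagp" disables use of the AGP driver, "noaperture" skips
       * the aperture fixup, "force"/"allowed" permit use of the aperture,
       * and "memaper[=order]" forces a fallback aperture in main memory.
       */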
 816 void __init gart_parse_options(char *p)
 817 {
 818         int arg;
 819 
 820         if (isdigit(*p) && get_option(&p, &arg))
 821                 iommu_size = arg;
 822         if (!strncmp(p, "fullflush", 9))
 823                 iommu_fullflush = 1;
 824         if (!strncmp(p, "nofullflush", 11))
 825                 iommu_fullflush = 0;
 826         if (!strncmp(p, "noagp", 5))
 827                 no_agp = 1;
 828         if (!strncmp(p, "noaperture", 10))
 829                 fix_aperture = 0;
 830         /* duplicated from pci-dma.c */
 831         if (!strncmp(p, "force", 5))
 832                 gart_iommu_aperture_allowed = 1;
 833         if (!strncmp(p, "allowed", 7))
 834                 gart_iommu_aperture_allowed = 1;
 835         if (!strncmp(p, "memaper", 7)) {
 836                 fallback_aper_force = 1;
 837                 p += 7;
 838                 if (*p == '=') {
 839                         ++p;
 840                         if (get_option(&p, &arg))
 841                                 fallback_aper_order = arg;
 842                 }
 843         }
 844 }
 845 IOMMU_INIT_POST(gart_iommu_hole_init);
