root/drivers/iommu/intel-iommu.c


DEFINITIONS

This source file includes the following definitions.
  1. agaw_to_level
  2. agaw_to_width
  3. width_to_agaw
  4. level_to_offset_bits
  5. pfn_level_offset
  6. level_mask
  7. level_size
  8. align_to_level
  9. lvl_to_nr_pages
  10. dma_to_mm_pfn
  11. mm_to_dma_pfn
  12. page_to_dma_pfn
  13. virt_to_dma_pfn
  14. root_entry_uctp
  15. context_clear_pasid_enable
  16. context_pasid_enabled
  17. context_set_copied
  18. context_copied
  19. __context_present
  20. context_present
  21. context_set_present
  22. context_set_fault_enable
  23. context_set_translation_type
  24. context_set_address_root
  25. context_set_address_width
  26. context_set_domain_id
  27. context_domain_id
  28. context_clear_entry
  29. for_each_device_domain
  30. translation_pre_enabled
  31. clear_translation_pre_enabled
  32. init_translation_status
  33. to_dmar_domain
  34. intel_iommu_setup
  35. get_iommu_domain
  36. set_iommu_domain
  37. alloc_pgtable_page
  38. free_pgtable_page
  39. alloc_domain_mem
  40. free_domain_mem
  41. alloc_devinfo_mem
  42. free_devinfo_mem
  43. domain_type_is_si
  44. domain_pfn_supported
  45. __iommu_calculate_agaw
  46. iommu_calculate_max_sagaw
  47. iommu_calculate_agaw
  48. domain_get_iommu
  49. domain_update_iommu_coherency
  50. domain_update_iommu_snooping
  51. domain_update_iommu_superpage
  52. domain_update_iommu_cap
  53. iommu_context_addr
  54. iommu_dummy
  55. is_downstream_to_pci_bridge
  56. device_to_iommu
  57. domain_flush_cache
  58. device_context_mapped
  59. free_context_table
  60. pfn_to_dma_pte
  61. dma_pfn_level_pte
  62. dma_pte_clear_range
  63. dma_pte_free_level
  64. dma_pte_free_pagetable
  65. dma_pte_list_pagetables
  66. dma_pte_clear_level
  67. domain_unmap
  68. dma_free_pagelist
  69. iova_entry_free
  70. iommu_alloc_root_entry
  71. iommu_set_root_entry
  72. iommu_flush_write_buffer
  73. __iommu_flush_context
  74. __iommu_flush_iotlb
  75. iommu_support_dev_iotlb
  76. domain_update_iotlb
  77. iommu_enable_dev_iotlb
  78. iommu_disable_dev_iotlb
  79. iommu_flush_dev_iotlb
  80. iommu_flush_iotlb_psi
  81. __mapping_notify_one
  82. iommu_flush_iova
  83. iommu_disable_protect_mem_regions
  84. iommu_enable_translation
  85. iommu_disable_translation
  86. iommu_init_domains
  87. disable_dmar_iommu
  88. free_dmar_iommu
  89. alloc_domain
  90. domain_attach_iommu
  91. domain_detach_iommu
  92. dmar_init_reserved_ranges
  93. domain_reserve_special_ranges
  94. guestwidth_to_adjustwidth
  95. domain_init
  96. domain_exit
  97. context_get_sm_pds
  98. context_set_sm_rid2pasid
  99. context_set_sm_dte
  100. context_set_sm_pre
  101. domain_context_mapping_one
  102. domain_context_mapping_cb
  103. domain_context_mapping
  104. domain_context_mapped_cb
  105. domain_context_mapped
  106. aligned_nrpages
  107. hardware_largepage_caps
  108. __domain_mapping
  109. domain_mapping
  110. domain_sg_mapping
  111. domain_pfn_mapping
  112. domain_context_clear_one
  113. unlink_domain_info
  114. domain_remove_dev_info
  115. find_domain
  116. dmar_search_domain_by_dev_info
  117. dmar_insert_one_dev_info
  118. get_last_alias
  119. find_or_alloc_domain
  120. set_domain_for_dev
  121. iommu_domain_identity_map
  122. domain_prepare_identity_map
  123. si_domain_init
  124. identity_mapping
  125. domain_add_dev_info
  126. device_has_rmrr
  127. device_rmrr_is_relaxable
  128. device_is_rmrr_locked
  129. device_def_domain_type
  130. intel_iommu_init_qi
  131. copy_context_table
  132. copy_translation_tables
  133. init_dmars
  134. intel_alloc_iova
  135. get_private_domain_for_dev
  136. iommu_need_mapping
  137. __intel_map_single
  138. intel_map_page
  139. intel_map_resource
  140. intel_unmap
  141. intel_unmap_page
  142. intel_unmap_resource
  143. intel_alloc_coherent
  144. intel_free_coherent
  145. intel_unmap_sg
  146. intel_map_sg
  147. intel_get_required_mask
  148. bounce_sync_single
  149. bounce_map_single
  150. bounce_unmap_single
  151. bounce_map_page
  152. bounce_map_resource
  153. bounce_unmap_page
  154. bounce_unmap_resource
  155. bounce_unmap_sg
  156. bounce_map_sg
  157. bounce_sync_single_for_cpu
  158. bounce_sync_single_for_device
  159. bounce_sync_sg_for_cpu
  160. bounce_sync_sg_for_device
  161. iommu_domain_cache_init
  162. iommu_devinfo_cache_init
  163. iommu_init_mempool
  164. iommu_exit_mempool
  165. quirk_ioat_snb_local_iommu
  166. init_no_remapping_devices
  167. init_iommu_hw
  168. iommu_flush_all
  169. iommu_suspend
  170. iommu_resume
  171. init_iommu_pm_ops
  172. init_iommu_pm_ops
  173. dmar_parse_one_rmrr
  174. dmar_find_atsr
  175. dmar_parse_one_atsr
  176. intel_iommu_free_atsr
  177. dmar_release_one_atsr
  178. dmar_check_one_atsr
  179. intel_iommu_add
  180. dmar_iommu_hotplug
  181. intel_iommu_free_dmars
  182. dmar_find_matched_atsr_unit
  183. dmar_iommu_notify_scope_dev
  184. intel_iommu_memory_notifier
  185. free_all_cpu_cached_iovas
  186. intel_iommu_cpu_dead
  187. intel_disable_iommus
  188. dev_to_intel_iommu
  189. intel_iommu_show_version
  190. intel_iommu_show_address
  191. intel_iommu_show_cap
  192. intel_iommu_show_ecap
  193. intel_iommu_show_ndoms
  194. intel_iommu_show_ndoms_used
  195. has_untrusted_dev
  196. platform_optin_force_iommu
  197. probe_acpi_namespace_devices
  198. intel_iommu_init
  199. domain_context_clear_one_cb
  200. domain_context_clear
  201. __dmar_remove_one_dev_info
  202. dmar_remove_one_dev_info
  203. md_domain_init
  204. intel_iommu_domain_alloc
  205. intel_iommu_domain_free
  206. is_aux_domain
  207. auxiliary_link_device
  208. auxiliary_unlink_device
  209. aux_domain_add_dev
  210. aux_domain_remove_dev
  211. prepare_domain_attach_device
  212. intel_iommu_attach_device
  213. intel_iommu_aux_attach_device
  214. intel_iommu_detach_device
  215. intel_iommu_aux_detach_device
  216. intel_iommu_map
  217. intel_iommu_unmap
  218. intel_iommu_iova_to_phys
  219. scalable_mode_support
  220. iommu_pasid_support
  221. intel_iommu_capable
  222. intel_iommu_add_device
  223. intel_iommu_remove_device
  224. intel_iommu_get_resv_regions
  225. intel_iommu_put_resv_regions
  226. intel_iommu_enable_pasid
  227. intel_iommu_apply_resv_region
  228. intel_iommu_device_group
  229. intel_svm_device_to_iommu
  230. intel_iommu_enable_auxd
  231. intel_iommu_disable_auxd
  232. siov_find_pci_dvsec
  233. intel_iommu_dev_has_feat
  234. intel_iommu_dev_enable_feat
  235. intel_iommu_dev_disable_feat
  236. intel_iommu_dev_feat_enabled
  237. intel_iommu_aux_get_pasid
  238. intel_iommu_is_attach_deferred
  239. quirk_iommu_igfx
  240. quirk_iommu_rwbf
  241. quirk_calpella_no_shadow_gtt
  242. check_tylersburg_isoch

   1 // SPDX-License-Identifier: GPL-2.0-only
   2 /*
   3  * Copyright © 2006-2014 Intel Corporation.
   4  *
   5  * Authors: David Woodhouse <dwmw2@infradead.org>,
   6  *          Ashok Raj <ashok.raj@intel.com>,
   7  *          Shaohua Li <shaohua.li@intel.com>,
   8  *          Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com>,
   9  *          Fenghua Yu <fenghua.yu@intel.com>
  10  *          Joerg Roedel <jroedel@suse.de>
  11  */
  12 
  13 #define pr_fmt(fmt)     "DMAR: " fmt
  14 #define dev_fmt(fmt)    pr_fmt(fmt)
  15 
  16 #include <linux/init.h>
  17 #include <linux/bitmap.h>
  18 #include <linux/debugfs.h>
  19 #include <linux/export.h>
  20 #include <linux/slab.h>
  21 #include <linux/irq.h>
  22 #include <linux/interrupt.h>
  23 #include <linux/spinlock.h>
  24 #include <linux/pci.h>
  25 #include <linux/dmar.h>
  26 #include <linux/dma-mapping.h>
  27 #include <linux/mempool.h>
  28 #include <linux/memory.h>
  29 #include <linux/cpu.h>
  30 #include <linux/timer.h>
  31 #include <linux/io.h>
  32 #include <linux/iova.h>
  33 #include <linux/iommu.h>
  34 #include <linux/intel-iommu.h>
  35 #include <linux/syscore_ops.h>
  36 #include <linux/tboot.h>
  37 #include <linux/dmi.h>
  38 #include <linux/pci-ats.h>
  39 #include <linux/memblock.h>
  40 #include <linux/dma-contiguous.h>
  41 #include <linux/dma-direct.h>
  42 #include <linux/crash_dump.h>
  43 #include <linux/numa.h>
  44 #include <linux/swiotlb.h>
  45 #include <asm/irq_remapping.h>
  46 #include <asm/cacheflush.h>
  47 #include <asm/iommu.h>
  48 #include <trace/events/intel_iommu.h>
  49 
  50 #include "irq_remapping.h"
  51 #include "intel-pasid.h"
  52 
  53 #define ROOT_SIZE               VTD_PAGE_SIZE
  54 #define CONTEXT_SIZE            VTD_PAGE_SIZE
  55 
  56 #define IS_GFX_DEVICE(pdev) ((pdev->class >> 16) == PCI_BASE_CLASS_DISPLAY)
  57 #define IS_USB_DEVICE(pdev) ((pdev->class >> 8) == PCI_CLASS_SERIAL_USB)
  58 #define IS_ISA_DEVICE(pdev) ((pdev->class >> 8) == PCI_CLASS_BRIDGE_ISA)
  59 #define IS_AZALIA(pdev) ((pdev)->vendor == 0x8086 && (pdev)->device == 0x3a3e)
  60 
  61 #define IOAPIC_RANGE_START      (0xfee00000)
  62 #define IOAPIC_RANGE_END        (0xfeefffff)
  63 #define IOVA_START_ADDR         (0x1000)
  64 
  65 #define DEFAULT_DOMAIN_ADDRESS_WIDTH 57
  66 
  67 #define MAX_AGAW_WIDTH 64
  68 #define MAX_AGAW_PFN_WIDTH      (MAX_AGAW_WIDTH - VTD_PAGE_SHIFT)
  69 
  70 #define __DOMAIN_MAX_PFN(gaw)  ((((uint64_t)1) << (gaw-VTD_PAGE_SHIFT)) - 1)
  71 #define __DOMAIN_MAX_ADDR(gaw) ((((uint64_t)1) << gaw) - 1)
  72 
  73 /* We limit DOMAIN_MAX_PFN to fit in an unsigned long, and DOMAIN_MAX_ADDR
  74    to match. That way, we can use 'unsigned long' for PFNs with impunity. */
  75 #define DOMAIN_MAX_PFN(gaw)     ((unsigned long) min_t(uint64_t, \
  76                                 __DOMAIN_MAX_PFN(gaw), (unsigned long)-1))
  77 #define DOMAIN_MAX_ADDR(gaw)    (((uint64_t)__DOMAIN_MAX_PFN(gaw)) << VTD_PAGE_SHIFT)
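
/*
 * For illustration, with a 48-bit guest address width and
 * VTD_PAGE_SHIFT == 12:
 *
 *	__DOMAIN_MAX_PFN(48) == (1ULL << 36) - 1 == 0xFFFFFFFFFULL
 *	DOMAIN_MAX_ADDR(48)  == 0xFFFFFFFFFULL << 12 == 0xFFFFFFFFF000ULL
 *
 * i.e. the highest VT-d page frame the domain can address and the base
 * address of that page. On 64-bit kernels the min_t() clamp against
 * (unsigned long)-1 never triggers; it only matters for 32-bit builds.
 */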
  78 
  79 /* IO virtual address start page frame number */
  80 #define IOVA_START_PFN          (1)
  81 
  82 #define IOVA_PFN(addr)          ((addr) >> PAGE_SHIFT)
  83 
  84 /* page table handling */
  85 #define LEVEL_STRIDE            (9)
  86 #define LEVEL_MASK              (((u64)1 << LEVEL_STRIDE) - 1)
  87 
  88 /*
  89  * This bitmap is used to advertise the page sizes our hardware supports
  90  * to the IOMMU core, which will then use this information to split
  91  * physically contiguous memory regions it is mapping into page sizes
  92  * that we support.
  93  *
  94  * Traditionally the IOMMU core just handed us the mappings directly,
  95  * after making sure the size is a power-of-two multiple of 4KiB and
  96  * that the mapping has natural alignment.
  97  *
  98  * To retain this behavior, we currently advertise that we support
  99  * all page sizes that are a power-of-two multiple of 4KiB.
 100  *
 101  * If at some point we'd like to utilize the IOMMU core's new behavior,
 102  * we could change this to advertise the real page sizes we support.
 103  */
 104 #define INTEL_IOMMU_PGSIZES     (~0xFFFUL)
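
/*
 * For illustration: ~0xFFFUL sets every bit from 12 upwards, i.e. it
 * advertises 4KiB, 8KiB, 16KiB, ... as supported page sizes. Given that
 * bitmap, a request through the generic API such as
 *
 *	iommu_map(domain, iova, paddr, SZ_2M + SZ_4K, IOMMU_READ | IOMMU_WRITE);
 *
 * is split by the IOMMU core into naturally aligned power-of-two chunks
 * (e.g. one 2MiB piece plus one 4KiB piece, alignment permitting) before
 * each piece reaches this driver's map callback.
 */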
 105 
 106 static inline int agaw_to_level(int agaw)
 107 {
 108         return agaw + 2;
 109 }
 110 
 111 static inline int agaw_to_width(int agaw)
 112 {
 113         return min_t(int, 30 + agaw * LEVEL_STRIDE, MAX_AGAW_WIDTH);
 114 }
 115 
 116 static inline int width_to_agaw(int width)
 117 {
 118         return DIV_ROUND_UP(width - 30, LEVEL_STRIDE);
 119 }
 120 
 121 static inline unsigned int level_to_offset_bits(int level)
 122 {
 123         return (level - 1) * LEVEL_STRIDE;
 124 }
 125 
 126 static inline int pfn_level_offset(unsigned long pfn, int level)
 127 {
 128         return (pfn >> level_to_offset_bits(level)) & LEVEL_MASK;
 129 }
 130 
 131 static inline unsigned long level_mask(int level)
 132 {
 133         return -1UL << level_to_offset_bits(level);
 134 }
 135 
 136 static inline unsigned long level_size(int level)
 137 {
 138         return 1UL << level_to_offset_bits(level);
 139 }
 140 
 141 static inline unsigned long align_to_level(unsigned long pfn, int level)
 142 {
 143         return (pfn + level_size(level) - 1) & level_mask(level);
 144 }
 145 
 146 static inline unsigned long lvl_to_nr_pages(unsigned int lvl)
 147 {
 148         return  1 << min_t(int, (lvl - 1) * LEVEL_STRIDE, MAX_AGAW_PFN_WIDTH);
 149 }
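
/*
 * A minimal worked example for the helpers above, assuming a 48-bit
 * address width and LEVEL_STRIDE == 9:
 *
 *	width_to_agaw(48)        == DIV_ROUND_UP(48 - 30, 9) == 2
 *	agaw_to_level(2)         == 4		// 4-level page table
 *	agaw_to_width(2)         == 48
 *	level_size(2)            == 512		// 4KiB pfns per level-2 entry (2MiB)
 *	pfn_level_offset(pfn, 3) == (pfn >> 18) & 0x1ff
 *
 * so a DMA pfn decomposes into four 9-bit table indexes, one per level.
 */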
 150 
 151 /* VT-d pages must always be _smaller_ than MM pages. Otherwise things
 152    are never going to work. */
 153 static inline unsigned long dma_to_mm_pfn(unsigned long dma_pfn)
 154 {
 155         return dma_pfn >> (PAGE_SHIFT - VTD_PAGE_SHIFT);
 156 }
 157 
 158 static inline unsigned long mm_to_dma_pfn(unsigned long mm_pfn)
 159 {
 160         return mm_pfn << (PAGE_SHIFT - VTD_PAGE_SHIFT);
 161 }
 162 static inline unsigned long page_to_dma_pfn(struct page *pg)
 163 {
 164         return mm_to_dma_pfn(page_to_pfn(pg));
 165 }
 166 static inline unsigned long virt_to_dma_pfn(void *p)
 167 {
 168         return page_to_dma_pfn(virt_to_page(p));
 169 }
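
/*
 * For illustration: VT-d pfns are always in 4KiB units (VTD_PAGE_SHIFT
 * == 12), while mm pfns are in PAGE_SIZE units. With 4KiB kernel pages
 * the shifts cancel and the conversions above are the identity; on a
 * hypothetical configuration with 64KiB kernel pages one mm pfn would
 * span 16 VT-d pfns:
 *
 *	mm_to_dma_pfn(pfn)  == pfn << (16 - 12)		// pfn * 16
 *	dma_to_mm_pfn(dpfn) == dpfn >> (16 - 12)	// dpfn / 16
 */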
 170 
 171 /* global iommu list, set NULL for ignored DMAR units */
 172 static struct intel_iommu **g_iommus;
 173 
 174 static void __init check_tylersburg_isoch(void);
 175 static int rwbf_quirk;
 176 
 177 /*
 178  * set to 1 to panic the kernel if VT-d can't be successfully enabled
 179  * (used when kernel is launched w/ TXT)
 180  */
 181 static int force_on = 0;
 182 int intel_iommu_tboot_noforce;
 183 static int no_platform_optin;
 184 
 185 #define ROOT_ENTRY_NR (VTD_PAGE_SIZE/sizeof(struct root_entry))
 186 
 187 /*
 188  * Take a root_entry and return the Lower Context Table Pointer (LCTP)
 189  * if marked present.
 190  */
 191 static phys_addr_t root_entry_lctp(struct root_entry *re)
 192 {
 193         if (!(re->lo & 1))
 194                 return 0;
 195 
 196         return re->lo & VTD_PAGE_MASK;
 197 }
 198 
 199 /*
 200  * Take a root_entry and return the Upper Context Table Pointer (UCTP)
 201  * if marked present.
 202  */
 203 static phys_addr_t root_entry_uctp(struct root_entry *re)
 204 {
 205         if (!(re->hi & 1))
 206                 return 0;
 207 
 208         return re->hi & VTD_PAGE_MASK;
 209 }
 210 
 211 static inline void context_clear_pasid_enable(struct context_entry *context)
 212 {
 213         context->lo &= ~(1ULL << 11);
 214 }
 215 
 216 static inline bool context_pasid_enabled(struct context_entry *context)
 217 {
 218         return !!(context->lo & (1ULL << 11));
 219 }
 220 
 221 static inline void context_set_copied(struct context_entry *context)
 222 {
 223         context->hi |= (1ull << 3);
 224 }
 225 
 226 static inline bool context_copied(struct context_entry *context)
 227 {
 228         return !!(context->hi & (1ULL << 3));
 229 }
 230 
 231 static inline bool __context_present(struct context_entry *context)
 232 {
 233         return (context->lo & 1);
 234 }
 235 
 236 bool context_present(struct context_entry *context)
 237 {
 238         return context_pasid_enabled(context) ?
 239              __context_present(context) :
 240              __context_present(context) && !context_copied(context);
 241 }
 242 
 243 static inline void context_set_present(struct context_entry *context)
 244 {
 245         context->lo |= 1;
 246 }
 247 
 248 static inline void context_set_fault_enable(struct context_entry *context)
 249 {
 250         context->lo &= (((u64)-1) << 2) | 1;
 251 }
 252 
 253 static inline void context_set_translation_type(struct context_entry *context,
 254                                                 unsigned long value)
 255 {
 256         context->lo &= (((u64)-1) << 4) | 3;
 257         context->lo |= (value & 3) << 2;
 258 }
 259 
 260 static inline void context_set_address_root(struct context_entry *context,
 261                                             unsigned long value)
 262 {
 263         context->lo &= ~VTD_PAGE_MASK;
 264         context->lo |= value & VTD_PAGE_MASK;
 265 }
 266 
 267 static inline void context_set_address_width(struct context_entry *context,
 268                                              unsigned long value)
 269 {
 270         context->hi |= value & 7;
 271 }
 272 
 273 static inline void context_set_domain_id(struct context_entry *context,
 274                                          unsigned long value)
 275 {
 276         context->hi |= (value & ((1 << 16) - 1)) << 8;
 277 }
 278 
 279 static inline int context_domain_id(struct context_entry *c)
 280 {
 281         return((c->hi >> 8) & 0xffff);
 282 }
 283 
 284 static inline void context_clear_entry(struct context_entry *context)
 285 {
 286         context->lo = 0;
 287         context->hi = 0;
 288 }
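
/*
 * Summary of the legacy context-entry layout implied by the accessors
 * above (descriptive only):
 *
 *	lo[0]		present
 *	lo[1]		fault processing disable (cleared by
 *			context_set_fault_enable())
 *	lo[3:2]		translation type
 *	lo[11]		PASID enable, consulted when handling entries
 *			copied from a previous kernel
 *	lo[63:12]	second-level page-table pointer
 *			(context_set_address_root())
 *	hi[2:0]		address width (AGAW)
 *	hi[3]		software flag marking a copied entry
 *	hi[23:8]	domain identifier
 */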
 289 
 290 /*
 291  * This domain is a static identity mapping domain.
 292  *      1. This domain creates a static 1:1 mapping to all usable memory.
 293  *      2. It maps to each iommu if successful.
 294  *      3. Each iommu maps to this domain if successful.
 295  */
 296 static struct dmar_domain *si_domain;
 297 static int hw_pass_through = 1;
 298 
 299 /* si_domain contains multiple devices */
 300 #define DOMAIN_FLAG_STATIC_IDENTITY             BIT(0)
 301 
 302 /*
 303  * This is a DMA domain allocated through the iommu domain allocation
 304  * interface. But one or more devices belonging to this domain have
 305  * been chosen to use a private domain. We should avoid to use the
 306  * map/unmap/iova_to_phys APIs on it.
 307  */
 308 #define DOMAIN_FLAG_LOSE_CHILDREN               BIT(1)
 309 
 310 #define for_each_domain_iommu(idx, domain)                      \
 311         for (idx = 0; idx < g_num_of_iommus; idx++)             \
 312                 if (domain->iommu_refcnt[idx])
 313 
 314 struct dmar_rmrr_unit {
 315         struct list_head list;          /* list of rmrr units   */
 316         struct acpi_dmar_header *hdr;   /* ACPI header          */
 317         u64     base_address;           /* reserved base address*/
 318         u64     end_address;            /* reserved end address */
 319         struct dmar_dev_scope *devices; /* target devices */
 320         int     devices_cnt;            /* target device count */
 321 };
 322 
 323 struct dmar_atsr_unit {
 324         struct list_head list;          /* list of ATSR units */
 325         struct acpi_dmar_header *hdr;   /* ACPI header */
 326         struct dmar_dev_scope *devices; /* target devices */
 327         int devices_cnt;                /* target device count */
 328         u8 include_all:1;               /* include all ports */
 329 };
 330 
 331 static LIST_HEAD(dmar_atsr_units);
 332 static LIST_HEAD(dmar_rmrr_units);
 333 
 334 #define for_each_rmrr_units(rmrr) \
 335         list_for_each_entry(rmrr, &dmar_rmrr_units, list)
 336 
 337 /* number of IOMMUs; used to size and index the g_iommus array */
 338 static int g_num_of_iommus;
 339 
 340 static void domain_exit(struct dmar_domain *domain);
 341 static void domain_remove_dev_info(struct dmar_domain *domain);
 342 static void dmar_remove_one_dev_info(struct device *dev);
 343 static void __dmar_remove_one_dev_info(struct device_domain_info *info);
 344 static void domain_context_clear(struct intel_iommu *iommu,
 345                                  struct device *dev);
 346 static int domain_detach_iommu(struct dmar_domain *domain,
 347                                struct intel_iommu *iommu);
 348 static bool device_is_rmrr_locked(struct device *dev);
 349 static int intel_iommu_attach_device(struct iommu_domain *domain,
 350                                      struct device *dev);
 351 static phys_addr_t intel_iommu_iova_to_phys(struct iommu_domain *domain,
 352                                             dma_addr_t iova);
 353 
 354 #ifdef CONFIG_INTEL_IOMMU_DEFAULT_ON
 355 int dmar_disabled = 0;
 356 #else
 357 int dmar_disabled = 1;
 358 #endif /*CONFIG_INTEL_IOMMU_DEFAULT_ON*/
 359 
 360 int intel_iommu_sm;
 361 int intel_iommu_enabled = 0;
 362 EXPORT_SYMBOL_GPL(intel_iommu_enabled);
 363 
 364 static int dmar_map_gfx = 1;
 365 static int dmar_forcedac;
 366 static int intel_iommu_strict;
 367 static int intel_iommu_superpage = 1;
 368 static int iommu_identity_mapping;
 369 static int intel_no_bounce;
 370 
 371 #define IDENTMAP_ALL            1
 372 #define IDENTMAP_GFX            2
 373 #define IDENTMAP_AZALIA         4
 374 
 375 int intel_iommu_gfx_mapped;
 376 EXPORT_SYMBOL_GPL(intel_iommu_gfx_mapped);
 377 
 378 #define DUMMY_DEVICE_DOMAIN_INFO ((struct device_domain_info *)(-1))
 379 #define DEFER_DEVICE_DOMAIN_INFO ((struct device_domain_info *)(-2))
 380 static DEFINE_SPINLOCK(device_domain_lock);
 381 static LIST_HEAD(device_domain_list);
 382 
 383 #define device_needs_bounce(d) (!intel_no_bounce && dev_is_pci(d) &&    \
 384                                 to_pci_dev(d)->untrusted)
 385 
 386 /*
 387  * Iterate over elements in device_domain_list and call the specified
 388  * callback @fn against each element.
 389  */
 390 int for_each_device_domain(int (*fn)(struct device_domain_info *info,
 391                                      void *data), void *data)
 392 {
 393         int ret = 0;
 394         unsigned long flags;
 395         struct device_domain_info *info;
 396 
 397         spin_lock_irqsave(&device_domain_lock, flags);
 398         list_for_each_entry(info, &device_domain_list, global) {
 399                 ret = fn(info, data);
 400                 if (ret) {
 401                         spin_unlock_irqrestore(&device_domain_lock, flags);
 402                         return ret;
 403                 }
 404         }
 405         spin_unlock_irqrestore(&device_domain_lock, flags);
 406 
 407         return 0;
 408 }
 409 
 410 const struct iommu_ops intel_iommu_ops;
 411 
 412 static bool translation_pre_enabled(struct intel_iommu *iommu)
 413 {
 414         return (iommu->flags & VTD_FLAG_TRANS_PRE_ENABLED);
 415 }
 416 
 417 static void clear_translation_pre_enabled(struct intel_iommu *iommu)
 418 {
 419         iommu->flags &= ~VTD_FLAG_TRANS_PRE_ENABLED;
 420 }
 421 
 422 static void init_translation_status(struct intel_iommu *iommu)
 423 {
 424         u32 gsts;
 425 
 426         gsts = readl(iommu->reg + DMAR_GSTS_REG);
 427         if (gsts & DMA_GSTS_TES)
 428                 iommu->flags |= VTD_FLAG_TRANS_PRE_ENABLED;
 429 }
 430 
 431 /* Convert a generic 'struct iommu_domain' to a private 'struct dmar_domain' */
 432 static struct dmar_domain *to_dmar_domain(struct iommu_domain *dom)
 433 {
 434         return container_of(dom, struct dmar_domain, domain);
 435 }
 436 
 437 static int __init intel_iommu_setup(char *str)
 438 {
 439         if (!str)
 440                 return -EINVAL;
 441         while (*str) {
 442                 if (!strncmp(str, "on", 2)) {
 443                         dmar_disabled = 0;
 444                         pr_info("IOMMU enabled\n");
 445                 } else if (!strncmp(str, "off", 3)) {
 446                         dmar_disabled = 1;
 447                         no_platform_optin = 1;
 448                         pr_info("IOMMU disabled\n");
 449                 } else if (!strncmp(str, "igfx_off", 8)) {
 450                         dmar_map_gfx = 0;
 451                         pr_info("Disable GFX device mapping\n");
 452                 } else if (!strncmp(str, "forcedac", 8)) {
 453                         pr_info("Forcing DAC for PCI devices\n");
 454                         dmar_forcedac = 1;
 455                 } else if (!strncmp(str, "strict", 6)) {
 456                         pr_info("Disable batched IOTLB flush\n");
 457                         intel_iommu_strict = 1;
 458                 } else if (!strncmp(str, "sp_off", 6)) {
 459                         pr_info("Disable supported super page\n");
 460                         intel_iommu_superpage = 0;
 461                 } else if (!strncmp(str, "sm_on", 5)) {
 462                         pr_info("Intel-IOMMU: scalable mode supported\n");
 463                         intel_iommu_sm = 1;
 464                 } else if (!strncmp(str, "tboot_noforce", 13)) {
 465                         printk(KERN_INFO
 466                                 "Intel-IOMMU: not forcing on after tboot. This could expose security risk for tboot\n");
 467                         intel_iommu_tboot_noforce = 1;
 468                 } else if (!strncmp(str, "nobounce", 8)) {
 469                         pr_info("Intel-IOMMU: No bounce buffer. This could expose security risks of DMA attacks\n");
 470                         intel_no_bounce = 1;
 471                 }
 472 
 473                 str += strcspn(str, ",");
 474                 while (*str == ',')
 475                         str++;
 476         }
 477         return 0;
 478 }
 479 __setup("intel_iommu=", intel_iommu_setup);
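
/*
 * For illustration, the parser above takes a comma-separated option
 * string on the kernel command line, e.g.
 *
 *	intel_iommu=on,sm_on,strict
 *
 * which enables DMA remapping, opts in to scalable mode and selects
 * strict (unbatched) IOTLB flushing. Unrecognised tokens are silently
 * skipped.
 */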
 480 
 481 static struct kmem_cache *iommu_domain_cache;
 482 static struct kmem_cache *iommu_devinfo_cache;
 483 
 484 static struct dmar_domain* get_iommu_domain(struct intel_iommu *iommu, u16 did)
 485 {
 486         struct dmar_domain **domains;
 487         int idx = did >> 8;
 488 
 489         domains = iommu->domains[idx];
 490         if (!domains)
 491                 return NULL;
 492 
 493         return domains[did & 0xff];
 494 }
 495 
 496 static void set_iommu_domain(struct intel_iommu *iommu, u16 did,
 497                              struct dmar_domain *domain)
 498 {
 499         struct dmar_domain **domains;
 500         int idx = did >> 8;
 501 
 502         if (!iommu->domains[idx]) {
 503                 size_t size = 256 * sizeof(struct dmar_domain *);
 504                 iommu->domains[idx] = kzalloc(size, GFP_ATOMIC);
 505         }
 506 
 507         domains = iommu->domains[idx];
 508         if (WARN_ON(!domains))
 509                 return;
 510         else
 511                 domains[did & 0xff] = domain;
 512 }
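
/*
 * iommu->domains is a two-level table of 256-entry pages, so the full
 * 16-bit domain-ID space need not be allocated up front. For
 * illustration, a lookup for did 0x1234 resolves as
 *
 *	idx  = 0x1234 >> 8;		// page 0x12
 *	slot = 0x1234 & 0xff;		// entry 0x34 within that page
 *	domain = iommu->domains[idx] ? iommu->domains[idx][slot] : NULL;
 *
 * and set_iommu_domain() allocates the 256-pointer page on first use.
 */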
 513 
 514 void *alloc_pgtable_page(int node)
 515 {
 516         struct page *page;
 517         void *vaddr = NULL;
 518 
 519         page = alloc_pages_node(node, GFP_ATOMIC | __GFP_ZERO, 0);
 520         if (page)
 521                 vaddr = page_address(page);
 522         return vaddr;
 523 }
 524 
 525 void free_pgtable_page(void *vaddr)
 526 {
 527         free_page((unsigned long)vaddr);
 528 }
 529 
 530 static inline void *alloc_domain_mem(void)
 531 {
 532         return kmem_cache_alloc(iommu_domain_cache, GFP_ATOMIC);
 533 }
 534 
 535 static void free_domain_mem(void *vaddr)
 536 {
 537         kmem_cache_free(iommu_domain_cache, vaddr);
 538 }
 539 
 540 static inline void * alloc_devinfo_mem(void)
 541 {
 542         return kmem_cache_alloc(iommu_devinfo_cache, GFP_ATOMIC);
 543 }
 544 
 545 static inline void free_devinfo_mem(void *vaddr)
 546 {
 547         kmem_cache_free(iommu_devinfo_cache, vaddr);
 548 }
 549 
 550 static inline int domain_type_is_si(struct dmar_domain *domain)
 551 {
 552         return domain->flags & DOMAIN_FLAG_STATIC_IDENTITY;
 553 }
 554 
 555 static inline int domain_pfn_supported(struct dmar_domain *domain,
 556                                        unsigned long pfn)
 557 {
 558         int addr_width = agaw_to_width(domain->agaw) - VTD_PAGE_SHIFT;
 559 
 560         return !(addr_width < BITS_PER_LONG && pfn >> addr_width);
 561 }
 562 
 563 static int __iommu_calculate_agaw(struct intel_iommu *iommu, int max_gaw)
 564 {
 565         unsigned long sagaw;
 566         int agaw = -1;
 567 
 568         sagaw = cap_sagaw(iommu->cap);
 569         for (agaw = width_to_agaw(max_gaw);
 570              agaw >= 0; agaw--) {
 571                 if (test_bit(agaw, &sagaw))
 572                         break;
 573         }
 574 
 575         return agaw;
 576 }
 577 
 578 /*
 579  * Calculate max SAGAW for each iommu.
 580  */
 581 int iommu_calculate_max_sagaw(struct intel_iommu *iommu)
 582 {
 583         return __iommu_calculate_agaw(iommu, MAX_AGAW_WIDTH);
 584 }
 585 
 586 /*
 587  * Calculate the agaw for each iommu.
 588  * "SAGAW" may differ across iommus, so use a default agaw and fall
 589  * back to a smaller supported agaw for iommus that don't support the default.
 590  */
 591 int iommu_calculate_agaw(struct intel_iommu *iommu)
 592 {
 593         return __iommu_calculate_agaw(iommu, DEFAULT_DOMAIN_ADDRESS_WIDTH);
 594 }
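
/*
 * A worked example: cap_sagaw() is a bitmap of Supported Adjusted Guest
 * Address Widths, where bit N set means an (N + 2)-level table is
 * implemented. With DEFAULT_DOMAIN_ADDRESS_WIDTH == 57 the search
 * starts at width_to_agaw(57) == 3 (5-level); on hardware reporting
 * only sagaw == 0x4 (bit 2, i.e. 4-level/48-bit) it falls back so that
 *
 *	iommu_calculate_agaw(iommu) == 2
 *	agaw_to_width(2)            == 48
 */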
 595 
 596 /* This function only returns a single iommu in a domain */
 597 struct intel_iommu *domain_get_iommu(struct dmar_domain *domain)
 598 {
 599         int iommu_id;
 600 
 601         /* si_domain and vm domain should not get here. */
 602         if (WARN_ON(domain->domain.type != IOMMU_DOMAIN_DMA))
 603                 return NULL;
 604 
 605         for_each_domain_iommu(iommu_id, domain)
 606                 break;
 607 
 608         if (iommu_id < 0 || iommu_id >= g_num_of_iommus)
 609                 return NULL;
 610 
 611         return g_iommus[iommu_id];
 612 }
 613 
 614 static void domain_update_iommu_coherency(struct dmar_domain *domain)
 615 {
 616         struct dmar_drhd_unit *drhd;
 617         struct intel_iommu *iommu;
 618         bool found = false;
 619         int i;
 620 
 621         domain->iommu_coherency = 1;
 622 
 623         for_each_domain_iommu(i, domain) {
 624                 found = true;
 625                 if (!ecap_coherent(g_iommus[i]->ecap)) {
 626                         domain->iommu_coherency = 0;
 627                         break;
 628                 }
 629         }
 630         if (found)
 631                 return;
 632 
 633         /* No hardware attached; use lowest common denominator */
 634         rcu_read_lock();
 635         for_each_active_iommu(iommu, drhd) {
 636                 if (!ecap_coherent(iommu->ecap)) {
 637                         domain->iommu_coherency = 0;
 638                         break;
 639                 }
 640         }
 641         rcu_read_unlock();
 642 }
 643 
 644 static int domain_update_iommu_snooping(struct intel_iommu *skip)
 645 {
 646         struct dmar_drhd_unit *drhd;
 647         struct intel_iommu *iommu;
 648         int ret = 1;
 649 
 650         rcu_read_lock();
 651         for_each_active_iommu(iommu, drhd) {
 652                 if (iommu != skip) {
 653                         if (!ecap_sc_support(iommu->ecap)) {
 654                                 ret = 0;
 655                                 break;
 656                         }
 657                 }
 658         }
 659         rcu_read_unlock();
 660 
 661         return ret;
 662 }
 663 
 664 static int domain_update_iommu_superpage(struct intel_iommu *skip)
 665 {
 666         struct dmar_drhd_unit *drhd;
 667         struct intel_iommu *iommu;
 668         int mask = 0xf;
 669 
 670         if (!intel_iommu_superpage) {
 671                 return 0;
 672         }
 673 
 674         /* set iommu_superpage to the smallest common denominator */
 675         rcu_read_lock();
 676         for_each_active_iommu(iommu, drhd) {
 677                 if (iommu != skip) {
 678                         mask &= cap_super_page_val(iommu->cap);
 679                         if (!mask)
 680                                 break;
 681                 }
 682         }
 683         rcu_read_unlock();
 684 
 685         return fls(mask);
 686 }
 687 
 688 /* Some capabilities may be different across iommus */
 689 static void domain_update_iommu_cap(struct dmar_domain *domain)
 690 {
 691         domain_update_iommu_coherency(domain);
 692         domain->iommu_snooping = domain_update_iommu_snooping(NULL);
 693         domain->iommu_superpage = domain_update_iommu_superpage(NULL);
 694 }
 695 
 696 struct context_entry *iommu_context_addr(struct intel_iommu *iommu, u8 bus,
 697                                          u8 devfn, int alloc)
 698 {
 699         struct root_entry *root = &iommu->root_entry[bus];
 700         struct context_entry *context;
 701         u64 *entry;
 702 
 703         entry = &root->lo;
 704         if (sm_supported(iommu)) {
 705                 if (devfn >= 0x80) {
 706                         devfn -= 0x80;
 707                         entry = &root->hi;
 708                 }
 709                 devfn *= 2;
 710         }
 711         if (*entry & 1)
 712                 context = phys_to_virt(*entry & VTD_PAGE_MASK);
 713         else {
 714                 unsigned long phy_addr;
 715                 if (!alloc)
 716                         return NULL;
 717 
 718                 context = alloc_pgtable_page(iommu->node);
 719                 if (!context)
 720                         return NULL;
 721 
 722                 __iommu_flush_cache(iommu, (void *)context, CONTEXT_SIZE);
 723                 phy_addr = virt_to_phys((void *)context);
 724                 *entry = phy_addr | 1;
 725                 __iommu_flush_cache(iommu, entry, sizeof(*entry));
 726         }
 727         return &context[devfn];
 728 }
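
/*
 * In scalable mode each 128-bit root entry holds two context-table
 * pointers (root->lo for devfn 0x00-0x7f, root->hi for 0x80-0xff) and
 * each scalable-mode context entry is 256 bits wide. That is why the
 * function above switches to &root->hi for devfn >= 0x80 and then
 * doubles devfn: indexing an array of 128-bit context_entry structs by
 * devfn * 2 lands on the first half of the wider entry. In legacy mode
 * a single table of 256 128-bit entries is indexed by devfn directly.
 */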
 729 
 730 static int iommu_dummy(struct device *dev)
 731 {
 732         return dev->archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO;
 733 }
 734 
 735 /**
 736  * is_downstream_to_pci_bridge - test if a device belongs to the PCI
 737  *                               sub-hierarchy of a candidate PCI-PCI bridge
 738  * @dev: candidate PCI device belonging to @bridge PCI sub-hierarchy
 739  * @bridge: the candidate PCI-PCI bridge
 740  *
 741  * Return: true if @dev belongs to @bridge PCI sub-hierarchy, else false.
 742  */
 743 static bool
 744 is_downstream_to_pci_bridge(struct device *dev, struct device *bridge)
 745 {
 746         struct pci_dev *pdev, *pbridge;
 747 
 748         if (!dev_is_pci(dev) || !dev_is_pci(bridge))
 749                 return false;
 750 
 751         pdev = to_pci_dev(dev);
 752         pbridge = to_pci_dev(bridge);
 753 
 754         if (pbridge->subordinate &&
 755             pbridge->subordinate->number <= pdev->bus->number &&
 756             pbridge->subordinate->busn_res.end >= pdev->bus->number)
 757                 return true;
 758 
 759         return false;
 760 }
 761 
 762 static struct intel_iommu *device_to_iommu(struct device *dev, u8 *bus, u8 *devfn)
 763 {
 764         struct dmar_drhd_unit *drhd = NULL;
 765         struct intel_iommu *iommu;
 766         struct device *tmp;
 767         struct pci_dev *pdev = NULL;
 768         u16 segment = 0;
 769         int i;
 770 
 771         if (iommu_dummy(dev))
 772                 return NULL;
 773 
 774         if (dev_is_pci(dev)) {
 775                 struct pci_dev *pf_pdev;
 776 
 777                 pdev = to_pci_dev(dev);
 778 
 779 #ifdef CONFIG_X86
 780                 /* VMD child devices currently cannot be handled individually */
 781                 if (is_vmd(pdev->bus))
 782                         return NULL;
 783 #endif
 784 
 785                 /* VFs aren't listed in scope tables; we need to look up
 786                  * the PF instead to find the IOMMU. */
 787                 pf_pdev = pci_physfn(pdev);
 788                 dev = &pf_pdev->dev;
 789                 segment = pci_domain_nr(pdev->bus);
 790         } else if (has_acpi_companion(dev))
 791                 dev = &ACPI_COMPANION(dev)->dev;
 792 
 793         rcu_read_lock();
 794         for_each_active_iommu(iommu, drhd) {
 795                 if (pdev && segment != drhd->segment)
 796                         continue;
 797 
 798                 for_each_active_dev_scope(drhd->devices,
 799                                           drhd->devices_cnt, i, tmp) {
 800                         if (tmp == dev) {
 801                                 /* For a VF use its original BDF# not that of the PF
 802                                  * which we used for the IOMMU lookup. Strictly speaking
 803                                  * we could do this for all PCI devices; we only need to
 804                                  * get the BDF# from the scope table for ACPI matches. */
 805                                 if (pdev && pdev->is_virtfn)
 806                                         goto got_pdev;
 807 
 808                                 *bus = drhd->devices[i].bus;
 809                                 *devfn = drhd->devices[i].devfn;
 810                                 goto out;
 811                         }
 812 
 813                         if (is_downstream_to_pci_bridge(dev, tmp))
 814                                 goto got_pdev;
 815                 }
 816 
 817                 if (pdev && drhd->include_all) {
 818                 got_pdev:
 819                         *bus = pdev->bus->number;
 820                         *devfn = pdev->devfn;
 821                         goto out;
 822                 }
 823         }
 824         iommu = NULL;
 825  out:
 826         rcu_read_unlock();
 827 
 828         return iommu;
 829 }
 830 
 831 static void domain_flush_cache(struct dmar_domain *domain,
 832                                void *addr, int size)
 833 {
 834         if (!domain->iommu_coherency)
 835                 clflush_cache_range(addr, size);
 836 }
 837 
 838 static int device_context_mapped(struct intel_iommu *iommu, u8 bus, u8 devfn)
 839 {
 840         struct context_entry *context;
 841         int ret = 0;
 842         unsigned long flags;
 843 
 844         spin_lock_irqsave(&iommu->lock, flags);
 845         context = iommu_context_addr(iommu, bus, devfn, 0);
 846         if (context)
 847                 ret = context_present(context);
 848         spin_unlock_irqrestore(&iommu->lock, flags);
 849         return ret;
 850 }
 851 
 852 static void free_context_table(struct intel_iommu *iommu)
 853 {
 854         int i;
 855         unsigned long flags;
 856         struct context_entry *context;
 857 
 858         spin_lock_irqsave(&iommu->lock, flags);
 859         if (!iommu->root_entry) {
 860                 goto out;
 861         }
 862         for (i = 0; i < ROOT_ENTRY_NR; i++) {
 863                 context = iommu_context_addr(iommu, i, 0, 0);
 864                 if (context)
 865                         free_pgtable_page(context);
 866 
 867                 if (!sm_supported(iommu))
 868                         continue;
 869 
 870                 context = iommu_context_addr(iommu, i, 0x80, 0);
 871                 if (context)
 872                         free_pgtable_page(context);
 873 
 874         }
 875         free_pgtable_page(iommu->root_entry);
 876         iommu->root_entry = NULL;
 877 out:
 878         spin_unlock_irqrestore(&iommu->lock, flags);
 879 }
 880 
 881 static struct dma_pte *pfn_to_dma_pte(struct dmar_domain *domain,
 882                                       unsigned long pfn, int *target_level)
 883 {
 884         struct dma_pte *parent, *pte;
 885         int level = agaw_to_level(domain->agaw);
 886         int offset;
 887 
 888         BUG_ON(!domain->pgd);
 889 
 890         if (!domain_pfn_supported(domain, pfn))
 891                 /* Address beyond IOMMU's addressing capabilities. */
 892                 return NULL;
 893 
 894         parent = domain->pgd;
 895 
 896         while (1) {
 897                 void *tmp_page;
 898 
 899                 offset = pfn_level_offset(pfn, level);
 900                 pte = &parent[offset];
 901                 if (!*target_level && (dma_pte_superpage(pte) || !dma_pte_present(pte)))
 902                         break;
 903                 if (level == *target_level)
 904                         break;
 905 
 906                 if (!dma_pte_present(pte)) {
 907                         uint64_t pteval;
 908 
 909                         tmp_page = alloc_pgtable_page(domain->nid);
 910 
 911                         if (!tmp_page)
 912                                 return NULL;
 913 
 914                         domain_flush_cache(domain, tmp_page, VTD_PAGE_SIZE);
 915                         pteval = ((uint64_t)virt_to_dma_pfn(tmp_page) << VTD_PAGE_SHIFT) | DMA_PTE_READ | DMA_PTE_WRITE;
 916                         if (cmpxchg64(&pte->val, 0ULL, pteval))
 917                                 /* Someone else set it while we were thinking; use theirs. */
 918                                 free_pgtable_page(tmp_page);
 919                         else
 920                                 domain_flush_cache(domain, pte, sizeof(*pte));
 921                 }
 922                 if (level == 1)
 923                         break;
 924 
 925                 parent = phys_to_virt(dma_pte_addr(pte));
 926                 level--;
 927         }
 928 
 929         if (!*target_level)
 930                 *target_level = level;
 931 
 932         return pte;
 933 }
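
/*
 * Usage sketch for the function above: a non-zero *target_level asks it
 * to build the tree down to that level, e.g.
 *
 *	int level = 1;		// request a 4KiB leaf PTE
 *	pte = pfn_to_dma_pte(domain, iov_pfn, &level);
 *
 * while passing a level of 0 only walks the existing tree, stopping at
 * the first superpage or non-present entry and reporting, through
 * *target_level, the level at which it stopped.
 */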
 934 
 935 /* return address's pte at specific level */
 936 static struct dma_pte *dma_pfn_level_pte(struct dmar_domain *domain,
 937                                          unsigned long pfn,
 938                                          int level, int *large_page)
 939 {
 940         struct dma_pte *parent, *pte;
 941         int total = agaw_to_level(domain->agaw);
 942         int offset;
 943 
 944         parent = domain->pgd;
 945         while (level <= total) {
 946                 offset = pfn_level_offset(pfn, total);
 947                 pte = &parent[offset];
 948                 if (level == total)
 949                         return pte;
 950 
 951                 if (!dma_pte_present(pte)) {
 952                         *large_page = total;
 953                         break;
 954                 }
 955 
 956                 if (dma_pte_superpage(pte)) {
 957                         *large_page = total;
 958                         return pte;
 959                 }
 960 
 961                 parent = phys_to_virt(dma_pte_addr(pte));
 962                 total--;
 963         }
 964         return NULL;
 965 }
 966 
  967 /* clear last level ptes; a TLB flush should follow */
 968 static void dma_pte_clear_range(struct dmar_domain *domain,
 969                                 unsigned long start_pfn,
 970                                 unsigned long last_pfn)
 971 {
 972         unsigned int large_page;
 973         struct dma_pte *first_pte, *pte;
 974 
 975         BUG_ON(!domain_pfn_supported(domain, start_pfn));
 976         BUG_ON(!domain_pfn_supported(domain, last_pfn));
 977         BUG_ON(start_pfn > last_pfn);
 978 
 979         /* we don't need lock here; nobody else touches the iova range */
 980         do {
 981                 large_page = 1;
 982                 first_pte = pte = dma_pfn_level_pte(domain, start_pfn, 1, &large_page);
 983                 if (!pte) {
 984                         start_pfn = align_to_level(start_pfn + 1, large_page + 1);
 985                         continue;
 986                 }
 987                 do {
 988                         dma_clear_pte(pte);
 989                         start_pfn += lvl_to_nr_pages(large_page);
 990                         pte++;
 991                 } while (start_pfn <= last_pfn && !first_pte_in_page(pte));
 992 
 993                 domain_flush_cache(domain, first_pte,
 994                                    (void *)pte - (void *)first_pte);
 995 
 996         } while (start_pfn && start_pfn <= last_pfn);
 997 }
 998 
 999 static void dma_pte_free_level(struct dmar_domain *domain, int level,
1000                                int retain_level, struct dma_pte *pte,
1001                                unsigned long pfn, unsigned long start_pfn,
1002                                unsigned long last_pfn)
1003 {
1004         pfn = max(start_pfn, pfn);
1005         pte = &pte[pfn_level_offset(pfn, level)];
1006 
1007         do {
1008                 unsigned long level_pfn;
1009                 struct dma_pte *level_pte;
1010 
1011                 if (!dma_pte_present(pte) || dma_pte_superpage(pte))
1012                         goto next;
1013 
1014                 level_pfn = pfn & level_mask(level);
1015                 level_pte = phys_to_virt(dma_pte_addr(pte));
1016 
1017                 if (level > 2) {
1018                         dma_pte_free_level(domain, level - 1, retain_level,
1019                                            level_pte, level_pfn, start_pfn,
1020                                            last_pfn);
1021                 }
1022 
1023                 /*
1024                  * Free the page table if we're below the level we want to
1025                  * retain and the range covers the entire table.
1026                  */
1027                 if (level < retain_level && !(start_pfn > level_pfn ||
1028                       last_pfn < level_pfn + level_size(level) - 1)) {
1029                         dma_clear_pte(pte);
1030                         domain_flush_cache(domain, pte, sizeof(*pte));
1031                         free_pgtable_page(level_pte);
1032                 }
1033 next:
1034                 pfn += level_size(level);
1035         } while (!first_pte_in_page(++pte) && pfn <= last_pfn);
1036 }
1037 
1038 /*
1039  * clear last level (leaf) ptes and free page table pages below the
1040  * level we wish to keep intact.
1041  */
1042 static void dma_pte_free_pagetable(struct dmar_domain *domain,
1043                                    unsigned long start_pfn,
1044                                    unsigned long last_pfn,
1045                                    int retain_level)
1046 {
1047         BUG_ON(!domain_pfn_supported(domain, start_pfn));
1048         BUG_ON(!domain_pfn_supported(domain, last_pfn));
1049         BUG_ON(start_pfn > last_pfn);
1050 
1051         dma_pte_clear_range(domain, start_pfn, last_pfn);
1052 
1053         /* We don't need lock here; nobody else touches the iova range */
1054         dma_pte_free_level(domain, agaw_to_level(domain->agaw), retain_level,
1055                            domain->pgd, 0, start_pfn, last_pfn);
1056 
1057         /* free pgd */
1058         if (start_pfn == 0 && last_pfn == DOMAIN_MAX_PFN(domain->gaw)) {
1059                 free_pgtable_page(domain->pgd);
1060                 domain->pgd = NULL;
1061         }
1062 }
1063 
1064 /* When a page at a given level is being unlinked from its parent, we don't
1065    need to *modify* it at all. All we need to do is make a list of all the
1066    pages which can be freed just as soon as we've flushed the IOTLB and we
1067    know the hardware page-walk will no longer touch them.
1068    The 'pte' argument is the *parent* PTE, pointing to the page that is to
1069    be freed. */
1070 static struct page *dma_pte_list_pagetables(struct dmar_domain *domain,
1071                                             int level, struct dma_pte *pte,
1072                                             struct page *freelist)
1073 {
1074         struct page *pg;
1075 
1076         pg = pfn_to_page(dma_pte_addr(pte) >> PAGE_SHIFT);
1077         pg->freelist = freelist;
1078         freelist = pg;
1079 
1080         if (level == 1)
1081                 return freelist;
1082 
1083         pte = page_address(pg);
1084         do {
1085                 if (dma_pte_present(pte) && !dma_pte_superpage(pte))
1086                         freelist = dma_pte_list_pagetables(domain, level - 1,
1087                                                            pte, freelist);
1088                 pte++;
1089         } while (!first_pte_in_page(pte));
1090 
1091         return freelist;
1092 }
1093 
1094 static struct page *dma_pte_clear_level(struct dmar_domain *domain, int level,
1095                                         struct dma_pte *pte, unsigned long pfn,
1096                                         unsigned long start_pfn,
1097                                         unsigned long last_pfn,
1098                                         struct page *freelist)
1099 {
1100         struct dma_pte *first_pte = NULL, *last_pte = NULL;
1101 
1102         pfn = max(start_pfn, pfn);
1103         pte = &pte[pfn_level_offset(pfn, level)];
1104 
1105         do {
1106                 unsigned long level_pfn;
1107 
1108                 if (!dma_pte_present(pte))
1109                         goto next;
1110 
1111                 level_pfn = pfn & level_mask(level);
1112 
1113                 /* If range covers entire pagetable, free it */
1114                 if (start_pfn <= level_pfn &&
1115                     last_pfn >= level_pfn + level_size(level) - 1) {
1116                         /* These subordinate page tables are going away entirely. Don't
1117                            bother to clear them; we're just going to *free* them. */
1118                         if (level > 1 && !dma_pte_superpage(pte))
1119                                 freelist = dma_pte_list_pagetables(domain, level - 1, pte, freelist);
1120 
1121                         dma_clear_pte(pte);
1122                         if (!first_pte)
1123                                 first_pte = pte;
1124                         last_pte = pte;
1125                 } else if (level > 1) {
1126                         /* Recurse down into a level that isn't *entirely* obsolete */
1127                         freelist = dma_pte_clear_level(domain, level - 1,
1128                                                        phys_to_virt(dma_pte_addr(pte)),
1129                                                        level_pfn, start_pfn, last_pfn,
1130                                                        freelist);
1131                 }
1132 next:
1133                 pfn += level_size(level);
1134         } while (!first_pte_in_page(++pte) && pfn <= last_pfn);
1135 
1136         if (first_pte)
1137                 domain_flush_cache(domain, first_pte,
1138                                    (void *)++last_pte - (void *)first_pte);
1139 
1140         return freelist;
1141 }
1142 
1143 /* We can't just free the pages because the IOMMU may still be walking
1144    the page tables, and may have cached the intermediate levels. The
1145    pages can only be freed after the IOTLB flush has been done. */
1146 static struct page *domain_unmap(struct dmar_domain *domain,
1147                                  unsigned long start_pfn,
1148                                  unsigned long last_pfn)
1149 {
1150         struct page *freelist;
1151 
1152         BUG_ON(!domain_pfn_supported(domain, start_pfn));
1153         BUG_ON(!domain_pfn_supported(domain, last_pfn));
1154         BUG_ON(start_pfn > last_pfn);
1155 
1156         /* we don't need lock here; nobody else touches the iova range */
1157         freelist = dma_pte_clear_level(domain, agaw_to_level(domain->agaw),
1158                                        domain->pgd, 0, start_pfn, last_pfn, NULL);
1159 
1160         /* free pgd */
1161         if (start_pfn == 0 && last_pfn == DOMAIN_MAX_PFN(domain->gaw)) {
1162                 struct page *pgd_page = virt_to_page(domain->pgd);
1163                 pgd_page->freelist = freelist;
1164                 freelist = pgd_page;
1165 
1166                 domain->pgd = NULL;
1167         }
1168 
1169         return freelist;
1170 }
1171 
1172 static void dma_free_pagelist(struct page *freelist)
1173 {
1174         struct page *pg;
1175 
1176         while ((pg = freelist)) {
1177                 freelist = pg->freelist;
1178                 free_pgtable_page(page_address(pg));
1179         }
1180 }
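
/*
 * A minimal sketch of how the helpers above are meant to be paired:
 * detach the page-table pages first, flush the IOTLB, and only then
 * hand the pages back to the allocator, e.g.
 *
 *	freelist = domain_unmap(domain, start_pfn, last_pfn);
 *	iommu_flush_iotlb_psi(iommu, domain, start_pfn,
 *			      last_pfn - start_pfn + 1, 0, 0);
 *	dma_free_pagelist(freelist);
 *
 * so that hardware can never walk into a page that has already been
 * reused.
 */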
1181 
1182 static void iova_entry_free(unsigned long data)
1183 {
1184         struct page *freelist = (struct page *)data;
1185 
1186         dma_free_pagelist(freelist);
1187 }
1188 
1189 /* iommu handling */
1190 static int iommu_alloc_root_entry(struct intel_iommu *iommu)
1191 {
1192         struct root_entry *root;
1193         unsigned long flags;
1194 
1195         root = (struct root_entry *)alloc_pgtable_page(iommu->node);
1196         if (!root) {
1197                 pr_err("Allocating root entry for %s failed\n",
1198                         iommu->name);
1199                 return -ENOMEM;
1200         }
1201 
1202         __iommu_flush_cache(iommu, root, ROOT_SIZE);
1203 
1204         spin_lock_irqsave(&iommu->lock, flags);
1205         iommu->root_entry = root;
1206         spin_unlock_irqrestore(&iommu->lock, flags);
1207 
1208         return 0;
1209 }
1210 
1211 static void iommu_set_root_entry(struct intel_iommu *iommu)
1212 {
1213         u64 addr;
1214         u32 sts;
1215         unsigned long flag;
1216 
1217         addr = virt_to_phys(iommu->root_entry);
1218         if (sm_supported(iommu))
1219                 addr |= DMA_RTADDR_SMT;
1220 
1221         raw_spin_lock_irqsave(&iommu->register_lock, flag);
1222         dmar_writeq(iommu->reg + DMAR_RTADDR_REG, addr);
1223 
1224         writel(iommu->gcmd | DMA_GCMD_SRTP, iommu->reg + DMAR_GCMD_REG);
1225 
1226         /* Make sure hardware complete it */
1227         IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
1228                       readl, (sts & DMA_GSTS_RTPS), sts);
1229 
1230         raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
1231 }
1232 
1233 void iommu_flush_write_buffer(struct intel_iommu *iommu)
1234 {
1235         u32 val;
1236         unsigned long flag;
1237 
1238         if (!rwbf_quirk && !cap_rwbf(iommu->cap))
1239                 return;
1240 
1241         raw_spin_lock_irqsave(&iommu->register_lock, flag);
1242         writel(iommu->gcmd | DMA_GCMD_WBF, iommu->reg + DMAR_GCMD_REG);
1243 
1244         /* Make sure hardware complete it */
1245         IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
1246                       readl, (!(val & DMA_GSTS_WBFS)), val);
1247 
1248         raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
1249 }
1250 
1251 /* return value determines if we need a write buffer flush */
1252 static void __iommu_flush_context(struct intel_iommu *iommu,
1253                                   u16 did, u16 source_id, u8 function_mask,
1254                                   u64 type)
1255 {
1256         u64 val = 0;
1257         unsigned long flag;
1258 
1259         switch (type) {
1260         case DMA_CCMD_GLOBAL_INVL:
1261                 val = DMA_CCMD_GLOBAL_INVL;
1262                 break;
1263         case DMA_CCMD_DOMAIN_INVL:
1264                 val = DMA_CCMD_DOMAIN_INVL|DMA_CCMD_DID(did);
1265                 break;
1266         case DMA_CCMD_DEVICE_INVL:
1267                 val = DMA_CCMD_DEVICE_INVL|DMA_CCMD_DID(did)
1268                         | DMA_CCMD_SID(source_id) | DMA_CCMD_FM(function_mask);
1269                 break;
1270         default:
1271                 BUG();
1272         }
1273         val |= DMA_CCMD_ICC;
1274 
1275         raw_spin_lock_irqsave(&iommu->register_lock, flag);
1276         dmar_writeq(iommu->reg + DMAR_CCMD_REG, val);
1277 
1278         /* Make sure hardware complete it */
1279         IOMMU_WAIT_OP(iommu, DMAR_CCMD_REG,
1280                 dmar_readq, (!(val & DMA_CCMD_ICC)), val);
1281 
1282         raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
1283 }
1284 
1285 /* return value determines if we need a write buffer flush */
1286 static void __iommu_flush_iotlb(struct intel_iommu *iommu, u16 did,
1287                                 u64 addr, unsigned int size_order, u64 type)
1288 {
1289         int tlb_offset = ecap_iotlb_offset(iommu->ecap);
1290         u64 val = 0, val_iva = 0;
1291         unsigned long flag;
1292 
1293         switch (type) {
1294         case DMA_TLB_GLOBAL_FLUSH:
1295                 /* global flush doesn't need set IVA_REG */
1296                 val = DMA_TLB_GLOBAL_FLUSH|DMA_TLB_IVT;
1297                 break;
1298         case DMA_TLB_DSI_FLUSH:
1299                 val = DMA_TLB_DSI_FLUSH|DMA_TLB_IVT|DMA_TLB_DID(did);
1300                 break;
1301         case DMA_TLB_PSI_FLUSH:
1302                 val = DMA_TLB_PSI_FLUSH|DMA_TLB_IVT|DMA_TLB_DID(did);
1303                 /* IH bit is passed in as part of address */
1304                 val_iva = size_order | addr;
1305                 break;
1306         default:
1307                 BUG();
1308         }
1309         /* Note: set drain read/write */
1310 #if 0
1311         /*
1312          * This is probably just to be extra safe. Looks like we can
1313          * ignore it without any impact.
1314          */
1315         if (cap_read_drain(iommu->cap))
1316                 val |= DMA_TLB_READ_DRAIN;
1317 #endif
1318         if (cap_write_drain(iommu->cap))
1319                 val |= DMA_TLB_WRITE_DRAIN;
1320 
1321         raw_spin_lock_irqsave(&iommu->register_lock, flag);
1322         /* Note: Only uses first TLB reg currently */
1323         if (val_iva)
1324                 dmar_writeq(iommu->reg + tlb_offset, val_iva);
1325         dmar_writeq(iommu->reg + tlb_offset + 8, val);
1326 
1327         /* Make sure hardware complete it */
1328         IOMMU_WAIT_OP(iommu, tlb_offset + 8,
1329                 dmar_readq, (!(val & DMA_TLB_IVT)), val);
1330 
1331         raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
1332 
1333         /* check IOTLB invalidation granularity */
1334         if (DMA_TLB_IAIG(val) == 0)
1335                 pr_err("Flush IOTLB failed\n");
1336         if (DMA_TLB_IAIG(val) != DMA_TLB_IIRG(type))
1337                 pr_debug("TLB flush request %Lx, actual %Lx\n",
1338                         (unsigned long long)DMA_TLB_IIRG(type),
1339                         (unsigned long long)DMA_TLB_IAIG(val));
1340 }
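
/*
 * An illustrative call with made-up values: a page-selective invalidation of
 * 16 pages starting at IOVA 0x100000 for domain-id 5 would look roughly like
 *
 *     __iommu_flush_iotlb(iommu, 5, 0x100000, 4, DMA_TLB_PSI_FLUSH);
 *
 * i.e. addr carries the (optionally IH-tagged) base address and
 * size_order = 4 covers 2^4 = 16 pages.  See iommu_flush_iotlb_psi() below
 * for the real caller, which reaches this code through the
 * iommu->flush.flush_iotlb hook when register-based invalidation is in use.
 */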
1341 
1342 static struct device_domain_info *
1343 iommu_support_dev_iotlb(struct dmar_domain *domain, struct intel_iommu *iommu,
1344                          u8 bus, u8 devfn)
1345 {
1346         struct device_domain_info *info;
1347 
1348         assert_spin_locked(&device_domain_lock);
1349 
1350         if (!iommu->qi)
1351                 return NULL;
1352 
1353         list_for_each_entry(info, &domain->devices, link)
1354                 if (info->iommu == iommu && info->bus == bus &&
1355                     info->devfn == devfn) {
1356                         if (info->ats_supported && info->dev)
1357                                 return info;
1358                         break;
1359                 }
1360 
1361         return NULL;
1362 }
1363 
1364 static void domain_update_iotlb(struct dmar_domain *domain)
1365 {
1366         struct device_domain_info *info;
1367         bool has_iotlb_device = false;
1368 
1369         assert_spin_locked(&device_domain_lock);
1370 
1371         list_for_each_entry(info, &domain->devices, link) {
1372                 struct pci_dev *pdev;
1373 
1374                 if (!info->dev || !dev_is_pci(info->dev))
1375                         continue;
1376 
1377                 pdev = to_pci_dev(info->dev);
1378                 if (pdev->ats_enabled) {
1379                         has_iotlb_device = true;
1380                         break;
1381                 }
1382         }
1383 
1384         domain->has_iotlb_device = has_iotlb_device;
1385 }
1386 
1387 static void iommu_enable_dev_iotlb(struct device_domain_info *info)
1388 {
1389         struct pci_dev *pdev;
1390 
1391         assert_spin_locked(&device_domain_lock);
1392 
1393         if (!info || !dev_is_pci(info->dev))
1394                 return;
1395 
1396         pdev = to_pci_dev(info->dev);
1397         /* For an IOMMU that supports device IOTLB throttling (DIT), we assign
1398          * the PFSID to the invalidation descriptor of a VF so that the IOMMU HW
1399          * can gauge queue depth at the PF level. If DIT is not supported, PFSID
1400          * is treated as reserved and should be set to 0.
1401          */
1402         if (!ecap_dit(info->iommu->ecap))
1403                 info->pfsid = 0;
1404         else {
1405                 struct pci_dev *pf_pdev;
1406 
1407                 /* pci_physfn() returns pdev itself if the device is not a VF */
1408                 pf_pdev = pci_physfn(pdev);
1409                 info->pfsid = pci_dev_id(pf_pdev);
1410         }
1411 
1412 #ifdef CONFIG_INTEL_IOMMU_SVM
1413         /* The PCIe spec, in its wisdom, declares that the behaviour of
1414            the device if you enable PASID support after ATS support is
1415            undefined. So always enable PASID support on devices which
1416            have it, even if we can't yet know if we're ever going to
1417            use it. */
1418         if (info->pasid_supported && !pci_enable_pasid(pdev, info->pasid_supported & ~1))
1419                 info->pasid_enabled = 1;
1420 
1421         if (info->pri_supported &&
1422             (info->pasid_enabled ? pci_prg_resp_pasid_required(pdev) : 1)  &&
1423             !pci_reset_pri(pdev) && !pci_enable_pri(pdev, 32))
1424                 info->pri_enabled = 1;
1425 #endif
1426         if (!pdev->untrusted && info->ats_supported &&
1427             pci_ats_page_aligned(pdev) &&
1428             !pci_enable_ats(pdev, VTD_PAGE_SHIFT)) {
1429                 info->ats_enabled = 1;
1430                 domain_update_iotlb(info->domain);
1431                 info->ats_qdep = pci_ats_queue_depth(pdev);
1432         }
1433 }
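
/*
 * A worked example of the PFSID derivation above, using hypothetical
 * addresses: for VF 0000:03:10.2 whose PF is 0000:03:00.0,
 *
 *     VF requester id:  (0x03 << 8) | PCI_DEVFN(0x10, 2) = 0x0382
 *     info->pfsid:      pci_dev_id(pci_physfn(pdev))     = 0x0300
 *
 * so, when the IOMMU supports DIT, device-IOTLB invalidations for the VF
 * carry the PF's source id and the hardware can gauge queue depth at the
 * PF level.
 */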
1434 
1435 static void iommu_disable_dev_iotlb(struct device_domain_info *info)
1436 {
1437         struct pci_dev *pdev;
1438 
1439         assert_spin_locked(&device_domain_lock);
1440 
1441         if (!dev_is_pci(info->dev))
1442                 return;
1443 
1444         pdev = to_pci_dev(info->dev);
1445 
1446         if (info->ats_enabled) {
1447                 pci_disable_ats(pdev);
1448                 info->ats_enabled = 0;
1449                 domain_update_iotlb(info->domain);
1450         }
1451 #ifdef CONFIG_INTEL_IOMMU_SVM
1452         if (info->pri_enabled) {
1453                 pci_disable_pri(pdev);
1454                 info->pri_enabled = 0;
1455         }
1456         if (info->pasid_enabled) {
1457                 pci_disable_pasid(pdev);
1458                 info->pasid_enabled = 0;
1459         }
1460 #endif
1461 }
1462 
1463 static void iommu_flush_dev_iotlb(struct dmar_domain *domain,
1464                                   u64 addr, unsigned mask)
1465 {
1466         u16 sid, qdep;
1467         unsigned long flags;
1468         struct device_domain_info *info;
1469 
1470         if (!domain->has_iotlb_device)
1471                 return;
1472 
1473         spin_lock_irqsave(&device_domain_lock, flags);
1474         list_for_each_entry(info, &domain->devices, link) {
1475                 if (!info->ats_enabled)
1476                         continue;
1477 
1478                 sid = info->bus << 8 | info->devfn;
1479                 qdep = info->ats_qdep;
1480                 qi_flush_dev_iotlb(info->iommu, sid, info->pfsid,
1481                                 qdep, addr, mask);
1482         }
1483         spin_unlock_irqrestore(&device_domain_lock, flags);
1484 }
1485 
1486 static void iommu_flush_iotlb_psi(struct intel_iommu *iommu,
1487                                   struct dmar_domain *domain,
1488                                   unsigned long pfn, unsigned int pages,
1489                                   int ih, int map)
1490 {
1491         unsigned int mask = ilog2(__roundup_pow_of_two(pages));
1492         uint64_t addr = (uint64_t)pfn << VTD_PAGE_SHIFT;
1493         u16 did = domain->iommu_did[iommu->seq_id];
1494 
1495         BUG_ON(pages == 0);
1496 
1497         if (ih)
1498                 ih = 1 << 6;
1499         /*
1500          * Fall back to a domain-selective flush if PSI is not supported or
1501          * the size is too big.
1502          * PSI requires the region size to be 2^x pages, with the base address
1503          * naturally aligned to that size.
1504          */
1505         if (!cap_pgsel_inv(iommu->cap) || mask > cap_max_amask_val(iommu->cap))
1506                 iommu->flush.flush_iotlb(iommu, did, 0, 0,
1507                                                 DMA_TLB_DSI_FLUSH);
1508         else
1509                 iommu->flush.flush_iotlb(iommu, did, addr | ih, mask,
1510                                                 DMA_TLB_PSI_FLUSH);
1511 
1512         /*
1513          * In caching mode, mappings that change from non-present to present require
1514          * a flush. However, the device IOTLB doesn't need to be flushed in this case.
1515          */
1516         if (!cap_caching_mode(iommu->cap) || !map)
1517                 iommu_flush_dev_iotlb(domain, addr, mask);
1518 }
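
/*
 * A minimal standalone sketch (not driver code) of the PSI mask arithmetic
 * used above, mask = ilog2(__roundup_pow_of_two(pages)), written in plain C
 * so the maths can be checked in isolation.
 */
static unsigned int psi_mask_for_pages(unsigned long pages)
{
        unsigned int mask = 0;

        /* round the page count up to a power of two and take its log2 */
        while ((1UL << mask) < pages)
                mask++;
        return mask;
}

/*
 * Example: pages = 3 is rounded up to 4, so mask = 2 and the invalidation
 * covers 2^2 = 4 VT-d pages; addr is expected to be naturally aligned to
 * that size, as the comment above notes.
 */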
1519 
1520 /* Notification for newly created mappings */
1521 static inline void __mapping_notify_one(struct intel_iommu *iommu,
1522                                         struct dmar_domain *domain,
1523                                         unsigned long pfn, unsigned int pages)
1524 {
1525         /* It's a non-present to present mapping. Only flush in caching mode */
1526         if (cap_caching_mode(iommu->cap))
1527                 iommu_flush_iotlb_psi(iommu, domain, pfn, pages, 0, 1);
1528         else
1529                 iommu_flush_write_buffer(iommu);
1530 }
1531 
1532 static void iommu_flush_iova(struct iova_domain *iovad)
1533 {
1534         struct dmar_domain *domain;
1535         int idx;
1536 
1537         domain = container_of(iovad, struct dmar_domain, iovad);
1538 
1539         for_each_domain_iommu(idx, domain) {
1540                 struct intel_iommu *iommu = g_iommus[idx];
1541                 u16 did = domain->iommu_did[iommu->seq_id];
1542 
1543                 iommu->flush.flush_iotlb(iommu, did, 0, 0, DMA_TLB_DSI_FLUSH);
1544 
1545                 if (!cap_caching_mode(iommu->cap))
1546                         iommu_flush_dev_iotlb(get_iommu_domain(iommu, did),
1547                                               0, MAX_AGAW_PFN_WIDTH);
1548         }
1549 }
1550 
1551 static void iommu_disable_protect_mem_regions(struct intel_iommu *iommu)
1552 {
1553         u32 pmen;
1554         unsigned long flags;
1555 
1556         if (!cap_plmr(iommu->cap) && !cap_phmr(iommu->cap))
1557                 return;
1558 
1559         raw_spin_lock_irqsave(&iommu->register_lock, flags);
1560         pmen = readl(iommu->reg + DMAR_PMEN_REG);
1561         pmen &= ~DMA_PMEN_EPM;
1562         writel(pmen, iommu->reg + DMAR_PMEN_REG);
1563 
1564         /* wait for the protected region status bit to clear */
1565         IOMMU_WAIT_OP(iommu, DMAR_PMEN_REG,
1566                 readl, !(pmen & DMA_PMEN_PRS), pmen);
1567 
1568         raw_spin_unlock_irqrestore(&iommu->register_lock, flags);
1569 }
1570 
1571 static void iommu_enable_translation(struct intel_iommu *iommu)
1572 {
1573         u32 sts;
1574         unsigned long flags;
1575 
1576         raw_spin_lock_irqsave(&iommu->register_lock, flags);
1577         iommu->gcmd |= DMA_GCMD_TE;
1578         writel(iommu->gcmd, iommu->reg + DMAR_GCMD_REG);
1579 
1580         /* Make sure hardware completes it */
1581         IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
1582                       readl, (sts & DMA_GSTS_TES), sts);
1583 
1584         raw_spin_unlock_irqrestore(&iommu->register_lock, flags);
1585 }
1586 
1587 static void iommu_disable_translation(struct intel_iommu *iommu)
1588 {
1589         u32 sts;
1590         unsigned long flag;
1591 
1592         raw_spin_lock_irqsave(&iommu->register_lock, flag);
1593         iommu->gcmd &= ~DMA_GCMD_TE;
1594         writel(iommu->gcmd, iommu->reg + DMAR_GCMD_REG);
1595 
1596         /* Make sure hardware completes it */
1597         IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
1598                       readl, (!(sts & DMA_GSTS_TES)), sts);
1599 
1600         raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
1601 }
1602 
1603 static int iommu_init_domains(struct intel_iommu *iommu)
1604 {
1605         u32 ndomains, nlongs;
1606         size_t size;
1607 
1608         ndomains = cap_ndoms(iommu->cap);
1609         pr_debug("%s: Number of Domains supported <%d>\n",
1610                  iommu->name, ndomains);
1611         nlongs = BITS_TO_LONGS(ndomains);
1612 
1613         spin_lock_init(&iommu->lock);
1614 
1615         iommu->domain_ids = kcalloc(nlongs, sizeof(unsigned long), GFP_KERNEL);
1616         if (!iommu->domain_ids) {
1617                 pr_err("%s: Allocating domain id array failed\n",
1618                        iommu->name);
1619                 return -ENOMEM;
1620         }
1621 
1622         size = (ALIGN(ndomains, 256) >> 8) * sizeof(struct dmar_domain **);
1623         iommu->domains = kzalloc(size, GFP_KERNEL);
1624 
1625         if (iommu->domains) {
1626                 size = 256 * sizeof(struct dmar_domain *);
1627                 iommu->domains[0] = kzalloc(size, GFP_KERNEL);
1628         }
1629 
1630         if (!iommu->domains || !iommu->domains[0]) {
1631                 pr_err("%s: Allocating domain array failed\n",
1632                        iommu->name);
1633                 kfree(iommu->domain_ids);
1634                 kfree(iommu->domains);
1635                 iommu->domain_ids = NULL;
1636                 iommu->domains    = NULL;
1637                 return -ENOMEM;
1638         }
1639 
1640         /*
1641          * If Caching mode is set, then invalid translations are tagged
1642          * with domain-id 0, hence we need to pre-allocate it. We also
1643          * use domain-id 0 as a marker for non-allocated domain-id, so
1644          * make sure it is not used for a real domain.
1645          */
1646         set_bit(0, iommu->domain_ids);
1647 
1648         /*
1649          * VT-d spec rev 3.0 (section 6.2.3.1) requires that each PASID
1650          * entry for first-level or pass-through translation modes be
1651          * programmed with a domain id different from those used for
1652          * second-level or nested translation. We reserve a domain id for
1653          * this purpose.
1654          */
1655         if (sm_supported(iommu))
1656                 set_bit(FLPT_DEFAULT_DID, iommu->domain_ids);
1657 
1658         return 0;
1659 }
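
/*
 * A worked sizing example for the allocations above: with cap_ndoms() ==
 * 65536, the domain-id bitmap needs BITS_TO_LONGS(65536) == 1024 longs on a
 * 64-bit kernel, and iommu->domains becomes a 256-entry array of pointers,
 * each of which may later point to a 256-slot chunk of struct dmar_domain
 * pointers.  Only chunk 0 is allocated up front; the remaining chunks are
 * expected to be filled in lazily as domain ids get used (see
 * set_iommu_domain()).
 */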
1660 
1661 static void disable_dmar_iommu(struct intel_iommu *iommu)
1662 {
1663         struct device_domain_info *info, *tmp;
1664         unsigned long flags;
1665 
1666         if (!iommu->domains || !iommu->domain_ids)
1667                 return;
1668 
1669         spin_lock_irqsave(&device_domain_lock, flags);
1670         list_for_each_entry_safe(info, tmp, &device_domain_list, global) {
1671                 if (info->iommu != iommu)
1672                         continue;
1673 
1674                 if (!info->dev || !info->domain)
1675                         continue;
1676 
1677                 __dmar_remove_one_dev_info(info);
1678         }
1679         spin_unlock_irqrestore(&device_domain_lock, flags);
1680 
1681         if (iommu->gcmd & DMA_GCMD_TE)
1682                 iommu_disable_translation(iommu);
1683 }
1684 
1685 static void free_dmar_iommu(struct intel_iommu *iommu)
1686 {
1687         if ((iommu->domains) && (iommu->domain_ids)) {
1688                 int elems = ALIGN(cap_ndoms(iommu->cap), 256) >> 8;
1689                 int i;
1690 
1691                 for (i = 0; i < elems; i++)
1692                         kfree(iommu->domains[i]);
1693                 kfree(iommu->domains);
1694                 kfree(iommu->domain_ids);
1695                 iommu->domains = NULL;
1696                 iommu->domain_ids = NULL;
1697         }
1698 
1699         g_iommus[iommu->seq_id] = NULL;
1700 
1701         /* free context mapping */
1702         free_context_table(iommu);
1703 
1704 #ifdef CONFIG_INTEL_IOMMU_SVM
1705         if (pasid_supported(iommu)) {
1706                 if (ecap_prs(iommu->ecap))
1707                         intel_svm_finish_prq(iommu);
1708         }
1709 #endif
1710 }
1711 
1712 static struct dmar_domain *alloc_domain(int flags)
1713 {
1714         struct dmar_domain *domain;
1715 
1716         domain = alloc_domain_mem();
1717         if (!domain)
1718                 return NULL;
1719 
1720         memset(domain, 0, sizeof(*domain));
1721         domain->nid = NUMA_NO_NODE;
1722         domain->flags = flags;
1723         domain->has_iotlb_device = false;
1724         INIT_LIST_HEAD(&domain->devices);
1725 
1726         return domain;
1727 }
1728 
1729 /* Must be called with iommu->lock */
1730 static int domain_attach_iommu(struct dmar_domain *domain,
1731                                struct intel_iommu *iommu)
1732 {
1733         unsigned long ndomains;
1734         int num;
1735 
1736         assert_spin_locked(&device_domain_lock);
1737         assert_spin_locked(&iommu->lock);
1738 
1739         domain->iommu_refcnt[iommu->seq_id] += 1;
1740         domain->iommu_count += 1;
1741         if (domain->iommu_refcnt[iommu->seq_id] == 1) {
1742                 ndomains = cap_ndoms(iommu->cap);
1743                 num      = find_first_zero_bit(iommu->domain_ids, ndomains);
1744 
1745                 if (num >= ndomains) {
1746                         pr_err("%s: No free domain ids\n", iommu->name);
1747                         domain->iommu_refcnt[iommu->seq_id] -= 1;
1748                         domain->iommu_count -= 1;
1749                         return -ENOSPC;
1750                 }
1751 
1752                 set_bit(num, iommu->domain_ids);
1753                 set_iommu_domain(iommu, num, domain);
1754 
1755                 domain->iommu_did[iommu->seq_id] = num;
1756                 domain->nid                      = iommu->node;
1757 
1758                 domain_update_iommu_cap(domain);
1759         }
1760 
1761         return 0;
1762 }
1763 
1764 static int domain_detach_iommu(struct dmar_domain *domain,
1765                                struct intel_iommu *iommu)
1766 {
1767         int num, count;
1768 
1769         assert_spin_locked(&device_domain_lock);
1770         assert_spin_locked(&iommu->lock);
1771 
1772         domain->iommu_refcnt[iommu->seq_id] -= 1;
1773         count = --domain->iommu_count;
1774         if (domain->iommu_refcnt[iommu->seq_id] == 0) {
1775                 num = domain->iommu_did[iommu->seq_id];
1776                 clear_bit(num, iommu->domain_ids);
1777                 set_iommu_domain(iommu, num, NULL);
1778 
1779                 domain_update_iommu_cap(domain);
1780                 domain->iommu_did[iommu->seq_id] = 0;
1781         }
1782 
1783         return count;
1784 }
1785 
1786 static struct iova_domain reserved_iova_list;
1787 static struct lock_class_key reserved_rbtree_key;
1788 
1789 static int dmar_init_reserved_ranges(void)
1790 {
1791         struct pci_dev *pdev = NULL;
1792         struct iova *iova;
1793         int i;
1794 
1795         init_iova_domain(&reserved_iova_list, VTD_PAGE_SIZE, IOVA_START_PFN);
1796 
1797         lockdep_set_class(&reserved_iova_list.iova_rbtree_lock,
1798                 &reserved_rbtree_key);
1799 
1800         /* IOAPIC ranges shouldn't be accessed by DMA */
1801         iova = reserve_iova(&reserved_iova_list, IOVA_PFN(IOAPIC_RANGE_START),
1802                 IOVA_PFN(IOAPIC_RANGE_END));
1803         if (!iova) {
1804                 pr_err("Reserve IOAPIC range failed\n");
1805                 return -ENODEV;
1806         }
1807 
1808         /* Reserve all PCI MMIO to avoid peer-to-peer access */
1809         for_each_pci_dev(pdev) {
1810                 struct resource *r;
1811 
1812                 for (i = 0; i < PCI_NUM_RESOURCES; i++) {
1813                         r = &pdev->resource[i];
1814                         if (!r->flags || !(r->flags & IORESOURCE_MEM))
1815                                 continue;
1816                         iova = reserve_iova(&reserved_iova_list,
1817                                             IOVA_PFN(r->start),
1818                                             IOVA_PFN(r->end));
1819                         if (!iova) {
1820                                 pci_err(pdev, "Reserve iova for %pR failed\n", r);
1821                                 return -ENODEV;
1822                         }
1823                 }
1824         }
1825         return 0;
1826 }
1827 
1828 static void domain_reserve_special_ranges(struct dmar_domain *domain)
1829 {
1830         copy_reserved_iova(&reserved_iova_list, &domain->iovad);
1831 }
1832 
1833 static inline int guestwidth_to_adjustwidth(int gaw)
1834 {
1835         int agaw;
1836         int r = (gaw - 12) % 9;
1837 
1838         if (r == 0)
1839                 agaw = gaw;
1840         else
1841                 agaw = gaw + 9 - r;
1842         if (agaw > 64)
1843                 agaw = 64;
1844         return agaw;
1845 }
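
/*
 * Worked examples for the rounding above: the adjusted width is the guest
 * width rounded up so that (agaw - 12) is a whole number of 9-bit
 * page-table levels.  39 and 48 are already aligned and come back
 * unchanged, while gaw = 40 gives r = (40 - 12) % 9 = 1 and is rounded up
 * to 40 + 9 - 1 = 48; anything above 64 is clamped to 64.
 */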
1846 
1847 static int domain_init(struct dmar_domain *domain, struct intel_iommu *iommu,
1848                        int guest_width)
1849 {
1850         int adjust_width, agaw;
1851         unsigned long sagaw;
1852         int err;
1853 
1854         init_iova_domain(&domain->iovad, VTD_PAGE_SIZE, IOVA_START_PFN);
1855 
1856         err = init_iova_flush_queue(&domain->iovad,
1857                                     iommu_flush_iova, iova_entry_free);
1858         if (err)
1859                 return err;
1860 
1861         domain_reserve_special_ranges(domain);
1862 
1863         /* calculate AGAW */
1864         if (guest_width > cap_mgaw(iommu->cap))
1865                 guest_width = cap_mgaw(iommu->cap);
1866         domain->gaw = guest_width;
1867         adjust_width = guestwidth_to_adjustwidth(guest_width);
1868         agaw = width_to_agaw(adjust_width);
1869         sagaw = cap_sagaw(iommu->cap);
1870         if (!test_bit(agaw, &sagaw)) {
1871                 /* hardware doesn't support it, choose a bigger one */
1872                 pr_debug("Hardware doesn't support agaw %d\n", agaw);
1873                 agaw = find_next_bit(&sagaw, 5, agaw);
1874                 if (agaw >= 5)
1875                         return -ENODEV;
1876         }
1877         domain->agaw = agaw;
1878 
1879         if (ecap_coherent(iommu->ecap))
1880                 domain->iommu_coherency = 1;
1881         else
1882                 domain->iommu_coherency = 0;
1883 
1884         if (ecap_sc_support(iommu->ecap))
1885                 domain->iommu_snooping = 1;
1886         else
1887                 domain->iommu_snooping = 0;
1888 
1889         if (intel_iommu_superpage)
1890                 domain->iommu_superpage = fls(cap_super_page_val(iommu->cap));
1891         else
1892                 domain->iommu_superpage = 0;
1893 
1894         domain->nid = iommu->node;
1895 
1896         /* always allocate the top pgd */
1897         domain->pgd = (struct dma_pte *)alloc_pgtable_page(domain->nid);
1898         if (!domain->pgd)
1899                 return -ENOMEM;
1900         __iommu_flush_cache(iommu, domain->pgd, PAGE_SIZE);
1901         return 0;
1902 }
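
/*
 * A worked example of the AGAW selection above, assuming the usual VT-d
 * stride of 9 bits per level: with guest_width = 48, adjust_width stays 48
 * and width_to_agaw(48) = (48 - 30) / 9 = 2.  If bit 2 of cap_sagaw() is
 * set (as the test_bit() above checks), the hardware supports that width
 * directly and the domain ends up with 4-level page tables; otherwise
 * find_next_bit() picks the next larger supported AGAW.
 */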
1903 
1904 static void domain_exit(struct dmar_domain *domain)
1905 {
1906 
1907         /* Remove associated devices and clear attached or cached domains */
1908         domain_remove_dev_info(domain);
1909 
1910         /* destroy iovas */
1911         put_iova_domain(&domain->iovad);
1912 
1913         if (domain->pgd) {
1914                 struct page *freelist;
1915 
1916                 freelist = domain_unmap(domain, 0, DOMAIN_MAX_PFN(domain->gaw));
1917                 dma_free_pagelist(freelist);
1918         }
1919 
1920         free_domain_mem(domain);
1921 }
1922 
1923 /*
1924  * Get the PASID directory size for scalable mode context entry.
1925  * Value of X in the PDTS field of a scalable mode context entry
1926  * indicates PASID directory with 2^(X + 7) entries.
1927  */
1928 static inline unsigned long context_get_sm_pds(struct pasid_table *table)
1929 {
1930         int pds, max_pde;
1931 
1932         max_pde = table->max_pasid >> PASID_PDE_SHIFT;
1933         pds = find_first_bit((unsigned long *)&max_pde, MAX_NR_PASID_BITS);
1934         if (pds < 7)
1935                 return 0;
1936 
1937         return pds - 7;
1938 }
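
/*
 * A worked example, assuming each PASID directory entry covers 2^6 PASIDs
 * (64-byte PASID entries in a 4KiB table page): with table->max_pasid ==
 * 1 << 20, max_pde == 1 << 14, the first set bit is 14, and the function
 * returns 14 - 7 = 7, i.e. the context entry advertises a directory with
 * 2^(7 + 7) = 2^14 entries, exactly enough to cover 2^20 PASIDs.
 */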
1939 
1940 /*
1941  * Set the RID_PASID field of a scalable mode context entry. The
1942  * IOMMU hardware will use the PASID value set in this field for
1943  * DMA translations of DMA requests without PASID.
1944  */
1945 static inline void
1946 context_set_sm_rid2pasid(struct context_entry *context, unsigned long pasid)
1947 {
1948         context->hi |= pasid & ((1 << 20) - 1);
1949         context->hi |= (1 << 20);
1950 }
1951 
1952 /*
1953  * Set the DTE(Device-TLB Enable) field of a scalable mode context
1954  * entry.
1955  */
1956 static inline void context_set_sm_dte(struct context_entry *context)
1957 {
1958         context->lo |= (1 << 2);
1959 }
1960 
1961 /*
1962  * Set the PRE(Page Request Enable) field of a scalable mode context
1963  * entry.
1964  */
1965 static inline void context_set_sm_pre(struct context_entry *context)
1966 {
1967         context->lo |= (1 << 4);
1968 }
1969 
1970 /* Convert value to context PASID directory size field coding. */
1971 #define context_pdts(pds)       (((pds) & 0x7) << 9)
1972 
1973 static int domain_context_mapping_one(struct dmar_domain *domain,
1974                                       struct intel_iommu *iommu,
1975                                       struct pasid_table *table,
1976                                       u8 bus, u8 devfn)
1977 {
1978         u16 did = domain->iommu_did[iommu->seq_id];
1979         int translation = CONTEXT_TT_MULTI_LEVEL;
1980         struct device_domain_info *info = NULL;
1981         struct context_entry *context;
1982         unsigned long flags;
1983         int ret;
1984 
1985         WARN_ON(did == 0);
1986 
1987         if (hw_pass_through && domain_type_is_si(domain))
1988                 translation = CONTEXT_TT_PASS_THROUGH;
1989 
1990         pr_debug("Set context mapping for %02x:%02x.%d\n",
1991                 bus, PCI_SLOT(devfn), PCI_FUNC(devfn));
1992 
1993         BUG_ON(!domain->pgd);
1994 
1995         spin_lock_irqsave(&device_domain_lock, flags);
1996         spin_lock(&iommu->lock);
1997 
1998         ret = -ENOMEM;
1999         context = iommu_context_addr(iommu, bus, devfn, 1);
2000         if (!context)
2001                 goto out_unlock;
2002 
2003         ret = 0;
2004         if (context_present(context))
2005                 goto out_unlock;
2006 
2007         /*
2008          * For kdump cases, old valid entries may be cached due to the
2009          * in-flight DMA and copied pgtable, but there is no unmapping
2010          * behaviour for them, thus we need an explicit cache flush for
2011          * the newly-mapped device. For kdump, at this point, the device
2012          * is supposed to finish reset at its driver probe stage, so no
2013          * in-flight DMA will exist, and we don't need to worry about it
2014          * any further.
2015          */
2016         if (context_copied(context)) {
2017                 u16 did_old = context_domain_id(context);
2018 
2019                 if (did_old < cap_ndoms(iommu->cap)) {
2020                         iommu->flush.flush_context(iommu, did_old,
2021                                                    (((u16)bus) << 8) | devfn,
2022                                                    DMA_CCMD_MASK_NOBIT,
2023                                                    DMA_CCMD_DEVICE_INVL);
2024                         iommu->flush.flush_iotlb(iommu, did_old, 0, 0,
2025                                                  DMA_TLB_DSI_FLUSH);
2026                 }
2027         }
2028 
2029         context_clear_entry(context);
2030 
2031         if (sm_supported(iommu)) {
2032                 unsigned long pds;
2033 
2034                 WARN_ON(!table);
2035 
2036                 /* Setup the PASID DIR pointer: */
2037                 pds = context_get_sm_pds(table);
2038                 context->lo = (u64)virt_to_phys(table->table) |
2039                                 context_pdts(pds);
2040 
2041                 /* Setup the RID_PASID field: */
2042                 context_set_sm_rid2pasid(context, PASID_RID2PASID);
2043 
2044                 /*
2045                  * Setup the Device-TLB enable bit and Page request
2046                  * Enable bit:
2047                  */
2048                 info = iommu_support_dev_iotlb(domain, iommu, bus, devfn);
2049                 if (info && info->ats_supported)
2050                         context_set_sm_dte(context);
2051                 if (info && info->pri_supported)
2052                         context_set_sm_pre(context);
2053         } else {
2054                 struct dma_pte *pgd = domain->pgd;
2055                 int agaw;
2056 
2057                 context_set_domain_id(context, did);
2058 
2059                 if (translation != CONTEXT_TT_PASS_THROUGH) {
2060                         /*
2061                          * Skip top levels of page tables for an IOMMU whose
2062                          * agaw is smaller than the domain's. Unnecessary for PT mode.
2063                          */
2064                         for (agaw = domain->agaw; agaw > iommu->agaw; agaw--) {
2065                                 ret = -ENOMEM;
2066                                 pgd = phys_to_virt(dma_pte_addr(pgd));
2067                                 if (!dma_pte_present(pgd))
2068                                         goto out_unlock;
2069                         }
2070 
2071                         info = iommu_support_dev_iotlb(domain, iommu, bus, devfn);
2072                         if (info && info->ats_supported)
2073                                 translation = CONTEXT_TT_DEV_IOTLB;
2074                         else
2075                                 translation = CONTEXT_TT_MULTI_LEVEL;
2076 
2077                         context_set_address_root(context, virt_to_phys(pgd));
2078                         context_set_address_width(context, agaw);
2079                 } else {
2080                         /*
2081                          * In pass-through mode, AW must be programmed to
2082                          * indicate the largest AGAW value supported by the
2083                          * hardware, and ASR is ignored by hardware.
2084                          */
2085                         context_set_address_width(context, iommu->msagaw);
2086                 }
2087 
2088                 context_set_translation_type(context, translation);
2089         }
2090 
2091         context_set_fault_enable(context);
2092         context_set_present(context);
2093         domain_flush_cache(domain, context, sizeof(*context));
2094 
2095         /*
2096          * It's a non-present to present mapping. If hardware doesn't cache
2097          * non-present entries we only need to flush the write-buffer. If it
2098          * _does_ cache non-present entries, then it does so in the special
2099          * domain #0, which we have to flush:
2100          */
2101         if (cap_caching_mode(iommu->cap)) {
2102                 iommu->flush.flush_context(iommu, 0,
2103                                            (((u16)bus) << 8) | devfn,
2104                                            DMA_CCMD_MASK_NOBIT,
2105                                            DMA_CCMD_DEVICE_INVL);
2106                 iommu->flush.flush_iotlb(iommu, did, 0, 0, DMA_TLB_DSI_FLUSH);
2107         } else {
2108                 iommu_flush_write_buffer(iommu);
2109         }
2110         iommu_enable_dev_iotlb(info);
2111 
2112         ret = 0;
2113 
2114 out_unlock:
2115         spin_unlock(&iommu->lock);
2116         spin_unlock_irqrestore(&device_domain_lock, flags);
2117 
2118         return ret;
2119 }
2120 
2121 struct domain_context_mapping_data {
2122         struct dmar_domain *domain;
2123         struct intel_iommu *iommu;
2124         struct pasid_table *table;
2125 };
2126 
2127 static int domain_context_mapping_cb(struct pci_dev *pdev,
2128                                      u16 alias, void *opaque)
2129 {
2130         struct domain_context_mapping_data *data = opaque;
2131 
2132         return domain_context_mapping_one(data->domain, data->iommu,
2133                                           data->table, PCI_BUS_NUM(alias),
2134                                           alias & 0xff);
2135 }
2136 
2137 static int
2138 domain_context_mapping(struct dmar_domain *domain, struct device *dev)
2139 {
2140         struct domain_context_mapping_data data;
2141         struct pasid_table *table;
2142         struct intel_iommu *iommu;
2143         u8 bus, devfn;
2144 
2145         iommu = device_to_iommu(dev, &bus, &devfn);
2146         if (!iommu)
2147                 return -ENODEV;
2148 
2149         table = intel_pasid_get_table(dev);
2150 
2151         if (!dev_is_pci(dev))
2152                 return domain_context_mapping_one(domain, iommu, table,
2153                                                   bus, devfn);
2154 
2155         data.domain = domain;
2156         data.iommu = iommu;
2157         data.table = table;
2158 
2159         return pci_for_each_dma_alias(to_pci_dev(dev),
2160                                       &domain_context_mapping_cb, &data);
2161 }
2162 
2163 static int domain_context_mapped_cb(struct pci_dev *pdev,
2164                                     u16 alias, void *opaque)
2165 {
2166         struct intel_iommu *iommu = opaque;
2167 
2168         return !device_context_mapped(iommu, PCI_BUS_NUM(alias), alias & 0xff);
2169 }
2170 
2171 static int domain_context_mapped(struct device *dev)
2172 {
2173         struct intel_iommu *iommu;
2174         u8 bus, devfn;
2175 
2176         iommu = device_to_iommu(dev, &bus, &devfn);
2177         if (!iommu)
2178                 return -ENODEV;
2179 
2180         if (!dev_is_pci(dev))
2181                 return device_context_mapped(iommu, bus, devfn);
2182 
2183         return !pci_for_each_dma_alias(to_pci_dev(dev),
2184                                        domain_context_mapped_cb, iommu);
2185 }
2186 
2187 /* Return the number of VT-d pages needed, rounded up to the MM page size */
2188 static inline unsigned long aligned_nrpages(unsigned long host_addr,
2189                                             size_t size)
2190 {
2191         host_addr &= ~PAGE_MASK;
2192         return PAGE_ALIGN(host_addr + size) >> VTD_PAGE_SHIFT;
2193 }
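
/*
 * A standalone sketch of the same arithmetic (not driver code), assuming
 * 4KiB MM pages so it can be checked in isolation.  For example, a
 * 0x1000-byte buffer starting at in-page offset 0x800 spans two pages:
 * ALIGN(0x800 + 0x1000, 0x1000) >> 12 == 2.
 */
static unsigned long example_aligned_nrpages(unsigned long host_addr,
                                             unsigned long size)
{
        const unsigned long mm_page = 4096;     /* assumed MM page size */

        host_addr &= mm_page - 1;               /* keep the in-page offset */
        return (host_addr + size + mm_page - 1) / mm_page;
}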
2194 
2195 /* Return largest possible superpage level for a given mapping */
2196 static inline int hardware_largepage_caps(struct dmar_domain *domain,
2197                                           unsigned long iov_pfn,
2198                                           unsigned long phy_pfn,
2199                                           unsigned long pages)
2200 {
2201         int support, level = 1;
2202         unsigned long pfnmerge;
2203 
2204         support = domain->iommu_superpage;
2205 
2206         /* To use a large page, the virtual *and* physical addresses
2207            must be aligned to 2MiB/1GiB/etc. Lower bits set in either
2208            of them will mean we have to use smaller pages. So just
2209            merge them and check both at once. */
2210         pfnmerge = iov_pfn | phy_pfn;
2211 
2212         while (support && !(pfnmerge & ~VTD_STRIDE_MASK)) {
2213                 pages >>= VTD_STRIDE_SHIFT;
2214                 if (!pages)
2215                         break;
2216                 pfnmerge >>= VTD_STRIDE_SHIFT;
2217                 level++;
2218                 support--;
2219         }
2220         return level;
2221 }
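
/*
 * A worked example: with domain->iommu_superpage == 2 (2MiB and 1GiB
 * supported), iov_pfn == phy_pfn == 0x200 and pages == 1024, the merged
 * pfn has its low 9 bits clear, so one pass through the loop yields
 * level == 2 (2MiB pages).  A second pass stops because the shifted
 * pfn (0x1) is no longer 512-page aligned, so 1GiB pages cannot be used.
 */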
2222 
2223 static int __domain_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
2224                             struct scatterlist *sg, unsigned long phys_pfn,
2225                             unsigned long nr_pages, int prot)
2226 {
2227         struct dma_pte *first_pte = NULL, *pte = NULL;
2228         phys_addr_t uninitialized_var(pteval);
2229         unsigned long sg_res = 0;
2230         unsigned int largepage_lvl = 0;
2231         unsigned long lvl_pages = 0;
2232 
2233         BUG_ON(!domain_pfn_supported(domain, iov_pfn + nr_pages - 1));
2234 
2235         if ((prot & (DMA_PTE_READ|DMA_PTE_WRITE)) == 0)
2236                 return -EINVAL;
2237 
2238         prot &= DMA_PTE_READ | DMA_PTE_WRITE | DMA_PTE_SNP;
2239 
2240         if (!sg) {
2241                 sg_res = nr_pages;
2242                 pteval = ((phys_addr_t)phys_pfn << VTD_PAGE_SHIFT) | prot;
2243         }
2244 
2245         while (nr_pages > 0) {
2246                 uint64_t tmp;
2247 
2248                 if (!sg_res) {
2249                         unsigned int pgoff = sg->offset & ~PAGE_MASK;
2250 
2251                         sg_res = aligned_nrpages(sg->offset, sg->length);
2252                         sg->dma_address = ((dma_addr_t)iov_pfn << VTD_PAGE_SHIFT) + pgoff;
2253                         sg->dma_length = sg->length;
2254                         pteval = (sg_phys(sg) - pgoff) | prot;
2255                         phys_pfn = pteval >> VTD_PAGE_SHIFT;
2256                 }
2257 
2258                 if (!pte) {
2259                         largepage_lvl = hardware_largepage_caps(domain, iov_pfn, phys_pfn, sg_res);
2260 
2261                         first_pte = pte = pfn_to_dma_pte(domain, iov_pfn, &largepage_lvl);
2262                         if (!pte)
2263                                 return -ENOMEM;
2264                         /* It is a large page */
2265                         if (largepage_lvl > 1) {
2266                                 unsigned long nr_superpages, end_pfn;
2267 
2268                                 pteval |= DMA_PTE_LARGE_PAGE;
2269                                 lvl_pages = lvl_to_nr_pages(largepage_lvl);
2270 
2271                                 nr_superpages = sg_res / lvl_pages;
2272                                 end_pfn = iov_pfn + nr_superpages * lvl_pages - 1;
2273 
2274                                 /*
2275                                  * Ensure that old small page tables are
2276                                  * removed to make room for superpage(s).
2277                                  * We're adding new large pages, so make sure
2278                                  * we don't remove their parent tables.
2279                                  */
2280                                 dma_pte_free_pagetable(domain, iov_pfn, end_pfn,
2281                                                        largepage_lvl + 1);
2282                         } else {
2283                                 pteval &= ~(uint64_t)DMA_PTE_LARGE_PAGE;
2284                         }
2285 
2286                 }
2287                 /* We don't need a lock here; nobody else
2288                  * touches this iova range
2289                  */
2290                 tmp = cmpxchg64_local(&pte->val, 0ULL, pteval);
2291                 if (tmp) {
2292                         static int dumps = 5;
2293                         pr_crit("ERROR: DMA PTE for vPFN 0x%lx already set (to %llx not %llx)\n",
2294                                 iov_pfn, tmp, (unsigned long long)pteval);
2295                         if (dumps) {
2296                                 dumps--;
2297                                 debug_dma_dump_mappings(NULL);
2298                         }
2299                         WARN_ON(1);
2300                 }
2301 
2302                 lvl_pages = lvl_to_nr_pages(largepage_lvl);
2303 
2304                 BUG_ON(nr_pages < lvl_pages);
2305                 BUG_ON(sg_res < lvl_pages);
2306 
2307                 nr_pages -= lvl_pages;
2308                 iov_pfn += lvl_pages;
2309                 phys_pfn += lvl_pages;
2310                 pteval += lvl_pages * VTD_PAGE_SIZE;
2311                 sg_res -= lvl_pages;
2312 
2313                 /* If the next PTE would be the first in a new page, then we
2314                    need to flush the cache on the entries we've just written.
2315                    And then we'll need to recalculate 'pte', so clear it and
2316                    let it get set again in the if (!pte) block above.
2317 
2318                    If we're done (!nr_pages) we need to flush the cache too.
2319 
2320                    Also if we've been setting superpages, we may need to
2321                    recalculate 'pte' and switch back to smaller pages for the
2322                    end of the mapping, if the trailing size is not enough to
2323                    use another superpage (i.e. sg_res < lvl_pages). */
2324                 pte++;
2325                 if (!nr_pages || first_pte_in_page(pte) ||
2326                     (largepage_lvl > 1 && sg_res < lvl_pages)) {
2327                         domain_flush_cache(domain, first_pte,
2328                                            (void *)pte - (void *)first_pte);
2329                         pte = NULL;
2330                 }
2331 
2332                 if (!sg_res && nr_pages)
2333                         sg = sg_next(sg);
2334         }
2335         return 0;
2336 }
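
/*
 * A worked example of the superpage handling above: mapping 1030 contiguous
 * pages whose iov_pfn and phys_pfn are both 512-page aligned, on hardware
 * with 2MiB superpage support, writes two 2MiB PTEs for the first 1024
 * pages (after clearing any old 4KiB tables underneath them) and then
 * recomputes the level and falls back to six 4KiB PTEs for the tail,
 * flushing the CPU cache over each run of PTEs as it goes.
 */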
2337 
2338 static int domain_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
2339                           struct scatterlist *sg, unsigned long phys_pfn,
2340                           unsigned long nr_pages, int prot)
2341 {
2342         int iommu_id, ret;
2343         struct intel_iommu *iommu;
2344 
2345         /* Do the real mapping first */
2346         ret = __domain_mapping(domain, iov_pfn, sg, phys_pfn, nr_pages, prot);
2347         if (ret)
2348                 return ret;
2349 
2350         for_each_domain_iommu(iommu_id, domain) {
2351                 iommu = g_iommus[iommu_id];
2352                 __mapping_notify_one(iommu, domain, iov_pfn, nr_pages);
2353         }
2354 
2355         return 0;
2356 }
2357 
2358 static inline int domain_sg_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
2359                                     struct scatterlist *sg, unsigned long nr_pages,
2360                                     int prot)
2361 {
2362         return domain_mapping(domain, iov_pfn, sg, 0, nr_pages, prot);
2363 }
2364 
2365 static inline int domain_pfn_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
2366                                      unsigned long phys_pfn, unsigned long nr_pages,
2367                                      int prot)
2368 {
2369         return domain_mapping(domain, iov_pfn, NULL, phys_pfn, nr_pages, prot);
2370 }
2371 
2372 static void domain_context_clear_one(struct intel_iommu *iommu, u8 bus, u8 devfn)
2373 {
2374         unsigned long flags;
2375         struct context_entry *context;
2376         u16 did_old;
2377 
2378         if (!iommu)
2379                 return;
2380 
2381         spin_lock_irqsave(&iommu->lock, flags);
2382         context = iommu_context_addr(iommu, bus, devfn, 0);
2383         if (!context) {
2384                 spin_unlock_irqrestore(&iommu->lock, flags);
2385                 return;
2386         }
2387         did_old = context_domain_id(context);
2388         context_clear_entry(context);
2389         __iommu_flush_cache(iommu, context, sizeof(*context));
2390         spin_unlock_irqrestore(&iommu->lock, flags);
2391         iommu->flush.flush_context(iommu,
2392                                    did_old,
2393                                    (((u16)bus) << 8) | devfn,
2394                                    DMA_CCMD_MASK_NOBIT,
2395                                    DMA_CCMD_DEVICE_INVL);
2396         iommu->flush.flush_iotlb(iommu,
2397                                  did_old,
2398                                  0,
2399                                  0,
2400                                  DMA_TLB_DSI_FLUSH);
2401 }
2402 
2403 static inline void unlink_domain_info(struct device_domain_info *info)
2404 {
2405         assert_spin_locked(&device_domain_lock);
2406         list_del(&info->link);
2407         list_del(&info->global);
2408         if (info->dev)
2409                 info->dev->archdata.iommu = NULL;
2410 }
2411 
2412 static void domain_remove_dev_info(struct dmar_domain *domain)
2413 {
2414         struct device_domain_info *info, *tmp;
2415         unsigned long flags;
2416 
2417         spin_lock_irqsave(&device_domain_lock, flags);
2418         list_for_each_entry_safe(info, tmp, &domain->devices, link)
2419                 __dmar_remove_one_dev_info(info);
2420         spin_unlock_irqrestore(&device_domain_lock, flags);
2421 }
2422 
2423 /*
2424  * find_domain
2425  * Note: we use struct device->archdata.iommu to store the info
2426  */
2427 static struct dmar_domain *find_domain(struct device *dev)
2428 {
2429         struct device_domain_info *info;
2430 
2431         if (unlikely(dev->archdata.iommu == DEFER_DEVICE_DOMAIN_INFO)) {
2432                 struct iommu_domain *domain;
2433 
2434                 dev->archdata.iommu = NULL;
2435                 domain = iommu_get_domain_for_dev(dev);
2436                 if (domain)
2437                         intel_iommu_attach_device(domain, dev);
2438         }
2439 
2440         /* No lock here; we assume no domain exit in the normal case */
2441         info = dev->archdata.iommu;
2442 
2443         if (likely(info))
2444                 return info->domain;
2445         return NULL;
2446 }
2447 
2448 static inline struct device_domain_info *
2449 dmar_search_domain_by_dev_info(int segment, int bus, int devfn)
2450 {
2451         struct device_domain_info *info;
2452 
2453         list_for_each_entry(info, &device_domain_list, global)
2454                 if (info->iommu->segment == segment && info->bus == bus &&
2455                     info->devfn == devfn)
2456                         return info;
2457 
2458         return NULL;
2459 }
2460 
2461 static struct dmar_domain *dmar_insert_one_dev_info(struct intel_iommu *iommu,
2462                                                     int bus, int devfn,
2463                                                     struct device *dev,
2464                                                     struct dmar_domain *domain)
2465 {
2466         struct dmar_domain *found = NULL;
2467         struct device_domain_info *info;
2468         unsigned long flags;
2469         int ret;
2470 
2471         info = alloc_devinfo_mem();
2472         if (!info)
2473                 return NULL;
2474 
2475         info->bus = bus;
2476         info->devfn = devfn;
2477         info->ats_supported = info->pasid_supported = info->pri_supported = 0;
2478         info->ats_enabled = info->pasid_enabled = info->pri_enabled = 0;
2479         info->ats_qdep = 0;
2480         info->dev = dev;
2481         info->domain = domain;
2482         info->iommu = iommu;
2483         info->pasid_table = NULL;
2484         info->auxd_enabled = 0;
2485         INIT_LIST_HEAD(&info->auxiliary_domains);
2486 
2487         if (dev && dev_is_pci(dev)) {
2488                 struct pci_dev *pdev = to_pci_dev(info->dev);
2489 
2490                 if (!pdev->untrusted &&
2491                     !pci_ats_disabled() &&
2492                     ecap_dev_iotlb_support(iommu->ecap) &&
2493                     pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_ATS) &&
2494                     dmar_find_matched_atsr_unit(pdev))
2495                         info->ats_supported = 1;
2496 
2497                 if (sm_supported(iommu)) {
2498                         if (pasid_supported(iommu)) {
2499                                 int features = pci_pasid_features(pdev);
2500                                 if (features >= 0)
2501                                         info->pasid_supported = features | 1;
2502                         }
2503 
2504                         if (info->ats_supported && ecap_prs(iommu->ecap) &&
2505                             pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_PRI))
2506                                 info->pri_supported = 1;
2507                 }
2508         }
2509 
2510         spin_lock_irqsave(&device_domain_lock, flags);
2511         if (dev)
2512                 found = find_domain(dev);
2513 
2514         if (!found) {
2515                 struct device_domain_info *info2;
2516                 info2 = dmar_search_domain_by_dev_info(iommu->segment, bus, devfn);
2517                 if (info2) {
2518                         found      = info2->domain;
2519                         info2->dev = dev;
2520                 }
2521         }
2522 
2523         if (found) {
2524                 spin_unlock_irqrestore(&device_domain_lock, flags);
2525                 free_devinfo_mem(info);
2526                 /* Caller must free the original domain */
2527                 return found;
2528         }
2529 
2530         spin_lock(&iommu->lock);
2531         ret = domain_attach_iommu(domain, iommu);
2532         spin_unlock(&iommu->lock);
2533 
2534         if (ret) {
2535                 spin_unlock_irqrestore(&device_domain_lock, flags);
2536                 free_devinfo_mem(info);
2537                 return NULL;
2538         }
2539 
2540         list_add(&info->link, &domain->devices);
2541         list_add(&info->global, &device_domain_list);
2542         if (dev)
2543                 dev->archdata.iommu = info;
2544         spin_unlock_irqrestore(&device_domain_lock, flags);
2545 
2546         /* PASID table is mandatory for a PCI device in scalable mode. */
2547         if (dev && dev_is_pci(dev) && sm_supported(iommu)) {
2548                 ret = intel_pasid_alloc_table(dev);
2549                 if (ret) {
2550                         dev_err(dev, "PASID table allocation failed\n");
2551                         dmar_remove_one_dev_info(dev);
2552                         return NULL;
2553                 }
2554 
2555                 /* Setup the PASID entry for requests without PASID: */
2556                 spin_lock(&iommu->lock);
2557                 if (hw_pass_through && domain_type_is_si(domain))
2558                         ret = intel_pasid_setup_pass_through(iommu, domain,
2559                                         dev, PASID_RID2PASID);
2560                 else
2561                         ret = intel_pasid_setup_second_level(iommu, domain,
2562                                         dev, PASID_RID2PASID);
2563                 spin_unlock(&iommu->lock);
2564                 if (ret) {
2565                         dev_err(dev, "Setup RID2PASID failed\n");
2566                         dmar_remove_one_dev_info(dev);
2567                         return NULL;
2568                 }
2569         }
2570 
2571         if (dev && domain_context_mapping(domain, dev)) {
2572                 dev_err(dev, "Domain context map failed\n");
2573                 dmar_remove_one_dev_info(dev);
2574                 return NULL;
2575         }
2576 
2577         return domain;
2578 }
2579 
2580 static int get_last_alias(struct pci_dev *pdev, u16 alias, void *opaque)
2581 {
2582         *(u16 *)opaque = alias;
2583         return 0;
2584 }
2585 
2586 static struct dmar_domain *find_or_alloc_domain(struct device *dev, int gaw)
2587 {
2588         struct device_domain_info *info;
2589         struct dmar_domain *domain = NULL;
2590         struct intel_iommu *iommu;
2591         u16 dma_alias;
2592         unsigned long flags;
2593         u8 bus, devfn;
2594 
2595         iommu = device_to_iommu(dev, &bus, &devfn);
2596         if (!iommu)
2597                 return NULL;
2598 
2599         if (dev_is_pci(dev)) {
2600                 struct pci_dev *pdev = to_pci_dev(dev);
2601 
2602                 pci_for_each_dma_alias(pdev, get_last_alias, &dma_alias);
2603 
2604                 spin_lock_irqsave(&device_domain_lock, flags);
2605                 info = dmar_search_domain_by_dev_info(pci_domain_nr(pdev->bus),
2606                                                       PCI_BUS_NUM(dma_alias),
2607                                                       dma_alias & 0xff);
2608                 if (info) {
2609                         iommu = info->iommu;
2610                         domain = info->domain;
2611                 }
2612                 spin_unlock_irqrestore(&device_domain_lock, flags);
2613 
2614                 /* DMA alias already has a domain, use it */
2615                 if (info)
2616                         goto out;
2617         }
2618 
2619         /* Allocate and initialize new domain for the device */
2620         domain = alloc_domain(0);
2621         if (!domain)
2622                 return NULL;
2623         if (domain_init(domain, iommu, gaw)) {
2624                 domain_exit(domain);
2625                 return NULL;
2626         }
2627 
2628 out:
2629         return domain;
2630 }
2631 
2632 static struct dmar_domain *set_domain_for_dev(struct device *dev,
2633                                               struct dmar_domain *domain)
2634 {
2635         struct intel_iommu *iommu;
2636         struct dmar_domain *tmp;
2637         u16 req_id, dma_alias;
2638         u8 bus, devfn;
2639 
2640         iommu = device_to_iommu(dev, &bus, &devfn);
2641         if (!iommu)
2642                 return NULL;
2643 
2644         req_id = ((u16)bus << 8) | devfn;
2645 
2646         if (dev_is_pci(dev)) {
2647                 struct pci_dev *pdev = to_pci_dev(dev);
2648 
2649                 pci_for_each_dma_alias(pdev, get_last_alias, &dma_alias);
2650 
2651                 /* register PCI DMA alias device */
2652                 if (req_id != dma_alias) {
2653                         tmp = dmar_insert_one_dev_info(iommu, PCI_BUS_NUM(dma_alias),
2654                                         dma_alias & 0xff, NULL, domain);
2655 
2656                         if (!tmp || tmp != domain)
2657                                 return tmp;
2658                 }
2659         }
2660 
2661         tmp = dmar_insert_one_dev_info(iommu, bus, devfn, dev, domain);
2662         if (!tmp || tmp != domain)
2663                 return tmp;
2664 
2665         return domain;
2666 }
2667 
2668 static int iommu_domain_identity_map(struct dmar_domain *domain,
2669                                      unsigned long long start,
2670                                      unsigned long long end)
2671 {
2672         unsigned long first_vpfn = start >> VTD_PAGE_SHIFT;
2673         unsigned long last_vpfn = end >> VTD_PAGE_SHIFT;
2674 
2675         if (!reserve_iova(&domain->iovad, dma_to_mm_pfn(first_vpfn),
2676                           dma_to_mm_pfn(last_vpfn))) {
2677                 pr_err("Reserving iova failed\n");
2678                 return -ENOMEM;
2679         }
2680 
2681         pr_debug("Mapping reserved region %llx-%llx\n", start, end);
2682         /*
2683          * The RMRR range might overlap with a physical memory range,
2684          * so clear it first.
2685          */
2686         dma_pte_clear_range(domain, first_vpfn, last_vpfn);
2687 
2688         return __domain_mapping(domain, first_vpfn, NULL,
2689                                 first_vpfn, last_vpfn - first_vpfn + 1,
2690                                 DMA_PTE_READ|DMA_PTE_WRITE);
2691 }
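
/*
 * A worked example with a hypothetical reserved region: start = 0x7c000000
 * and end = 0x7c0fffff give first_vpfn = 0x7c000 and last_vpfn = 0x7c0ff,
 * so 256 pages are reserved in the iova tree and mapped 1:1
 * (IOVA == physical address) with read/write permission.
 */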
2692 
2693 static int domain_prepare_identity_map(struct device *dev,
2694                                        struct dmar_domain *domain,
2695                                        unsigned long long start,
2696                                        unsigned long long end)
2697 {
2698         /* For _hardware_ passthrough, don't bother. But for software
2699            passthrough, we do it anyway -- it may indicate a memory
2700            range which is reserved in E820 and so didn't get set up
2701            in si_domain to start with */
2702         if (domain == si_domain && hw_pass_through) {
2703                 dev_warn(dev, "Ignoring identity map for HW passthrough [0x%Lx - 0x%Lx]\n",
2704                          start, end);
2705                 return 0;
2706         }
2707 
2708         dev_info(dev, "Setting identity map [0x%Lx - 0x%Lx]\n", start, end);
2709 
2710         if (end < start) {
2711                 WARN(1, "Your BIOS is broken; RMRR ends before it starts!\n"
2712                         "BIOS vendor: %s; Ver: %s; Product Version: %s\n",
2713                         dmi_get_system_info(DMI_BIOS_VENDOR),
2714                         dmi_get_system_info(DMI_BIOS_VERSION),
2715                      dmi_get_system_info(DMI_PRODUCT_VERSION));
2716                 return -EIO;
2717         }
2718 
2719         if (end >> agaw_to_width(domain->agaw)) {
2720                 WARN(1, "Your BIOS is broken; RMRR exceeds permitted address width (%d bits)\n"
2721                      "BIOS vendor: %s; Ver: %s; Product Version: %s\n",
2722                      agaw_to_width(domain->agaw),
2723                      dmi_get_system_info(DMI_BIOS_VENDOR),
2724                      dmi_get_system_info(DMI_BIOS_VERSION),
2725                      dmi_get_system_info(DMI_PRODUCT_VERSION));
2726                 return -EIO;
2727         }
2728 
2729         return iommu_domain_identity_map(domain, start, end);
2730 }
2731 
2732 static int md_domain_init(struct dmar_domain *domain, int guest_width);
2733 
2734 static int __init si_domain_init(int hw)
2735 {
2736         struct dmar_rmrr_unit *rmrr;
2737         struct device *dev;
2738         int i, nid, ret;
2739 
2740         si_domain = alloc_domain(DOMAIN_FLAG_STATIC_IDENTITY);
2741         if (!si_domain)
2742                 return -EFAULT;
2743 
2744         if (md_domain_init(si_domain, DEFAULT_DOMAIN_ADDRESS_WIDTH)) {
2745                 domain_exit(si_domain);
2746                 return -EFAULT;
2747         }
2748 
2749         if (hw)
2750                 return 0;
2751 
2752         for_each_online_node(nid) {
2753                 unsigned long start_pfn, end_pfn;
2754                 int i;
2755 
2756                 for_each_mem_pfn_range(i, nid, &start_pfn, &end_pfn, NULL) {
2757                         ret = iommu_domain_identity_map(si_domain,
2758                                         PFN_PHYS(start_pfn), PFN_PHYS(end_pfn));
2759                         if (ret)
2760                                 return ret;
2761                 }
2762         }
2763 
2764         /*
2765          * Identity map the RMRRs so that devices with RMRRs could also use
2766          * the si_domain.
2767          */
2768         for_each_rmrr_units(rmrr) {
2769                 for_each_active_dev_scope(rmrr->devices, rmrr->devices_cnt,
2770                                           i, dev) {
2771                         unsigned long long start = rmrr->base_address;
2772                         unsigned long long end = rmrr->end_address;
2773 
2774                         if (WARN_ON(end < start ||
2775                                     end >> agaw_to_width(si_domain->agaw)))
2776                                 continue;
2777 
2778                         ret = iommu_domain_identity_map(si_domain, start, end);
2779                         if (ret)
2780                                 return ret;
2781                 }
2782         }
2783 
2784         return 0;
2785 }
2786 
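/*
 * Editorial note (hedged): when hardware passthrough is not available,
 * si_domain ends up as a software 1:1 map -- every usable RAM range
 * reported by memblock is mapped with IOVA == physical address, so e.g.
 * DMA to bus address 0x2000 lands on physical 0x2000.  RMRR regions that
 * sit outside ordinary RAM are added explicitly by the second loop so that
 * devices with RMRRs can still be placed in si_domain.
 */
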
2787 static int identity_mapping(struct device *dev)
2788 {
2789         struct device_domain_info *info;
2790 
2791         info = dev->archdata.iommu;
2792         if (info && info != DUMMY_DEVICE_DOMAIN_INFO && info != DEFER_DEVICE_DOMAIN_INFO)
2793                 return (info->domain == si_domain);
2794 
2795         return 0;
2796 }
2797 
2798 static int domain_add_dev_info(struct dmar_domain *domain, struct device *dev)
2799 {
2800         struct dmar_domain *ndomain;
2801         struct intel_iommu *iommu;
2802         u8 bus, devfn;
2803 
2804         iommu = device_to_iommu(dev, &bus, &devfn);
2805         if (!iommu)
2806                 return -ENODEV;
2807 
2808         ndomain = dmar_insert_one_dev_info(iommu, bus, devfn, dev, domain);
2809         if (ndomain != domain)
2810                 return -EBUSY;
2811 
2812         return 0;
2813 }
2814 
2815 static bool device_has_rmrr(struct device *dev)
2816 {
2817         struct dmar_rmrr_unit *rmrr;
2818         struct device *tmp;
2819         int i;
2820 
2821         rcu_read_lock();
2822         for_each_rmrr_units(rmrr) {
2823                 /*
2824                  * Return TRUE if this RMRR contains the device that
2825                  * is passed in.
2826                  */
2827                 for_each_active_dev_scope(rmrr->devices,
2828                                           rmrr->devices_cnt, i, tmp)
2829                         if (tmp == dev ||
2830                             is_downstream_to_pci_bridge(dev, tmp)) {
2831                                 rcu_read_unlock();
2832                                 return true;
2833                         }
2834         }
2835         rcu_read_unlock();
2836         return false;
2837 }
2838 
2839 /**
2840  * device_rmrr_is_relaxable - Test whether the RMRR of this device
2841  * is relaxable (i.e. is allowed to be left unenforced under some conditions)
2842  * @dev: device handle
2843  *
2844  * We assume that PCI USB devices with RMRRs have them largely
2845  * for historical reasons and that the RMRR space is not actively used post
2846  * boot.  This exclusion may change if vendors begin to abuse it.
2847  *
2848  * The same exception is made for graphics devices, with the requirement that
2849  * any use of the RMRR regions will be torn down before assigning the device
2850  * to a guest.
2851  *
2852  * Return: true if the RMRR is relaxable, false otherwise
2853  */
2854 static bool device_rmrr_is_relaxable(struct device *dev)
2855 {
2856         struct pci_dev *pdev;
2857 
2858         if (!dev_is_pci(dev))
2859                 return false;
2860 
2861         pdev = to_pci_dev(dev);
2862         if (IS_USB_DEVICE(pdev) || IS_GFX_DEVICE(pdev))
2863                 return true;
2864         else
2865                 return false;
2866 }
2867 
2868 /*
2869  * There are a couple of cases where we need to restrict the functionality of
2870  * devices associated with RMRRs.  The first is when evaluating a device for
2871  * identity mapping because problems exist when devices are moved in and out
2872  * of domains and their respective RMRR information is lost.  This means that
2873  * a device with associated RMRRs will never be in a "passthrough" domain.
2874  * The second is use of the device through the IOMMU API.  This interface
2875  * expects to have full control of the IOVA space for the device.  We cannot
2876  * satisfy both the requirement that RMRR access is maintained and have an
2877  * unencumbered IOVA space.  We also have no ability to quiesce the device's
2878  * use of the RMRR space or even inform the IOMMU API user of the restriction.
2879  * We therefore prevent devices associated with an RMRR from participating in
2880  * the IOMMU API, which eliminates them from device assignment.
2881  *
2882  * In both cases, devices which have relaxable RMRRs are not affected by this
2883  * restriction. See the device_rmrr_is_relaxable comment.
2884  */
2885 static bool device_is_rmrr_locked(struct device *dev)
2886 {
2887         if (!device_has_rmrr(dev))
2888                 return false;
2889 
2890         if (device_rmrr_is_relaxable(dev))
2891                 return false;
2892 
2893         return true;
2894 }
2895 
2896 /*
2897  * Return the required default domain type for a specific device.
2898  *
2899  * @dev: the device in question
2901  *
2902  * Returns:
2903  *  - IOMMU_DOMAIN_DMA: device requires a dynamic mapping domain
2904  *  - IOMMU_DOMAIN_IDENTITY: device requires an identity mapping domain
2905  *  - 0: both identity and dynamic domains work for this device
2906  */
2907 static int device_def_domain_type(struct device *dev)
2908 {
2909         if (dev_is_pci(dev)) {
2910                 struct pci_dev *pdev = to_pci_dev(dev);
2911 
2912                 /*
2913                  * Prevent any device marked as untrusted from getting
2914                  * placed into the static identity mapping domain.
2915                  */
2916                 if (pdev->untrusted)
2917                         return IOMMU_DOMAIN_DMA;
2918 
2919                 if ((iommu_identity_mapping & IDENTMAP_AZALIA) && IS_AZALIA(pdev))
2920                         return IOMMU_DOMAIN_IDENTITY;
2921 
2922                 if ((iommu_identity_mapping & IDENTMAP_GFX) && IS_GFX_DEVICE(pdev))
2923                         return IOMMU_DOMAIN_IDENTITY;
2924 
2925                 /*
2926                  * We want to start off with all devices in the 1:1 domain, and
2927                  * take them out later if we find they can't access all of memory.
2928                  *
2929                  * However, we can't do this for PCI devices behind bridges,
2930                  * because all PCI devices behind the same bridge will end up
2931                  * with the same source-id on their transactions.
2932                  *
2933                  * Practically speaking, we can't change things around for these
2934                  * devices at run-time, because we can't be sure there'll be no
2935                  * DMA transactions in flight for any of their siblings.
2936                  *
2937                  * So PCI devices (unless they're on the root bus) as well as
2938                  * their parent PCI-PCI or PCIe-PCI bridges must be left _out_ of
2939                  * the 1:1 domain, just in _case_ one of their siblings turns out
2940                  * not to be able to map all of memory.
2941                  */
2942                 if (!pci_is_pcie(pdev)) {
2943                         if (!pci_is_root_bus(pdev->bus))
2944                                 return IOMMU_DOMAIN_DMA;
2945                         if (pdev->class >> 8 == PCI_CLASS_BRIDGE_PCI)
2946                                 return IOMMU_DOMAIN_DMA;
2947                 } else if (pci_pcie_type(pdev) == PCI_EXP_TYPE_PCI_BRIDGE)
2948                         return IOMMU_DOMAIN_DMA;
2949         }
2950 
2951         return (iommu_identity_mapping & IDENTMAP_ALL) ?
2952                         IOMMU_DOMAIN_IDENTITY : 0;
2953 }
2954 
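/*
 * Editorial sketch of the resulting policy (assuming the IDENTMAP_* bits
 * that init_dmars() below may set):
 *
 *     untrusted PCI device                   -> IOMMU_DOMAIN_DMA
 *     Azalia device with IDENTMAP_AZALIA     -> IOMMU_DOMAIN_IDENTITY
 *     GFX device with IDENTMAP_GFX           -> IOMMU_DOMAIN_IDENTITY
 *     conventional PCI behind a bridge,
 *     or a PCI(e)-to-PCI bridge itself       -> IOMMU_DOMAIN_DMA
 *     anything else                          -> IDENTMAP_ALL ? IDENTITY : 0
 */
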
2955 static void intel_iommu_init_qi(struct intel_iommu *iommu)
2956 {
2957         /*
2958          * Start from a sane IOMMU hardware state.
2959          * If the queued invalidation is already initialized by us
2960          * (for example, while enabling interrupt-remapping) then
2961          * things are already rolling from a sane state.
2962          */
2963         if (!iommu->qi) {
2964                 /*
2965                  * Clear any previous faults.
2966                  */
2967                 dmar_fault(-1, iommu);
2968                 /*
2969                  * Disable queued invalidation if supported and already enabled
2970                  * before OS handover.
2971                  */
2972                 dmar_disable_qi(iommu);
2973         }
2974 
2975         if (dmar_enable_qi(iommu)) {
2976                 /*
2977                  * Queued invalidation is not enabled, use register-based invalidation
2978                  */
2979                 iommu->flush.flush_context = __iommu_flush_context;
2980                 iommu->flush.flush_iotlb = __iommu_flush_iotlb;
2981                 pr_info("%s: Using Register based invalidation\n",
2982                         iommu->name);
2983         } else {
2984                 iommu->flush.flush_context = qi_flush_context;
2985                 iommu->flush.flush_iotlb = qi_flush_iotlb;
2986                 pr_info("%s: Using Queued invalidation\n", iommu->name);
2987         }
2988 }
2989 
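/*
 * Editorial example: once the flush callbacks are wired up here, callers do
 * not care which mechanism is active.  For instance init_dmars() below
 * simply issues
 *
 *     iommu->flush.flush_context(iommu, 0, 0, 0, DMA_CCMD_GLOBAL_INVL);
 *     iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH);
 *
 * and the call lands in either the register-based or the queued
 * invalidation implementation selected above.
 */
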
2990 static int copy_context_table(struct intel_iommu *iommu,
2991                               struct root_entry *old_re,
2992                               struct context_entry **tbl,
2993                               int bus, bool ext)
2994 {
2995         int tbl_idx, pos = 0, idx, devfn, ret = 0, did;
2996         struct context_entry *new_ce = NULL, ce;
2997         struct context_entry *old_ce = NULL;
2998         struct root_entry re;
2999         phys_addr_t old_ce_phys;
3000 
3001         tbl_idx = ext ? bus * 2 : bus;
3002         memcpy(&re, old_re, sizeof(re));
3003 
3004         for (devfn = 0; devfn < 256; devfn++) {
3005                 /* First calculate the correct index */
3006                 idx = (ext ? devfn * 2 : devfn) % 256;
3007 
3008                 if (idx == 0) {
3009                         /* First save what we may have and clean up */
3010                         if (new_ce) {
3011                                 tbl[tbl_idx] = new_ce;
3012                                 __iommu_flush_cache(iommu, new_ce,
3013                                                     VTD_PAGE_SIZE);
3014                                 pos = 1;
3015                         }
3016 
3017                         if (old_ce)
3018                                 memunmap(old_ce);
3019 
3020                         ret = 0;
3021                         if (devfn < 0x80)
3022                                 old_ce_phys = root_entry_lctp(&re);
3023                         else
3024                                 old_ce_phys = root_entry_uctp(&re);
3025 
3026                         if (!old_ce_phys) {
3027                                 if (ext && devfn == 0) {
3028                                         /* No LCTP, try UCTP */
3029                                         devfn = 0x7f;
3030                                         continue;
3031                                 } else {
3032                                         goto out;
3033                                 }
3034                         }
3035 
3036                         ret = -ENOMEM;
3037                         old_ce = memremap(old_ce_phys, PAGE_SIZE,
3038                                         MEMREMAP_WB);
3039                         if (!old_ce)
3040                                 goto out;
3041 
3042                         new_ce = alloc_pgtable_page(iommu->node);
3043                         if (!new_ce)
3044                                 goto out_unmap;
3045 
3046                         ret = 0;
3047                 }
3048 
3049                 /* Now copy the context entry */
3050                 memcpy(&ce, old_ce + idx, sizeof(ce));
3051 
3052                 if (!__context_present(&ce))
3053                         continue;
3054 
3055                 did = context_domain_id(&ce);
3056                 if (did >= 0 && did < cap_ndoms(iommu->cap))
3057                         set_bit(did, iommu->domain_ids);
3058 
3059                 /*
3060                  * We need a marker for copied context entries. This
3061                  * marker needs to work for the old format as well as
3062                  * for extended context entries.
3063                  *
3064                  * Bit 67 of the context entry is used. In the old
3065                  * format this bit is available to software, in the
3066                  * extended format it is the PGE bit, but PGE is ignored
3067                  * by HW if PASIDs are disabled (and thus still
3068                  * available).
3069                  *
3070                  * So disable PASIDs first and then mark the entry
3071                  * copied. This means that we don't copy PASID
3072                  * translations from the old kernel, but this is fine as
3073                  * faults there are not fatal.
3074                  */
3075                 context_clear_pasid_enable(&ce);
3076                 context_set_copied(&ce);
3077 
3078                 new_ce[idx] = ce;
3079         }
3080 
3081         tbl[tbl_idx + pos] = new_ce;
3082 
3083         __iommu_flush_cache(iommu, new_ce, VTD_PAGE_SIZE);
3084 
3085 out_unmap:
3086         memunmap(old_ce);
3087 
3088 out:
3089         return ret;
3090 }
3091 
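/*
 * Editorial note on the index arithmetic above (illustrative): extended
 * context entries are twice the size of legacy ones, so one bus needs two
 * 4K context tables and each device occupies two 16-byte slots:
 *
 *     legacy:   devfn N          -> tbl[bus],         idx = N
 *     extended: devfn 0x00..0x7f -> tbl[bus * 2]      (from LCTP), idx = N * 2
 *               devfn 0x80..0xff -> tbl[bus * 2 + 1]  (from UCTP), idx = (N * 2) % 256
 */
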
3092 static int copy_translation_tables(struct intel_iommu *iommu)
3093 {
3094         struct context_entry **ctxt_tbls;
3095         struct root_entry *old_rt;
3096         phys_addr_t old_rt_phys;
3097         int ctxt_table_entries;
3098         unsigned long flags;
3099         u64 rtaddr_reg;
3100         int bus, ret;
3101         bool new_ext, ext;
3102 
3103         rtaddr_reg = dmar_readq(iommu->reg + DMAR_RTADDR_REG);
3104         ext        = !!(rtaddr_reg & DMA_RTADDR_RTT);
3105         new_ext    = !!ecap_ecs(iommu->ecap);
3106 
3107         /*
3108          * The RTT bit can only be changed when translation is disabled,
3109          * but disabling translation would open a window for data
3110          * corruption. So bail out and don't copy anything if we would
3111          * have to change the bit.
3112          */
3113         if (new_ext != ext)
3114                 return -EINVAL;
3115 
3116         old_rt_phys = rtaddr_reg & VTD_PAGE_MASK;
3117         if (!old_rt_phys)
3118                 return -EINVAL;
3119 
3120         old_rt = memremap(old_rt_phys, PAGE_SIZE, MEMREMAP_WB);
3121         if (!old_rt)
3122                 return -ENOMEM;
3123 
3124         /* This is too big for the stack - allocate it from slab */
3125         ctxt_table_entries = ext ? 512 : 256;
3126         ret = -ENOMEM;
3127         ctxt_tbls = kcalloc(ctxt_table_entries, sizeof(void *), GFP_KERNEL);
3128         if (!ctxt_tbls)
3129                 goto out_unmap;
3130 
3131         for (bus = 0; bus < 256; bus++) {
3132                 ret = copy_context_table(iommu, &old_rt[bus],
3133                                          ctxt_tbls, bus, ext);
3134                 if (ret) {
3135                         pr_err("%s: Failed to copy context table for bus %d\n",
3136                                 iommu->name, bus);
3137                         continue;
3138                 }
3139         }
3140 
3141         spin_lock_irqsave(&iommu->lock, flags);
3142 
3143         /* Context tables are copied, now write them to the root_entry table */
3144         for (bus = 0; bus < 256; bus++) {
3145                 int idx = ext ? bus * 2 : bus;
3146                 u64 val;
3147 
3148                 if (ctxt_tbls[idx]) {
3149                         val = virt_to_phys(ctxt_tbls[idx]) | 1;
3150                         iommu->root_entry[bus].lo = val;
3151                 }
3152 
3153                 if (!ext || !ctxt_tbls[idx + 1])
3154                         continue;
3155 
3156                 val = virt_to_phys(ctxt_tbls[idx + 1]) | 1;
3157                 iommu->root_entry[bus].hi = val;
3158         }
3159 
3160         spin_unlock_irqrestore(&iommu->lock, flags);
3161 
3162         kfree(ctxt_tbls);
3163 
3164         __iommu_flush_cache(iommu, iommu->root_entry, PAGE_SIZE);
3165 
3166         ret = 0;
3167 
3168 out_unmap:
3169         memunmap(old_rt);
3170 
3171         return ret;
3172 }
3173 
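/*
 * Editorial note: the "| 1" above sets the Present bit of the root entry,
 * so e.g. a copied context table at physical address 0x12345000 is
 * installed as root_entry.lo = 0x12345001, and the final cache flush makes
 * the whole root table visible to the IOMMU.
 */
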
3174 static int __init init_dmars(void)
3175 {
3176         struct dmar_drhd_unit *drhd;
3177         struct intel_iommu *iommu;
3178         int ret;
3179 
3180         /*
3181          * for each drhd
3182          *    allocate root
3183          *    initialize and program root entry to not present
3184          * endfor
3185          */
3186         for_each_drhd_unit(drhd) {
3187                 /*
3188                  * No lock needed as this is only incremented in the
3189                  * single-threaded kernel __init code path; all other
3190                  * accesses are read-only.
3191                  */
3192                 if (g_num_of_iommus < DMAR_UNITS_SUPPORTED) {
3193                         g_num_of_iommus++;
3194                         continue;
3195                 }
3196                 pr_err_once("Exceeded %d IOMMUs\n", DMAR_UNITS_SUPPORTED);
3197         }
3198 
3199         /* Preallocate enough resources for IOMMU hot-addition */
3200         if (g_num_of_iommus < DMAR_UNITS_SUPPORTED)
3201                 g_num_of_iommus = DMAR_UNITS_SUPPORTED;
3202 
3203         g_iommus = kcalloc(g_num_of_iommus, sizeof(struct intel_iommu *),
3204                         GFP_KERNEL);
3205         if (!g_iommus) {
3206                 pr_err("Allocating global iommu array failed\n");
3207                 ret = -ENOMEM;
3208                 goto error;
3209         }
3210 
3211         for_each_iommu(iommu, drhd) {
3212                 if (drhd->ignored) {
3213                         iommu_disable_translation(iommu);
3214                         continue;
3215                 }
3216 
3217                 /*
3218                  * Find the max pasid size of all IOMMUs in the system.
3219                  * We need to ensure the system pasid table is no bigger
3220                  * than the smallest supported size.
3221                  */
3222                 if (pasid_supported(iommu)) {
3223                         u32 temp = 2 << ecap_pss(iommu->ecap);
3224 
3225                         intel_pasid_max_id = min_t(u32, temp,
3226                                                    intel_pasid_max_id);
3227                 }
3228 
3229                 g_iommus[iommu->seq_id] = iommu;
3230 
3231                 intel_iommu_init_qi(iommu);
3232 
3233                 ret = iommu_init_domains(iommu);
3234                 if (ret)
3235                         goto free_iommu;
3236 
3237                 init_translation_status(iommu);
3238 
3239                 if (translation_pre_enabled(iommu) && !is_kdump_kernel()) {
3240                         iommu_disable_translation(iommu);
3241                         clear_translation_pre_enabled(iommu);
3242                         pr_warn("Translation was enabled for %s but we are not in kdump mode\n",
3243                                 iommu->name);
3244                 }
3245 
3246                 /*
3247                  * TBD:
3248                  * we could share the same root & context tables
3249                  * among all IOMMUs; need to split it later.
3250                  */
3251                 ret = iommu_alloc_root_entry(iommu);
3252                 if (ret)
3253                         goto free_iommu;
3254 
3255                 if (translation_pre_enabled(iommu)) {
3256                         pr_info("Translation already enabled - trying to copy translation structures\n");
3257 
3258                         ret = copy_translation_tables(iommu);
3259                         if (ret) {
3260                                 /*
3261                                  * We found the IOMMU with translation
3262                                  * enabled - but failed to copy over the
3263                                  * old root-entry table. Try to proceed
3264                                  * by disabling translation now and
3265                                  * allocating a clean root-entry table.
3266                                  * This might cause DMAR faults, but
3267                                  * probably the dump will still succeed.
3268                                  */
3269                                 pr_err("Failed to copy translation tables from previous kernel for %s\n",
3270                                        iommu->name);
3271                                 iommu_disable_translation(iommu);
3272                                 clear_translation_pre_enabled(iommu);
3273                         } else {
3274                                 pr_info("Copied translation tables from previous kernel for %s\n",
3275                                         iommu->name);
3276                         }
3277                 }
3278 
3279                 if (!ecap_pass_through(iommu->ecap))
3280                         hw_pass_through = 0;
3281 #ifdef CONFIG_INTEL_IOMMU_SVM
3282                 if (pasid_supported(iommu))
3283                         intel_svm_init(iommu);
3284 #endif
3285         }
3286 
3287         /*
3288          * Now that qi is enabled on all iommus, set the root entry and flush
3289          * caches. This is required on some Intel X58 chipsets, otherwise the
3290          * flush_context function will loop forever and the boot hangs.
3291          */
3292         for_each_active_iommu(iommu, drhd) {
3293                 iommu_flush_write_buffer(iommu);
3294                 iommu_set_root_entry(iommu);
3295                 iommu->flush.flush_context(iommu, 0, 0, 0, DMA_CCMD_GLOBAL_INVL);
3296                 iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH);
3297         }
3298 
3299         if (iommu_default_passthrough())
3300                 iommu_identity_mapping |= IDENTMAP_ALL;
3301 
3302 #ifdef CONFIG_INTEL_IOMMU_BROKEN_GFX_WA
3303         dmar_map_gfx = 0;
3304 #endif
3305 
3306         if (!dmar_map_gfx)
3307                 iommu_identity_mapping |= IDENTMAP_GFX;
3308 
3309         check_tylersburg_isoch();
3310 
3311         ret = si_domain_init(hw_pass_through);
3312         if (ret)
3313                 goto free_iommu;
3314 
3315         /*
3316          * for each drhd
3317          *   enable fault log
3318          *   global invalidate context cache
3319          *   global invalidate iotlb
3320          *   enable translation
3321          */
3322         for_each_iommu(iommu, drhd) {
3323                 if (drhd->ignored) {
3324                         /*
3325                          * we always have to disable PMRs or DMA may fail on
3326                          * this device
3327                          */
3328                         if (force_on)
3329                                 iommu_disable_protect_mem_regions(iommu);
3330                         continue;
3331                 }
3332 
3333                 iommu_flush_write_buffer(iommu);
3334 
3335 #ifdef CONFIG_INTEL_IOMMU_SVM
3336                 if (pasid_supported(iommu) && ecap_prs(iommu->ecap)) {
3337                         /*
3338                          * Calling dmar_alloc_hwirq() with dmar_global_lock
3339                          * held could cause a lock race, so drop it first.
3340                          */
3341                         up_write(&dmar_global_lock);
3342                         ret = intel_svm_enable_prq(iommu);
3343                         down_write(&dmar_global_lock);
3344                         if (ret)
3345                                 goto free_iommu;
3346                 }
3347 #endif
3348                 ret = dmar_set_interrupt(iommu);
3349                 if (ret)
3350                         goto free_iommu;
3351         }
3352 
3353         return 0;
3354 
3355 free_iommu:
3356         for_each_active_iommu(iommu, drhd) {
3357                 disable_dmar_iommu(iommu);
3358                 free_dmar_iommu(iommu);
3359         }
3360 
3361         kfree(g_iommus);
3362 
3363 error:
3364         return ret;
3365 }
3366 
3367 /* This takes a number of _MM_ pages, not VTD pages */
3368 static unsigned long intel_alloc_iova(struct device *dev,
3369                                      struct dmar_domain *domain,
3370                                      unsigned long nrpages, uint64_t dma_mask)
3371 {
3372         unsigned long iova_pfn;
3373 
3374         /* Restrict dma_mask to the width that the iommu can handle */
3375         dma_mask = min_t(uint64_t, DOMAIN_MAX_ADDR(domain->gaw), dma_mask);
3376         /* Ensure we reserve the whole size-aligned region */
3377         nrpages = __roundup_pow_of_two(nrpages);
3378 
3379         if (!dmar_forcedac && dma_mask > DMA_BIT_MASK(32)) {
3380                 /*
3381                  * First try to allocate an IO virtual address below
3382                  * DMA_BIT_MASK(32) and, if that fails, try allocating
3383                  * from the higher range.
3384                  */
3385                 iova_pfn = alloc_iova_fast(&domain->iovad, nrpages,
3386                                            IOVA_PFN(DMA_BIT_MASK(32)), false);
3387                 if (iova_pfn)
3388                         return iova_pfn;
3389         }
3390         iova_pfn = alloc_iova_fast(&domain->iovad, nrpages,
3391                                    IOVA_PFN(dma_mask), true);
3392         if (unlikely(!iova_pfn)) {
3393                 dev_err_once(dev, "Allocating %ld-page iova failed\n",
3394                              nrpages);
3395                 return 0;
3396         }
3397 
3398         return iova_pfn;
3399 }
3400 
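/*
 * Editorial example (hedged): a request for three MM pages with a 64-bit
 * dma_mask is first rounded up to four pages by __roundup_pow_of_two();
 * unless forcedac is set, the allocator then tries to place the range
 * below 4 GiB and only falls back to the full mask when that space is
 * exhausted.
 */
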
3401 static struct dmar_domain *get_private_domain_for_dev(struct device *dev)
3402 {
3403         struct dmar_domain *domain, *tmp;
3404         struct dmar_rmrr_unit *rmrr;
3405         struct device *i_dev;
3406         int i, ret;
3407 
3408         /* Device shouldn't be attached to any domain yet. */
3409         domain = find_domain(dev);
3410         if (domain)
3411                 return NULL;
3412 
3413         domain = find_or_alloc_domain(dev, DEFAULT_DOMAIN_ADDRESS_WIDTH);
3414         if (!domain)
3415                 goto out;
3416 
3417         /* We have a new domain - set up possible RMRRs for the device */
3418         rcu_read_lock();
3419         for_each_rmrr_units(rmrr) {
3420                 for_each_active_dev_scope(rmrr->devices, rmrr->devices_cnt,
3421                                           i, i_dev) {
3422                         if (i_dev != dev)
3423                                 continue;
3424 
3425                         ret = domain_prepare_identity_map(dev, domain,
3426                                                           rmrr->base_address,
3427                                                           rmrr->end_address);
3428                         if (ret)
3429                                 dev_err(dev, "Mapping reserved region failed\n");
3430                 }
3431         }
3432         rcu_read_unlock();
3433 
3434         tmp = set_domain_for_dev(dev, domain);
3435         if (!tmp || domain != tmp) {
3436                 domain_exit(domain);
3437                 domain = tmp;
3438         }
3439 
3440 out:
3441         if (!domain)
3442                 dev_err(dev, "Allocating domain failed\n");
3443         else
3444                 domain->domain.type = IOMMU_DOMAIN_DMA;
3445 
3446         return domain;
3447 }
3448 
3449 /* Check if the dev needs to go through the non-identity map/unmap path. */
3450 static bool iommu_need_mapping(struct device *dev)
3451 {
3452         int ret;
3453 
3454         if (iommu_dummy(dev))
3455                 return false;
3456 
3457         ret = identity_mapping(dev);
3458         if (ret) {
3459                 u64 dma_mask = *dev->dma_mask;
3460 
3461                 if (dev->coherent_dma_mask && dev->coherent_dma_mask < dma_mask)
3462                         dma_mask = dev->coherent_dma_mask;
3463 
3464                 if (dma_mask >= dma_direct_get_required_mask(dev))
3465                         return false;
3466 
3467                 /*
3468                  * 32 bit DMA devices are removed from si_domain and fall
3469                  * back to non-identity mapping.
3470                  */
3471                 dmar_remove_one_dev_info(dev);
3472                 ret = iommu_request_dma_domain_for_dev(dev);
3473                 if (ret) {
3474                         struct iommu_domain *domain;
3475                         struct dmar_domain *dmar_domain;
3476 
3477                         domain = iommu_get_domain_for_dev(dev);
3478                         if (domain) {
3479                                 dmar_domain = to_dmar_domain(domain);
3480                                 dmar_domain->flags |= DOMAIN_FLAG_LOSE_CHILDREN;
3481                         }
3482                         dmar_remove_one_dev_info(dev);
3483                         get_private_domain_for_dev(dev);
3484                 }
3485 
3486                 dev_info(dev, "32bit DMA uses non-identity mapping\n");
3487         }
3488 
3489         return true;
3490 }
3491 
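/*
 * Editorial sketch of the resulting dispatch (see intel_map_page() and
 * friends below): a device sitting in si_domain whose DMA mask covers all
 * of memory keeps using the dma_direct_* path, while an identity-mapped
 * device with a narrower mask is moved to a private DMA domain here and
 * from then on goes through __intel_map_single().
 */
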
3492 static dma_addr_t __intel_map_single(struct device *dev, phys_addr_t paddr,
3493                                      size_t size, int dir, u64 dma_mask)
3494 {
3495         struct dmar_domain *domain;
3496         phys_addr_t start_paddr;
3497         unsigned long iova_pfn;
3498         int prot = 0;
3499         int ret;
3500         struct intel_iommu *iommu;
3501         unsigned long paddr_pfn = paddr >> PAGE_SHIFT;
3502 
3503         BUG_ON(dir == DMA_NONE);
3504 
3505         domain = find_domain(dev);
3506         if (!domain)
3507                 return DMA_MAPPING_ERROR;
3508 
3509         iommu = domain_get_iommu(domain);
3510         size = aligned_nrpages(paddr, size);
3511 
3512         iova_pfn = intel_alloc_iova(dev, domain, dma_to_mm_pfn(size), dma_mask);
3513         if (!iova_pfn)
3514                 goto error;
3515 
3516         /*
3517          * Check if DMAR supports zero-length reads on write-only
3518          * mappings.
3519          */
3520         if (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL || \
3521                         !cap_zlr(iommu->cap))
3522                 prot |= DMA_PTE_READ;
3523         if (dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL)
3524                 prot |= DMA_PTE_WRITE;
3525         /*
3526          * paddr ~ (paddr + size) might span a partial page, so we map the
3527          * whole page.  Note: if two parts of one page are mapped separately,
3528          * we might have two guest addresses mapping to the same host paddr,
3529          * but this is not a big problem.
3530          */
3531         ret = domain_pfn_mapping(domain, mm_to_dma_pfn(iova_pfn),
3532                                  mm_to_dma_pfn(paddr_pfn), size, prot);
3533         if (ret)
3534                 goto error;
3535 
3536         start_paddr = (phys_addr_t)iova_pfn << PAGE_SHIFT;
3537         start_paddr += paddr & ~PAGE_MASK;
3538 
3539         trace_map_single(dev, start_paddr, paddr, size << VTD_PAGE_SHIFT);
3540 
3541         return start_paddr;
3542 
3543 error:
3544         if (iova_pfn)
3545                 free_iova_fast(&domain->iovad, iova_pfn, dma_to_mm_pfn(size));
3546         dev_err(dev, "Device request: %zx@%llx dir %d --- failed\n",
3547                 size, (unsigned long long)paddr, dir);
3548         return DMA_MAPPING_ERROR;
3549 }
3550 
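/*
 * Worked example (editorial): mapping 0x100 bytes at paddr 0x12345678
 * covers a single VT-d page; if the allocator hands back iova_pfn 0x9f000,
 * the returned handle is (0x9f000 << PAGE_SHIFT) + 0x678, i.e. the
 * sub-page offset of the original buffer is preserved in the DMA address.
 */
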
3551 static dma_addr_t intel_map_page(struct device *dev, struct page *page,
3552                                  unsigned long offset, size_t size,
3553                                  enum dma_data_direction dir,
3554                                  unsigned long attrs)
3555 {
3556         if (iommu_need_mapping(dev))
3557                 return __intel_map_single(dev, page_to_phys(page) + offset,
3558                                 size, dir, *dev->dma_mask);
3559         return dma_direct_map_page(dev, page, offset, size, dir, attrs);
3560 }
3561 
3562 static dma_addr_t intel_map_resource(struct device *dev, phys_addr_t phys_addr,
3563                                      size_t size, enum dma_data_direction dir,
3564                                      unsigned long attrs)
3565 {
3566         if (iommu_need_mapping(dev))
3567                 return __intel_map_single(dev, phys_addr, size, dir,
3568                                 *dev->dma_mask);
3569         return dma_direct_map_resource(dev, phys_addr, size, dir, attrs);
3570 }
3571 
3572 static void intel_unmap(struct device *dev, dma_addr_t dev_addr, size_t size)
3573 {
3574         struct dmar_domain *domain;
3575         unsigned long start_pfn, last_pfn;
3576         unsigned long nrpages;
3577         unsigned long iova_pfn;
3578         struct intel_iommu *iommu;
3579         struct page *freelist;
3580         struct pci_dev *pdev = NULL;
3581 
3582         domain = find_domain(dev);
3583         BUG_ON(!domain);
3584 
3585         iommu = domain_get_iommu(domain);
3586 
3587         iova_pfn = IOVA_PFN(dev_addr);
3588 
3589         nrpages = aligned_nrpages(dev_addr, size);
3590         start_pfn = mm_to_dma_pfn(iova_pfn);
3591         last_pfn = start_pfn + nrpages - 1;
3592 
3593         if (dev_is_pci(dev))
3594                 pdev = to_pci_dev(dev);
3595 
3596         freelist = domain_unmap(domain, start_pfn, last_pfn);
3597         if (intel_iommu_strict || (pdev && pdev->untrusted) ||
3598                         !has_iova_flush_queue(&domain->iovad)) {
3599                 iommu_flush_iotlb_psi(iommu, domain, start_pfn,
3600                                       nrpages, !freelist, 0);
3601                 /* free iova */
3602                 free_iova_fast(&domain->iovad, iova_pfn, dma_to_mm_pfn(nrpages));
3603                 dma_free_pagelist(freelist);
3604         } else {
3605                 queue_iova(&domain->iovad, iova_pfn, nrpages,
3606                            (unsigned long)freelist);
3607                 /*
3608                  * queue up the release of the unmap to save the ~1/6th of
3609                  * the cpu time used up by the iotlb flush operation...
3610                  */
3611         }
3612 
3613         trace_unmap_single(dev, dev_addr, size);
3614 }
3615 
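/*
 * Editorial note: in strict mode, for untrusted devices, or when the
 * domain has no flush queue, the IOTLB is invalidated synchronously and
 * the IOVA freed immediately; otherwise the release is batched via
 * queue_iova(), trading a short window in which stale translations stay
 * valid for a much cheaper unmap path.
 */
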
3616 static void intel_unmap_page(struct device *dev, dma_addr_t dev_addr,
3617                              size_t size, enum dma_data_direction dir,
3618                              unsigned long attrs)
3619 {
3620         if (iommu_need_mapping(dev))
3621                 intel_unmap(dev, dev_addr, size);
3622         else
3623                 dma_direct_unmap_page(dev, dev_addr, size, dir, attrs);
3624 }
3625 
3626 static void intel_unmap_resource(struct device *dev, dma_addr_t dev_addr,
3627                 size_t size, enum dma_data_direction dir, unsigned long attrs)
3628 {
3629         if (iommu_need_mapping(dev))
3630                 intel_unmap(dev, dev_addr, size);
3631 }
3632 
3633 static void *intel_alloc_coherent(struct device *dev, size_t size,
3634                                   dma_addr_t *dma_handle, gfp_t flags,
3635                                   unsigned long attrs)
3636 {
3637         struct page *page = NULL;
3638         int order;
3639 
3640         if (!iommu_need_mapping(dev))
3641                 return dma_direct_alloc(dev, size, dma_handle, flags, attrs);
3642 
3643         size = PAGE_ALIGN(size);
3644         order = get_order(size);
3645 
3646         if (gfpflags_allow_blocking(flags)) {
3647                 unsigned int count = size >> PAGE_SHIFT;
3648 
3649                 page = dma_alloc_from_contiguous(dev, count, order,
3650                                                  flags & __GFP_NOWARN);
3651         }
3652 
3653         if (!page)
3654                 page = alloc_pages(flags, order);
3655         if (!page)
3656                 return NULL;
3657         memset(page_address(page), 0, size);
3658 
3659         *dma_handle = __intel_map_single(dev, page_to_phys(page), size,
3660                                          DMA_BIDIRECTIONAL,
3661                                          dev->coherent_dma_mask);
3662         if (*dma_handle != DMA_MAPPING_ERROR)
3663                 return page_address(page);
3664         if (!dma_release_from_contiguous(dev, page, size >> PAGE_SHIFT))
3665                 __free_pages(page, order);
3666 
3667         return NULL;
3668 }
3669 
3670 static void intel_free_coherent(struct device *dev, size_t size, void *vaddr,
3671                                 dma_addr_t dma_handle, unsigned long attrs)
3672 {
3673         int order;
3674         struct page *page = virt_to_page(vaddr);
3675 
3676         if (!iommu_need_mapping(dev))
3677                 return dma_direct_free(dev, size, vaddr, dma_handle, attrs);
3678 
3679         size = PAGE_ALIGN(size);
3680         order = get_order(size);
3681 
3682         intel_unmap(dev, dma_handle, size);
3683         if (!dma_release_from_contiguous(dev, page, size >> PAGE_SHIFT))
3684                 __free_pages(page, order);
3685 }
3686 
3687 static void intel_unmap_sg(struct device *dev, struct scatterlist *sglist,
3688                            int nelems, enum dma_data_direction dir,
3689                            unsigned long attrs)
3690 {
3691         dma_addr_t startaddr = sg_dma_address(sglist) & PAGE_MASK;
3692         unsigned long nrpages = 0;
3693         struct scatterlist *sg;
3694         int i;
3695 
3696         if (!iommu_need_mapping(dev))
3697                 return dma_direct_unmap_sg(dev, sglist, nelems, dir, attrs);
3698 
3699         for_each_sg(sglist, sg, nelems, i) {
3700                 nrpages += aligned_nrpages(sg_dma_address(sg), sg_dma_len(sg));
3701         }
3702 
3703         intel_unmap(dev, startaddr, nrpages << VTD_PAGE_SHIFT);
3704 
3705         trace_unmap_sg(dev, startaddr, nrpages << VTD_PAGE_SHIFT);
3706 }
3707 
3708 static int intel_map_sg(struct device *dev, struct scatterlist *sglist, int nelems,
3709                         enum dma_data_direction dir, unsigned long attrs)
3710 {
3711         int i;
3712         struct dmar_domain *domain;
3713         size_t size = 0;
3714         int prot = 0;
3715         unsigned long iova_pfn;
3716         int ret;
3717         struct scatterlist *sg;
3718         unsigned long start_vpfn;
3719         struct intel_iommu *iommu;
3720 
3721         BUG_ON(dir == DMA_NONE);
3722         if (!iommu_need_mapping(dev))
3723                 return dma_direct_map_sg(dev, sglist, nelems, dir, attrs);
3724 
3725         domain = find_domain(dev);
3726         if (!domain)
3727                 return 0;
3728 
3729         iommu = domain_get_iommu(domain);
3730 
3731         for_each_sg(sglist, sg, nelems, i)
3732                 size += aligned_nrpages(sg->offset, sg->length);
3733 
3734         iova_pfn = intel_alloc_iova(dev, domain, dma_to_mm_pfn(size),
3735                                 *dev->dma_mask);
3736         if (!iova_pfn) {
3737                 sglist->dma_length = 0;
3738                 return 0;
3739         }
3740 
3741         /*
3742          * Check if DMAR supports zero-length reads on write-only
3743          * mappings.
3744          */
3745         if (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL || \
3746                         !cap_zlr(iommu->cap))
3747                 prot |= DMA_PTE_READ;
3748         if (dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL)
3749                 prot |= DMA_PTE_WRITE;
3750 
3751         start_vpfn = mm_to_dma_pfn(iova_pfn);
3752 
3753         ret = domain_sg_mapping(domain, start_vpfn, sglist, size, prot);
3754         if (unlikely(ret)) {
3755                 dma_pte_free_pagetable(domain, start_vpfn,
3756                                        start_vpfn + size - 1,
3757                                        agaw_to_level(domain->agaw) + 1);
3758                 free_iova_fast(&domain->iovad, iova_pfn, dma_to_mm_pfn(size));
3759                 return 0;
3760         }
3761 
3762         trace_map_sg(dev, iova_pfn << PAGE_SHIFT,
3763                      sg_phys(sglist), size << VTD_PAGE_SHIFT);
3764 
3765         return nelems;
3766 }
3767 
3768 static u64 intel_get_required_mask(struct device *dev)
3769 {
3770         if (!iommu_need_mapping(dev))
3771                 return dma_direct_get_required_mask(dev);
3772         return DMA_BIT_MASK(32);
3773 }
3774 
3775 static const struct dma_map_ops intel_dma_ops = {
3776         .alloc = intel_alloc_coherent,
3777         .free = intel_free_coherent,
3778         .map_sg = intel_map_sg,
3779         .unmap_sg = intel_unmap_sg,
3780         .map_page = intel_map_page,
3781         .unmap_page = intel_unmap_page,
3782         .map_resource = intel_map_resource,
3783         .unmap_resource = intel_unmap_resource,
3784         .dma_supported = dma_direct_supported,
3785         .mmap = dma_common_mmap,
3786         .get_sgtable = dma_common_get_sgtable,
3787         .get_required_mask = intel_get_required_mask,
3788 };
3789 
3790 static void
3791 bounce_sync_single(struct device *dev, dma_addr_t addr, size_t size,
3792                    enum dma_data_direction dir, enum dma_sync_target target)
3793 {
3794         struct dmar_domain *domain;
3795         phys_addr_t tlb_addr;
3796 
3797         domain = find_domain(dev);
3798         if (WARN_ON(!domain))
3799                 return;
3800 
3801         tlb_addr = intel_iommu_iova_to_phys(&domain->domain, addr);
3802         if (is_swiotlb_buffer(tlb_addr))
3803                 swiotlb_tbl_sync_single(dev, tlb_addr, size, dir, target);
3804 }
3805 
3806 static dma_addr_t
3807 bounce_map_single(struct device *dev, phys_addr_t paddr, size_t size,
3808                   enum dma_data_direction dir, unsigned long attrs,
3809                   u64 dma_mask)
3810 {
3811         size_t aligned_size = ALIGN(size, VTD_PAGE_SIZE);
3812         struct dmar_domain *domain;
3813         struct intel_iommu *iommu;
3814         unsigned long iova_pfn;
3815         unsigned long nrpages;
3816         phys_addr_t tlb_addr;
3817         int prot = 0;
3818         int ret;
3819 
3820         domain = find_domain(dev);
3821         if (WARN_ON(dir == DMA_NONE || !domain))
3822                 return DMA_MAPPING_ERROR;
3823 
3824         iommu = domain_get_iommu(domain);
3825         if (WARN_ON(!iommu))
3826                 return DMA_MAPPING_ERROR;
3827 
3828         nrpages = aligned_nrpages(0, size);
3829         iova_pfn = intel_alloc_iova(dev, domain,
3830                                     dma_to_mm_pfn(nrpages), dma_mask);
3831         if (!iova_pfn)
3832                 return DMA_MAPPING_ERROR;
3833 
3834         /*
3835          * Check if DMAR supports zero-length reads on write-only
3836          * mappings.
3837          */
3838         if (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL ||
3839                         !cap_zlr(iommu->cap))
3840                 prot |= DMA_PTE_READ;
3841         if (dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL)
3842                 prot |= DMA_PTE_WRITE;
3843 
3844         /*
3845          * If both the physical buffer start address and size are
3846          * page aligned, we don't need to use a bounce page.
3847          */
3848         if (!IS_ALIGNED(paddr | size, VTD_PAGE_SIZE)) {
3849                 tlb_addr = swiotlb_tbl_map_single(dev,
3850                                 __phys_to_dma(dev, io_tlb_start),
3851                                 paddr, size, aligned_size, dir, attrs);
3852                 if (tlb_addr == DMA_MAPPING_ERROR) {
3853                         goto swiotlb_error;
3854                 } else {
3855                         /* Clean up the padding area. */
3856                         void *padding_start = phys_to_virt(tlb_addr);
3857                         size_t padding_size = aligned_size;
3858 
3859                         if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC) &&
3860                             (dir == DMA_TO_DEVICE ||
3861                              dir == DMA_BIDIRECTIONAL)) {
3862                                 padding_start += size;
3863                                 padding_size -= size;
3864                         }
3865 
3866                         memset(padding_start, 0, padding_size);
3867                 }
3868         } else {
3869                 tlb_addr = paddr;
3870         }
3871 
3872         ret = domain_pfn_mapping(domain, mm_to_dma_pfn(iova_pfn),
3873                                  tlb_addr >> VTD_PAGE_SHIFT, nrpages, prot);
3874         if (ret)
3875                 goto mapping_error;
3876 
3877         trace_bounce_map_single(dev, iova_pfn << PAGE_SHIFT, paddr, size);
3878 
3879         return (phys_addr_t)iova_pfn << PAGE_SHIFT;
3880 
3881 mapping_error:
3882         if (is_swiotlb_buffer(tlb_addr))
3883                 swiotlb_tbl_unmap_single(dev, tlb_addr, size,
3884                                          aligned_size, dir, attrs);
3885 swiotlb_error:
3886         free_iova_fast(&domain->iovad, iova_pfn, dma_to_mm_pfn(nrpages));
3887         dev_err(dev, "Device bounce map: %zx@%llx dir %d --- failed\n",
3888                 size, (unsigned long long)paddr, dir);
3889 
3890         return DMA_MAPPING_ERROR;
3891 }
3892 
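/*
 * Editorial example (hedged): for an untrusted device mapping 0x100 bytes
 * at paddr 0x10000040, IS_ALIGNED(paddr | size, VTD_PAGE_SIZE) fails, so
 * the data is bounced through a page-aligned swiotlb slot and only that
 * slot is mapped; the remainder of the original page is never exposed to
 * the device.
 */
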
3893 static void
3894 bounce_unmap_single(struct device *dev, dma_addr_t dev_addr, size_t size,
3895                     enum dma_data_direction dir, unsigned long attrs)
3896 {
3897         size_t aligned_size = ALIGN(size, VTD_PAGE_SIZE);
3898         struct dmar_domain *domain;
3899         phys_addr_t tlb_addr;
3900 
3901         domain = find_domain(dev);
3902         if (WARN_ON(!domain))
3903                 return;
3904 
3905         tlb_addr = intel_iommu_iova_to_phys(&domain->domain, dev_addr);
3906         if (WARN_ON(!tlb_addr))
3907                 return;
3908 
3909         intel_unmap(dev, dev_addr, size);
3910         if (is_swiotlb_buffer(tlb_addr))
3911                 swiotlb_tbl_unmap_single(dev, tlb_addr, size,
3912                                          aligned_size, dir, attrs);
3913 
3914         trace_bounce_unmap_single(dev, dev_addr, size);
3915 }
3916 
3917 static dma_addr_t
3918 bounce_map_page(struct device *dev, struct page *page, unsigned long offset,
3919                 size_t size, enum dma_data_direction dir, unsigned long attrs)
3920 {
3921         return bounce_map_single(dev, page_to_phys(page) + offset,
3922                                  size, dir, attrs, *dev->dma_mask);
3923 }
3924 
3925 static dma_addr_t
3926 bounce_map_resource(struct device *dev, phys_addr_t phys_addr, size_t size,
3927                     enum dma_data_direction dir, unsigned long attrs)
3928 {
3929         return bounce_map_single(dev, phys_addr, size,
3930                                  dir, attrs, *dev->dma_mask);
3931 }
3932 
3933 static void
3934 bounce_unmap_page(struct device *dev, dma_addr_t dev_addr, size_t size,
3935                   enum dma_data_direction dir, unsigned long attrs)
3936 {
3937         bounce_unmap_single(dev, dev_addr, size, dir, attrs);
3938 }
3939 
3940 static void
3941 bounce_unmap_resource(struct device *dev, dma_addr_t dev_addr, size_t size,
3942                       enum dma_data_direction dir, unsigned long attrs)
3943 {
3944         bounce_unmap_single(dev, dev_addr, size, dir, attrs);
3945 }
3946 
3947 static void
3948 bounce_unmap_sg(struct device *dev, struct scatterlist *sglist, int nelems,
3949                 enum dma_data_direction dir, unsigned long attrs)
3950 {
3951         struct scatterlist *sg;
3952         int i;
3953 
3954         for_each_sg(sglist, sg, nelems, i)
3955                 bounce_unmap_page(dev, sg->dma_address,
3956                                   sg_dma_len(sg), dir, attrs);
3957 }
3958 
3959 static int
3960 bounce_map_sg(struct device *dev, struct scatterlist *sglist, int nelems,
3961               enum dma_data_direction dir, unsigned long attrs)
3962 {
3963         int i;
3964         struct scatterlist *sg;
3965 
3966         for_each_sg(sglist, sg, nelems, i) {
3967                 sg->dma_address = bounce_map_page(dev, sg_page(sg),
3968                                                   sg->offset, sg->length,
3969                                                   dir, attrs);
3970                 if (sg->dma_address == DMA_MAPPING_ERROR)
3971                         goto out_unmap;
3972                 sg_dma_len(sg) = sg->length;
3973         }
3974 
3975         return nelems;
3976 
3977 out_unmap:
3978         bounce_unmap_sg(dev, sglist, i, dir, attrs | DMA_ATTR_SKIP_CPU_SYNC);
3979         return 0;
3980 }
3981 
3982 static void
3983 bounce_sync_single_for_cpu(struct device *dev, dma_addr_t addr,
3984                            size_t size, enum dma_data_direction dir)
3985 {
3986         bounce_sync_single(dev, addr, size, dir, SYNC_FOR_CPU);
3987 }
3988 
3989 static void
3990 bounce_sync_single_for_device(struct device *dev, dma_addr_t addr,
3991                               size_t size, enum dma_data_direction dir)
3992 {
3993         bounce_sync_single(dev, addr, size, dir, SYNC_FOR_DEVICE);
3994 }
3995 
3996 static void
3997 bounce_sync_sg_for_cpu(struct device *dev, struct scatterlist *sglist,
3998                        int nelems, enum dma_data_direction dir)
3999 {
4000         struct scatterlist *sg;
4001         int i;
4002 
4003         for_each_sg(sglist, sg, nelems, i)
4004                 bounce_sync_single(dev, sg_dma_address(sg),
4005                                    sg_dma_len(sg), dir, SYNC_FOR_CPU);
4006 }
4007 
4008 static void
4009 bounce_sync_sg_for_device(struct device *dev, struct scatterlist *sglist,
4010                           int nelems, enum dma_data_direction dir)
4011 {
4012         struct scatterlist *sg;
4013         int i;
4014 
4015         for_each_sg(sglist, sg, nelems, i)
4016                 bounce_sync_single(dev, sg_dma_address(sg),
4017                                    sg_dma_len(sg), dir, SYNC_FOR_DEVICE);
4018 }
4019 
4020 static const struct dma_map_ops bounce_dma_ops = {
4021         .alloc                  = intel_alloc_coherent,
4022         .free                   = intel_free_coherent,
4023         .map_sg                 = bounce_map_sg,
4024         .unmap_sg               = bounce_unmap_sg,
4025         .map_page               = bounce_map_page,
4026         .unmap_page             = bounce_unmap_page,
4027         .sync_single_for_cpu    = bounce_sync_single_for_cpu,
4028         .sync_single_for_device = bounce_sync_single_for_device,
4029         .sync_sg_for_cpu        = bounce_sync_sg_for_cpu,
4030         .sync_sg_for_device     = bounce_sync_sg_for_device,
4031         .map_resource           = bounce_map_resource,
4032         .unmap_resource         = bounce_unmap_resource,
4033         .dma_supported          = dma_direct_supported,
4034 };
4035 
4036 static inline int iommu_domain_cache_init(void)
4037 {
4038         int ret = 0;
4039 
4040         iommu_domain_cache = kmem_cache_create("iommu_domain",
4041                                          sizeof(struct dmar_domain),
4042                                          0,
4043                                          SLAB_HWCACHE_ALIGN,
4045                                          NULL);
4046         if (!iommu_domain_cache) {
4047                 pr_err("Couldn't create iommu_domain cache\n");
4048                 ret = -ENOMEM;
4049         }
4050 
4051         return ret;
4052 }
4053 
4054 static inline int iommu_devinfo_cache_init(void)
4055 {
4056         int ret = 0;
4057 
4058         iommu_devinfo_cache = kmem_cache_create("iommu_devinfo",
4059                                          sizeof(struct device_domain_info),
4060                                          0,
4061                                          SLAB_HWCACHE_ALIGN,
4062                                          NULL);
4063         if (!iommu_devinfo_cache) {
4064                 pr_err("Couldn't create devinfo cache\n");
4065                 ret = -ENOMEM;
4066         }
4067 
4068         return ret;
4069 }
4070 
4071 static int __init iommu_init_mempool(void)
4072 {
4073         int ret;
4074         ret = iova_cache_get();
4075         if (ret)
4076                 return ret;
4077 
4078         ret = iommu_domain_cache_init();
4079         if (ret)
4080                 goto domain_error;
4081 
4082         ret = iommu_devinfo_cache_init();
4083         if (!ret)
4084                 return ret;
4085 
4086         kmem_cache_destroy(iommu_domain_cache);
4087 domain_error:
4088         iova_cache_put();
4089 
4090         return -ENOMEM;
4091 }
4092 
4093 static void __init iommu_exit_mempool(void)
4094 {
4095         kmem_cache_destroy(iommu_devinfo_cache);
4096         kmem_cache_destroy(iommu_domain_cache);
4097         iova_cache_put();
4098 }
4099 
4100 static void quirk_ioat_snb_local_iommu(struct pci_dev *pdev)
4101 {
4102         struct dmar_drhd_unit *drhd;
4103         u32 vtbar;
4104         int rc;
4105 
4106         /* We know that this device on this chipset has its own IOMMU.
4107          * If we find it under a different IOMMU, then the BIOS is lying
4108          * to us. Hope that the IOMMU for this device is actually
4109          * disabled, and it needs no translation...
4110          */
4111         rc = pci_bus_read_config_dword(pdev->bus, PCI_DEVFN(0, 0), 0xb0, &vtbar);
4112         if (rc) {
4113                 /* "can't" happen */
4114                 dev_info(&pdev->dev, "failed to run vt-d quirk\n");
4115                 return;
4116         }
4117         vtbar &= 0xffff0000;
4118 
4119         /* we know that this IOMMU should be at offset 0xa000 from vtbar */
4120         drhd = dmar_find_matched_drhd_unit(pdev);
4121         if (!drhd || drhd->reg_base_addr - vtbar != 0xa000) {
4122                 pr_warn_once(FW_BUG "BIOS assigned incorrect VT-d unit for Intel(R) QuickData Technology device\n");
4123                 add_taint(TAINT_FIRMWARE_WORKAROUND, LOCKDEP_STILL_OK);
4124                 pdev->dev.archdata.iommu = DUMMY_DEVICE_DOMAIN_INFO;
4125         }
4126 }
4127 DECLARE_PCI_FIXUP_ENABLE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_SNB, quirk_ioat_snb_local_iommu);
4128 
4129 static void __init init_no_remapping_devices(void)
4130 {
4131         struct dmar_drhd_unit *drhd;
4132         struct device *dev;
4133         int i;
4134 
4135         for_each_drhd_unit(drhd) {
4136                 if (!drhd->include_all) {
4137                         for_each_active_dev_scope(drhd->devices,
4138                                                   drhd->devices_cnt, i, dev)
4139                                 break;
4140                         /* ignore DMAR unit if no devices exist */
4141                         if (i == drhd->devices_cnt)
4142                                 drhd->ignored = 1;
4143                 }
4144         }
4145 
4146         for_each_active_drhd_unit(drhd) {
4147                 if (drhd->include_all)
4148                         continue;
4149 
4150                 for_each_active_dev_scope(drhd->devices,
4151                                           drhd->devices_cnt, i, dev)
4152                         if (!dev_is_pci(dev) || !IS_GFX_DEVICE(to_pci_dev(dev)))
4153                                 break;
4154                 if (i < drhd->devices_cnt)
4155                         continue;
4156 
4157                 /* This IOMMU has *only* gfx devices. If we aren't mapping
4158                    gfx, bypass it entirely and mark its devices as dummy. */
4159                 if (!dmar_map_gfx) {
4160                         drhd->ignored = 1;
4161                         for_each_active_dev_scope(drhd->devices,
4162                                                   drhd->devices_cnt, i, dev)
4163                                 dev->archdata.iommu = DUMMY_DEVICE_DOMAIN_INFO;
4164                 }
4165         }
4166 }
4167 
4168 #ifdef CONFIG_SUSPEND
4169 static int init_iommu_hw(void)
4170 {
4171         struct dmar_drhd_unit *drhd;
4172         struct intel_iommu *iommu = NULL;
4173 
4174         for_each_active_iommu(iommu, drhd)
4175                 if (iommu->qi)
4176                         dmar_reenable_qi(iommu);
4177 
4178         for_each_iommu(iommu, drhd) {
4179                 if (drhd->ignored) {
4180                         /*
4181                          * we always have to disable PMRs or DMA may fail on
4182                          * this device
4183                          */
4184                         if (force_on)
4185                                 iommu_disable_protect_mem_regions(iommu);
4186                         continue;
4187                 }
4188 
4189                 iommu_flush_write_buffer(iommu);
4190 
4191                 iommu_set_root_entry(iommu);
4192 
4193                 iommu->flush.flush_context(iommu, 0, 0, 0,
4194                                            DMA_CCMD_GLOBAL_INVL);
4195                 iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH);
4196                 iommu_enable_translation(iommu);
4197                 iommu_disable_protect_mem_regions(iommu);
4198         }
4199 
4200         return 0;
4201 }
4202 
4203 static void iommu_flush_all(void)
4204 {
4205         struct dmar_drhd_unit *drhd;
4206         struct intel_iommu *iommu;
4207 
4208         for_each_active_iommu(iommu, drhd) {
4209                 iommu->flush.flush_context(iommu, 0, 0, 0,
4210                                            DMA_CCMD_GLOBAL_INVL);
4211                 iommu->flush.flush_iotlb(iommu, 0, 0, 0,
4212                                          DMA_TLB_GLOBAL_FLUSH);
4213         }
4214 }
4215 
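/*
 * Suspend path: flush all caches, disable translation and save the
 * fault-event registers of every active IOMMU; iommu_resume() re-initializes
 * the hardware and restores the saved registers.
 */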
4216 static int iommu_suspend(void)
4217 {
4218         struct dmar_drhd_unit *drhd;
4219         struct intel_iommu *iommu = NULL;
4220         unsigned long flag;
4221 
4222         for_each_active_iommu(iommu, drhd) {
4223                 iommu->iommu_state = kcalloc(MAX_SR_DMAR_REGS, sizeof(u32),
4224                                                  GFP_ATOMIC);
4225                 if (!iommu->iommu_state)
4226                         goto nomem;
4227         }
4228 
4229         iommu_flush_all();
4230 
4231         for_each_active_iommu(iommu, drhd) {
4232                 iommu_disable_translation(iommu);
4233 
4234                 raw_spin_lock_irqsave(&iommu->register_lock, flag);
4235 
4236                 iommu->iommu_state[SR_DMAR_FECTL_REG] =
4237                         readl(iommu->reg + DMAR_FECTL_REG);
4238                 iommu->iommu_state[SR_DMAR_FEDATA_REG] =
4239                         readl(iommu->reg + DMAR_FEDATA_REG);
4240                 iommu->iommu_state[SR_DMAR_FEADDR_REG] =
4241                         readl(iommu->reg + DMAR_FEADDR_REG);
4242                 iommu->iommu_state[SR_DMAR_FEUADDR_REG] =
4243                         readl(iommu->reg + DMAR_FEUADDR_REG);
4244 
4245                 raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
4246         }
4247         return 0;
4248 
4249 nomem:
4250         for_each_active_iommu(iommu, drhd)
4251                 kfree(iommu->iommu_state);
4252 
4253         return -ENOMEM;
4254 }
4255 
4256 static void iommu_resume(void)
4257 {
4258         struct dmar_drhd_unit *drhd;
4259         struct intel_iommu *iommu = NULL;
4260         unsigned long flag;
4261 
4262         if (init_iommu_hw()) {
4263                 if (force_on)
4264                         panic("tboot: IOMMU setup failed, DMAR can not resume!\n");
4265                 else
4266                         WARN(1, "IOMMU setup failed, DMAR can not resume!\n");
4267                 return;
4268         }
4269 
4270         for_each_active_iommu(iommu, drhd) {
4271 
4272                 raw_spin_lock_irqsave(&iommu->register_lock, flag);
4273 
4274                 writel(iommu->iommu_state[SR_DMAR_FECTL_REG],
4275                         iommu->reg + DMAR_FECTL_REG);
4276                 writel(iommu->iommu_state[SR_DMAR_FEDATA_REG],
4277                         iommu->reg + DMAR_FEDATA_REG);
4278                 writel(iommu->iommu_state[SR_DMAR_FEADDR_REG],
4279                         iommu->reg + DMAR_FEADDR_REG);
4280                 writel(iommu->iommu_state[SR_DMAR_FEUADDR_REG],
4281                         iommu->reg + DMAR_FEUADDR_REG);
4282 
4283                 raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
4284         }
4285 
4286         for_each_active_iommu(iommu, drhd)
4287                 kfree(iommu->iommu_state);
4288 }
4289 
4290 static struct syscore_ops iommu_syscore_ops = {
4291         .resume         = iommu_resume,
4292         .suspend        = iommu_suspend,
4293 };
4294 
4295 static void __init init_iommu_pm_ops(void)
4296 {
4297         register_syscore_ops(&iommu_syscore_ops);
4298 }
4299 
4300 #else
4301 static inline void init_iommu_pm_ops(void) {}
4302 #endif  /* CONFIG_SUSPEND */
4303 
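/*
 * Parse one RMRR entry from the DMAR table, record its reserved address
 * range and device scope, and add it to the dmar_rmrr_units list.
 */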
4304 int __init dmar_parse_one_rmrr(struct acpi_dmar_header *header, void *arg)
4305 {
4306         struct acpi_dmar_reserved_memory *rmrr;
4307         struct dmar_rmrr_unit *rmrru;
4308 
4309         rmrru = kzalloc(sizeof(*rmrru), GFP_KERNEL);
4310         if (!rmrru)
4311                 goto out;
4312 
4313         rmrru->hdr = header;
4314         rmrr = (struct acpi_dmar_reserved_memory *)header;
4315         rmrru->base_address = rmrr->base_address;
4316         rmrru->end_address = rmrr->end_address;
4317 
4318         rmrru->devices = dmar_alloc_dev_scope((void *)(rmrr + 1),
4319                                 ((void *)rmrr) + rmrr->header.length,
4320                                 &rmrru->devices_cnt);
4321         if (rmrru->devices_cnt && rmrru->devices == NULL)
4322                 goto free_rmrru;
4323 
4324         list_add(&rmrru->list, &dmar_rmrr_units);
4325 
4326         return 0;
4327 free_rmrru:
4328         kfree(rmrru);
4329 out:
4330         return -ENOMEM;
4331 }
4332 
4333 static struct dmar_atsr_unit *dmar_find_atsr(struct acpi_dmar_atsr *atsr)
4334 {
4335         struct dmar_atsr_unit *atsru;
4336         struct acpi_dmar_atsr *tmp;
4337 
4338         list_for_each_entry_rcu(atsru, &dmar_atsr_units, list,
4339                                 dmar_rcu_check()) {
4340                 tmp = (struct acpi_dmar_atsr *)atsru->hdr;
4341                 if (atsr->segment != tmp->segment)
4342                         continue;
4343                 if (atsr->header.length != tmp->header.length)
4344                         continue;
4345                 if (memcmp(atsr, tmp, atsr->header.length) == 0)
4346                         return atsru;
4347         }
4348 
4349         return NULL;
4350 }
4351 
4352 int dmar_parse_one_atsr(struct acpi_dmar_header *hdr, void *arg)
4353 {
4354         struct acpi_dmar_atsr *atsr;
4355         struct dmar_atsr_unit *atsru;
4356 
4357         if (system_state >= SYSTEM_RUNNING && !intel_iommu_enabled)
4358                 return 0;
4359 
4360         atsr = container_of(hdr, struct acpi_dmar_atsr, header);
4361         atsru = dmar_find_atsr(atsr);
4362         if (atsru)
4363                 return 0;
4364 
4365         atsru = kzalloc(sizeof(*atsru) + hdr->length, GFP_KERNEL);
4366         if (!atsru)
4367                 return -ENOMEM;
4368 
4369         /*
4370          * If memory is allocated from slab by ACPI _DSM method, we need to
4371          * copy the memory content because the memory buffer will be freed
4372          * on return.
4373          */
4374         atsru->hdr = (void *)(atsru + 1);
4375         memcpy(atsru->hdr, hdr, hdr->length);
4376         atsru->include_all = atsr->flags & 0x1;
4377         if (!atsru->include_all) {
4378                 atsru->devices = dmar_alloc_dev_scope((void *)(atsr + 1),
4379                                 (void *)atsr + atsr->header.length,
4380                                 &atsru->devices_cnt);
4381                 if (atsru->devices_cnt && atsru->devices == NULL) {
4382                         kfree(atsru);
4383                         return -ENOMEM;
4384                 }
4385         }
4386 
4387         list_add_rcu(&atsru->list, &dmar_atsr_units);
4388 
4389         return 0;
4390 }
4391 
4392 static void intel_iommu_free_atsr(struct dmar_atsr_unit *atsru)
4393 {
4394         dmar_free_dev_scope(&atsru->devices, &atsru->devices_cnt);
4395         kfree(atsru);
4396 }
4397 
4398 int dmar_release_one_atsr(struct acpi_dmar_header *hdr, void *arg)
4399 {
4400         struct acpi_dmar_atsr *atsr;
4401         struct dmar_atsr_unit *atsru;
4402 
4403         atsr = container_of(hdr, struct acpi_dmar_atsr, header);
4404         atsru = dmar_find_atsr(atsr);
4405         if (atsru) {
4406                 list_del_rcu(&atsru->list);
4407                 synchronize_rcu();
4408                 intel_iommu_free_atsr(atsru);
4409         }
4410 
4411         return 0;
4412 }
4413 
4414 int dmar_check_one_atsr(struct acpi_dmar_header *hdr, void *arg)
4415 {
4416         int i;
4417         struct device *dev;
4418         struct acpi_dmar_atsr *atsr;
4419         struct dmar_atsr_unit *atsru;
4420 
4421         atsr = container_of(hdr, struct acpi_dmar_atsr, header);
4422         atsru = dmar_find_atsr(atsr);
4423         if (!atsru)
4424                 return 0;
4425 
4426         if (!atsru->include_all && atsru->devices && atsru->devices_cnt) {
4427                 for_each_active_dev_scope(atsru->devices, atsru->devices_cnt,
4428                                           i, dev)
4429                         return -EBUSY;
4430         }
4431 
4432         return 0;
4433 }
4434 
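/*
 * Bring up a hot-added DMAR unit: verify its capabilities match the global
 * configuration (pass-through, snooping, superpage), allocate its domains
 * and root entry, and enable queued invalidation, the fault interrupt and
 * translation unless the unit is marked as ignored.
 */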
4435 static int intel_iommu_add(struct dmar_drhd_unit *dmaru)
4436 {
4437         int sp, ret;
4438         struct intel_iommu *iommu = dmaru->iommu;
4439 
4440         if (g_iommus[iommu->seq_id])
4441                 return 0;
4442 
4443         if (hw_pass_through && !ecap_pass_through(iommu->ecap)) {
4444                 pr_warn("%s: Doesn't support hardware pass through.\n",
4445                         iommu->name);
4446                 return -ENXIO;
4447         }
4448         if (!ecap_sc_support(iommu->ecap) &&
4449             domain_update_iommu_snooping(iommu)) {
4450                 pr_warn("%s: Doesn't support snooping.\n",
4451                         iommu->name);
4452                 return -ENXIO;
4453         }
4454         sp = domain_update_iommu_superpage(iommu) - 1;
4455         if (sp >= 0 && !(cap_super_page_val(iommu->cap) & (1 << sp))) {
4456                 pr_warn("%s: Doesn't support large page.\n",
4457                         iommu->name);
4458                 return -ENXIO;
4459         }
4460 
4461         /*
4462          * Disable translation if already enabled prior to OS handover.
4463          */
4464         if (iommu->gcmd & DMA_GCMD_TE)
4465                 iommu_disable_translation(iommu);
4466 
4467         g_iommus[iommu->seq_id] = iommu;
4468         ret = iommu_init_domains(iommu);
4469         if (ret == 0)
4470                 ret = iommu_alloc_root_entry(iommu);
4471         if (ret)
4472                 goto out;
4473 
4474 #ifdef CONFIG_INTEL_IOMMU_SVM
4475         if (pasid_supported(iommu))
4476                 intel_svm_init(iommu);
4477 #endif
4478 
4479         if (dmaru->ignored) {
4480                 /*
4481                  * we always have to disable PMRs or DMA may fail on this device
4482                  */
4483                 if (force_on)
4484                         iommu_disable_protect_mem_regions(iommu);
4485                 return 0;
4486         }
4487 
4488         intel_iommu_init_qi(iommu);
4489         iommu_flush_write_buffer(iommu);
4490 
4491 #ifdef CONFIG_INTEL_IOMMU_SVM
4492         if (pasid_supported(iommu) && ecap_prs(iommu->ecap)) {
4493                 ret = intel_svm_enable_prq(iommu);
4494                 if (ret)
4495                         goto disable_iommu;
4496         }
4497 #endif
4498         ret = dmar_set_interrupt(iommu);
4499         if (ret)
4500                 goto disable_iommu;
4501 
4502         iommu_set_root_entry(iommu);
4503         iommu->flush.flush_context(iommu, 0, 0, 0, DMA_CCMD_GLOBAL_INVL);
4504         iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH);
4505         iommu_enable_translation(iommu);
4506 
4507         iommu_disable_protect_mem_regions(iommu);
4508         return 0;
4509 
4510 disable_iommu:
4511         disable_dmar_iommu(iommu);
4512 out:
4513         free_dmar_iommu(iommu);
4514         return ret;
4515 }
4516 
4517 int dmar_iommu_hotplug(struct dmar_drhd_unit *dmaru, bool insert)
4518 {
4519         int ret = 0;
4520         struct intel_iommu *iommu = dmaru->iommu;
4521 
4522         if (!intel_iommu_enabled)
4523                 return 0;
4524         if (iommu == NULL)
4525                 return -EINVAL;
4526 
4527         if (insert) {
4528                 ret = intel_iommu_add(dmaru);
4529         } else {
4530                 disable_dmar_iommu(iommu);
4531                 free_dmar_iommu(iommu);
4532         }
4533 
4534         return ret;
4535 }
4536 
4537 static void intel_iommu_free_dmars(void)
4538 {
4539         struct dmar_rmrr_unit *rmrru, *rmrr_n;
4540         struct dmar_atsr_unit *atsru, *atsr_n;
4541 
4542         list_for_each_entry_safe(rmrru, rmrr_n, &dmar_rmrr_units, list) {
4543                 list_del(&rmrru->list);
4544                 dmar_free_dev_scope(&rmrru->devices, &rmrru->devices_cnt);
4545                 kfree(rmrru);
4546         }
4547 
4548         list_for_each_entry_safe(atsru, atsr_n, &dmar_atsr_units, list) {
4549                 list_del(&atsru->list);
4550                 intel_iommu_free_atsr(atsru);
4551         }
4552 }
4553 
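/*
 * Decide whether ATS may be used for @dev: integrated devices (no upstream
 * bridge) are allowed, devices behind conventional PCI bridges are not, and
 * otherwise the root port is looked up in the parsed ATSR units.
 */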
4554 int dmar_find_matched_atsr_unit(struct pci_dev *dev)
4555 {
4556         int i, ret = 1;
4557         struct pci_bus *bus;
4558         struct pci_dev *bridge = NULL;
4559         struct device *tmp;
4560         struct acpi_dmar_atsr *atsr;
4561         struct dmar_atsr_unit *atsru;
4562 
4563         dev = pci_physfn(dev);
4564         for (bus = dev->bus; bus; bus = bus->parent) {
4565                 bridge = bus->self;
4566                 /* If it's an integrated device, allow ATS */
4567                 if (!bridge)
4568                         return 1;
4569                 /* Connected via non-PCIe: no ATS */
4570                 if (!pci_is_pcie(bridge) ||
4571                     pci_pcie_type(bridge) == PCI_EXP_TYPE_PCI_BRIDGE)
4572                         return 0;
4573                 /* If we found the root port, look it up in the ATSR */
4574                 if (pci_pcie_type(bridge) == PCI_EXP_TYPE_ROOT_PORT)
4575                         break;
4576         }
4577 
4578         rcu_read_lock();
4579         list_for_each_entry_rcu(atsru, &dmar_atsr_units, list) {
4580                 atsr = container_of(atsru->hdr, struct acpi_dmar_atsr, header);
4581                 if (atsr->segment != pci_domain_nr(dev->bus))
4582                         continue;
4583 
4584                 for_each_dev_scope(atsru->devices, atsru->devices_cnt, i, tmp)
4585                         if (tmp == &bridge->dev)
4586                                 goto out;
4587 
4588                 if (atsru->include_all)
4589                         goto out;
4590         }
4591         ret = 0;
4592 out:
4593         rcu_read_unlock();
4594 
4595         return ret;
4596 }
4597 
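/*
 * PCI bus notifier hook: keep the RMRR and ATSR device scopes in sync when
 * devices are added to or removed from the system.
 */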
4598 int dmar_iommu_notify_scope_dev(struct dmar_pci_notify_info *info)
4599 {
4600         int ret;
4601         struct dmar_rmrr_unit *rmrru;
4602         struct dmar_atsr_unit *atsru;
4603         struct acpi_dmar_atsr *atsr;
4604         struct acpi_dmar_reserved_memory *rmrr;
4605 
4606         if (!intel_iommu_enabled && system_state >= SYSTEM_RUNNING)
4607                 return 0;
4608 
4609         list_for_each_entry(rmrru, &dmar_rmrr_units, list) {
4610                 rmrr = container_of(rmrru->hdr,
4611                                     struct acpi_dmar_reserved_memory, header);
4612                 if (info->event == BUS_NOTIFY_ADD_DEVICE) {
4613                         ret = dmar_insert_dev_scope(info, (void *)(rmrr + 1),
4614                                 ((void *)rmrr) + rmrr->header.length,
4615                                 rmrr->segment, rmrru->devices,
4616                                 rmrru->devices_cnt);
4617                         if (ret < 0)
4618                                 return ret;
4619                 } else if (info->event == BUS_NOTIFY_REMOVED_DEVICE) {
4620                         dmar_remove_dev_scope(info, rmrr->segment,
4621                                 rmrru->devices, rmrru->devices_cnt);
4622                 }
4623         }
4624 
4625         list_for_each_entry(atsru, &dmar_atsr_units, list) {
4626                 if (atsru->include_all)
4627                         continue;
4628 
4629                 atsr = container_of(atsru->hdr, struct acpi_dmar_atsr, header);
4630                 if (info->event == BUS_NOTIFY_ADD_DEVICE) {
4631                         ret = dmar_insert_dev_scope(info, (void *)(atsr + 1),
4632                                         (void *)atsr + atsr->header.length,
4633                                         atsr->segment, atsru->devices,
4634                                         atsru->devices_cnt);
4635                         if (ret > 0)
4636                                 break;
4637                         else if (ret < 0)
4638                                 return ret;
4639                 } else if (info->event == BUS_NOTIFY_REMOVED_DEVICE) {
4640                         if (dmar_remove_dev_scope(info, atsr->segment,
4641                                         atsru->devices, atsru->devices_cnt))
4642                                 break;
4643                 }
4644         }
4645 
4646         return 0;
4647 }
4648 
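/*
 * Memory hotplug notifier for the static identity (si) domain: extend the
 * identity map when memory goes online, and unmap and flush the
 * corresponding IOVA range when it goes offline.
 */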
4649 static int intel_iommu_memory_notifier(struct notifier_block *nb,
4650                                        unsigned long val, void *v)
4651 {
4652         struct memory_notify *mhp = v;
4653         unsigned long long start, end;
4654         unsigned long start_vpfn, last_vpfn;
4655 
4656         switch (val) {
4657         case MEM_GOING_ONLINE:
4658                 start = mhp->start_pfn << PAGE_SHIFT;
4659                 end = ((mhp->start_pfn + mhp->nr_pages) << PAGE_SHIFT) - 1;
4660                 if (iommu_domain_identity_map(si_domain, start, end)) {
4661                         pr_warn("Failed to build identity map for [%llx-%llx]\n",
4662                                 start, end);
4663                         return NOTIFY_BAD;
4664                 }
4665                 break;
4666 
4667         case MEM_OFFLINE:
4668         case MEM_CANCEL_ONLINE:
4669                 start_vpfn = mm_to_dma_pfn(mhp->start_pfn);
4670                 last_vpfn = mm_to_dma_pfn(mhp->start_pfn + mhp->nr_pages - 1);
4671                 while (start_vpfn <= last_vpfn) {
4672                         struct iova *iova;
4673                         struct dmar_drhd_unit *drhd;
4674                         struct intel_iommu *iommu;
4675                         struct page *freelist;
4676 
4677                         iova = find_iova(&si_domain->iovad, start_vpfn);
4678                         if (iova == NULL) {
4679                                 pr_debug("Failed to get IOVA for PFN %lx\n",
4680                                          start_vpfn);
4681                                 break;
4682                         }
4683 
4684                         iova = split_and_remove_iova(&si_domain->iovad, iova,
4685                                                      start_vpfn, last_vpfn);
4686                         if (iova == NULL) {
4687                                 pr_warn("Failed to split IOVA PFN [%lx-%lx]\n",
4688                                         start_vpfn, last_vpfn);
4689                                 return NOTIFY_BAD;
4690                         }
4691 
4692                         freelist = domain_unmap(si_domain, iova->pfn_lo,
4693                                                iova->pfn_hi);
4694 
4695                         rcu_read_lock();
4696                         for_each_active_iommu(iommu, drhd)
4697                                 iommu_flush_iotlb_psi(iommu, si_domain,
4698                                         iova->pfn_lo, iova_size(iova),
4699                                         !freelist, 0);
4700                         rcu_read_unlock();
4701                         dma_free_pagelist(freelist);
4702 
4703                         start_vpfn = iova->pfn_hi + 1;
4704                         free_iova_mem(iova);
4705                 }
4706                 break;
4707         }
4708 
4709         return NOTIFY_OK;
4710 }
4711 
4712 static struct notifier_block intel_iommu_memory_nb = {
4713         .notifier_call = intel_iommu_memory_notifier,
4714         .priority = 0
4715 };
4716 
4717 static void free_all_cpu_cached_iovas(unsigned int cpu)
4718 {
4719         int i;
4720 
4721         for (i = 0; i < g_num_of_iommus; i++) {
4722                 struct intel_iommu *iommu = g_iommus[i];
4723                 struct dmar_domain *domain;
4724                 int did;
4725 
4726                 if (!iommu)
4727                         continue;
4728 
4729                 for (did = 0; did < cap_ndoms(iommu->cap); did++) {
4730                         domain = get_iommu_domain(iommu, (u16)did);
4731 
4732                         if (!domain)
4733                                 continue;
4734                         free_cpu_cached_iovas(cpu, &domain->iovad);
4735                 }
4736         }
4737 }
4738 
4739 static int intel_iommu_cpu_dead(unsigned int cpu)
4740 {
4741         free_all_cpu_cached_iovas(cpu);
4742         return 0;
4743 }
4744 
4745 static void intel_disable_iommus(void)
4746 {
4747         struct intel_iommu *iommu = NULL;
4748         struct dmar_drhd_unit *drhd;
4749 
4750         for_each_iommu(iommu, drhd)
4751                 iommu_disable_translation(iommu);
4752 }
4753 
4754 static inline struct intel_iommu *dev_to_intel_iommu(struct device *dev)
4755 {
4756         struct iommu_device *iommu_dev = dev_to_iommu_device(dev);
4757 
4758         return container_of(iommu_dev, struct intel_iommu, iommu);
4759 }
4760 
4761 static ssize_t intel_iommu_show_version(struct device *dev,
4762                                         struct device_attribute *attr,
4763                                         char *buf)
4764 {
4765         struct intel_iommu *iommu = dev_to_intel_iommu(dev);
4766         u32 ver = readl(iommu->reg + DMAR_VER_REG);
4767         return sprintf(buf, "%d:%d\n",
4768                        DMAR_VER_MAJOR(ver), DMAR_VER_MINOR(ver));
4769 }
4770 static DEVICE_ATTR(version, S_IRUGO, intel_iommu_show_version, NULL);
4771 
4772 static ssize_t intel_iommu_show_address(struct device *dev,
4773                                         struct device_attribute *attr,
4774                                         char *buf)
4775 {
4776         struct intel_iommu *iommu = dev_to_intel_iommu(dev);
4777         return sprintf(buf, "%llx\n", iommu->reg_phys);
4778 }
4779 static DEVICE_ATTR(address, S_IRUGO, intel_iommu_show_address, NULL);
4780 
4781 static ssize_t intel_iommu_show_cap(struct device *dev,
4782                                     struct device_attribute *attr,
4783                                     char *buf)
4784 {
4785         struct intel_iommu *iommu = dev_to_intel_iommu(dev);
4786         return sprintf(buf, "%llx\n", iommu->cap);
4787 }
4788 static DEVICE_ATTR(cap, S_IRUGO, intel_iommu_show_cap, NULL);
4789 
4790 static ssize_t intel_iommu_show_ecap(struct device *dev,
4791                                     struct device_attribute *attr,
4792                                     char *buf)
4793 {
4794         struct intel_iommu *iommu = dev_to_intel_iommu(dev);
4795         return sprintf(buf, "%llx\n", iommu->ecap);
4796 }
4797 static DEVICE_ATTR(ecap, S_IRUGO, intel_iommu_show_ecap, NULL);
4798 
4799 static ssize_t intel_iommu_show_ndoms(struct device *dev,
4800                                       struct device_attribute *attr,
4801                                       char *buf)
4802 {
4803         struct intel_iommu *iommu = dev_to_intel_iommu(dev);
4804         return sprintf(buf, "%ld\n", cap_ndoms(iommu->cap));
4805 }
4806 static DEVICE_ATTR(domains_supported, S_IRUGO, intel_iommu_show_ndoms, NULL);
4807 
4808 static ssize_t intel_iommu_show_ndoms_used(struct device *dev,
4809                                            struct device_attribute *attr,
4810                                            char *buf)
4811 {
4812         struct intel_iommu *iommu = dev_to_intel_iommu(dev);
4813         return sprintf(buf, "%d\n", bitmap_weight(iommu->domain_ids,
4814                                                   cap_ndoms(iommu->cap)));
4815 }
4816 static DEVICE_ATTR(domains_used, S_IRUGO, intel_iommu_show_ndoms_used, NULL);
4817 
4818 static struct attribute *intel_iommu_attrs[] = {
4819         &dev_attr_version.attr,
4820         &dev_attr_address.attr,
4821         &dev_attr_cap.attr,
4822         &dev_attr_ecap.attr,
4823         &dev_attr_domains_supported.attr,
4824         &dev_attr_domains_used.attr,
4825         NULL,
4826 };
4827 
4828 static struct attribute_group intel_iommu_group = {
4829         .name = "intel-iommu",
4830         .attrs = intel_iommu_attrs,
4831 };
4832 
4833 const struct attribute_group *intel_iommu_groups[] = {
4834         &intel_iommu_group,
4835         NULL,
4836 };
4837 
4838 static inline bool has_untrusted_dev(void)
4839 {
4840         struct pci_dev *pdev = NULL;
4841 
4842         for_each_pci_dev(pdev)
4843                 if (pdev->untrusted)
4844                         return true;
4845 
4846         return false;
4847 }
4848 
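/*
 * If the platform opted in to DMA protection and an untrusted device is
 * present, force the IOMMU on even if it was disabled on the command line;
 * when it had been disabled, default to identity mapping for all devices
 * except the untrusted ones.
 */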
4849 static int __init platform_optin_force_iommu(void)
4850 {
4851         if (!dmar_platform_optin() || no_platform_optin || !has_untrusted_dev())
4852                 return 0;
4853 
4854         if (no_iommu || dmar_disabled)
4855                 pr_info("Intel-IOMMU force enabled due to platform opt in\n");
4856 
4857         /*
4858          * If Intel-IOMMU is disabled by default, we will apply identity
4859          * map for all devices except those marked as being untrusted.
4860          */
4861         if (dmar_disabled)
4862                 iommu_identity_mapping |= IDENTMAP_ALL;
4863 
4864         dmar_disabled = 0;
4865         no_iommu = 0;
4866 
4867         return 1;
4868 }
4869 
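/*
 * Walk the ACPI namespace devices in each DRHD device scope and probe the
 * physical devices behind them so they are attached to the IOMMU as well.
 */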
4870 static int __init probe_acpi_namespace_devices(void)
4871 {
4872         struct dmar_drhd_unit *drhd;
4873         /* To avoid a -Wunused-but-set-variable warning. */
4874         struct intel_iommu *iommu __maybe_unused;
4875         struct device *dev;
4876         int i, ret = 0;
4877 
4878         for_each_active_iommu(iommu, drhd) {
4879                 for_each_active_dev_scope(drhd->devices,
4880                                           drhd->devices_cnt, i, dev) {
4881                         struct acpi_device_physical_node *pn;
4882                         struct iommu_group *group;
4883                         struct acpi_device *adev;
4884 
4885                         if (dev->bus != &acpi_bus_type)
4886                                 continue;
4887 
4888                         adev = to_acpi_device(dev);
4889                         mutex_lock(&adev->physical_node_lock);
4890                         list_for_each_entry(pn,
4891                                             &adev->physical_node_list, node) {
4892                                 group = iommu_group_get(pn->dev);
4893                                 if (group) {
4894                                         iommu_group_put(group);
4895                                         continue;
4896                                 }
4897 
4898                                 pn->dev->bus->iommu_ops = &intel_iommu_ops;
4899                                 ret = iommu_probe_device(pn->dev);
4900                                 if (ret)
4901                                         break;
4902                         }
4903                         mutex_unlock(&adev->physical_node_lock);
4904 
4905                         if (ret)
4906                                 return ret;
4907                 }
4908         }
4909 
4910         return 0;
4911 }
4912 
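/*
 * Main initialization entry point: parse the DMAR table and device scopes,
 * honour no_iommu/dmar_disabled (still tearing down protected memory
 * regions when tboot requires it), initialize the DMARs, install the DMA
 * ops, register sysfs and IOMMU-core hooks, and finally enable translation
 * on every unit that is not ignored or already pre-enabled.
 */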
4913 int __init intel_iommu_init(void)
4914 {
4915         int ret = -ENODEV;
4916         struct dmar_drhd_unit *drhd;
4917         struct intel_iommu *iommu;
4918 
4919         /*
4920          * Intel IOMMU is required for a TXT/tboot launch or platform
4921          * opt in, so enforce that.
4922          */
4923         force_on = tboot_force_iommu() || platform_optin_force_iommu();
4924 
4925         if (iommu_init_mempool()) {
4926                 if (force_on)
4927                         panic("tboot: Failed to initialize iommu memory\n");
4928                 return -ENOMEM;
4929         }
4930 
4931         down_write(&dmar_global_lock);
4932         if (dmar_table_init()) {
4933                 if (force_on)
4934                         panic("tboot: Failed to initialize DMAR table\n");
4935                 goto out_free_dmar;
4936         }
4937 
4938         if (dmar_dev_scope_init() < 0) {
4939                 if (force_on)
4940                         panic("tboot: Failed to initialize DMAR device scope\n");
4941                 goto out_free_dmar;
4942         }
4943 
4944         up_write(&dmar_global_lock);
4945 
4946         /*
4947          * The bus notifier takes the dmar_global_lock, so lockdep will
4948          * complain later when we register it under the lock.
4949          */
4950         dmar_register_bus_notifier();
4951 
4952         down_write(&dmar_global_lock);
4953 
4954         if (!no_iommu)
4955                 intel_iommu_debugfs_init();
4956 
4957         if (no_iommu || dmar_disabled) {
4958                 /*
4959                  * We exit the function here to ensure IOMMU's remapping and
4960                  * mempool aren't set up, which means that the IOMMU's PMRs
4961                  * won't be disabled via the call to init_dmars(). So disable
4962                  * them explicitly here. The PMRs were set up by tboot prior to
4963                  * calling SENTER, but the kernel is expected to reset/tear
4964                  * down the PMRs.
4965                  */
4966                 if (intel_iommu_tboot_noforce) {
4967                         for_each_iommu(iommu, drhd)
4968                                 iommu_disable_protect_mem_regions(iommu);
4969                 }
4970 
4971                 /*
4972                  * Make sure the IOMMUs are switched off, even when we
4973                  * boot into a kexec kernel and the previous kernel left
4974                  * them enabled
4975                  */
4976                 intel_disable_iommus();
4977                 goto out_free_dmar;
4978         }
4979 
4980         if (list_empty(&dmar_rmrr_units))
4981                 pr_info("No RMRR found\n");
4982 
4983         if (list_empty(&dmar_atsr_units))
4984                 pr_info("No ATSR found\n");
4985 
4986         if (dmar_init_reserved_ranges()) {
4987                 if (force_on)
4988                         panic("tboot: Failed to reserve iommu ranges\n");
4989                 goto out_free_reserved_range;
4990         }
4991 
4992         if (dmar_map_gfx)
4993                 intel_iommu_gfx_mapped = 1;
4994 
4995         init_no_remapping_devices();
4996 
4997         ret = init_dmars();
4998         if (ret) {
4999                 if (force_on)
5000                         panic("tboot: Failed to initialize DMARs\n");
5001                 pr_err("Initialization failed\n");
5002                 goto out_free_reserved_range;
5003         }
5004         up_write(&dmar_global_lock);
5005 
5006 #if defined(CONFIG_X86) && defined(CONFIG_SWIOTLB)
5007         /*
5008          * If the system has no untrusted device or the user has decided
5009          * to disable the bounce page mechanisms, we don't need swiotlb.
5010          * Mark this, and the pre-allocated bounce pages will be released
5011          * later.
5012          */
5013         if (!has_untrusted_dev() || intel_no_bounce)
5014                 swiotlb = 0;
5015 #endif
5016         dma_ops = &intel_dma_ops;
5017 
5018         init_iommu_pm_ops();
5019 
5020         down_read(&dmar_global_lock);
5021         for_each_active_iommu(iommu, drhd) {
5022                 iommu_device_sysfs_add(&iommu->iommu, NULL,
5023                                        intel_iommu_groups,
5024                                        "%s", iommu->name);
5025                 iommu_device_set_ops(&iommu->iommu, &intel_iommu_ops);
5026                 iommu_device_register(&iommu->iommu);
5027         }
5028         up_read(&dmar_global_lock);
5029 
5030         bus_set_iommu(&pci_bus_type, &intel_iommu_ops);
5031         if (si_domain && !hw_pass_through)
5032                 register_memory_notifier(&intel_iommu_memory_nb);
5033         cpuhp_setup_state(CPUHP_IOMMU_INTEL_DEAD, "iommu/intel:dead", NULL,
5034                           intel_iommu_cpu_dead);
5035 
5036         down_read(&dmar_global_lock);
5037         if (probe_acpi_namespace_devices())
5038                 pr_warn("ACPI name space devices didn't probe correctly\n");
5039 
5040         /* Finally, we enable the DMA remapping hardware. */
5041         for_each_iommu(iommu, drhd) {
5042                 if (!drhd->ignored && !translation_pre_enabled(iommu))
5043                         iommu_enable_translation(iommu);
5044 
5045                 iommu_disable_protect_mem_regions(iommu);
5046         }
5047         up_read(&dmar_global_lock);
5048 
5049         pr_info("Intel(R) Virtualization Technology for Directed I/O\n");
5050 
5051         intel_iommu_enabled = 1;
5052 
5053         return 0;
5054 
5055 out_free_reserved_range:
5056         put_iova_domain(&reserved_iova_list);
5057 out_free_dmar:
5058         intel_iommu_free_dmars();
5059         up_write(&dmar_global_lock);
5060         iommu_exit_mempool();
5061         return ret;
5062 }
5063 
5064 static int domain_context_clear_one_cb(struct pci_dev *pdev, u16 alias, void *opaque)
5065 {
5066         struct intel_iommu *iommu = opaque;
5067 
5068         domain_context_clear_one(iommu, PCI_BUS_NUM(alias), alias & 0xff);
5069         return 0;
5070 }
5071 
5072 /*
5073  * NB - intel-iommu lacks any sort of reference counting for the users of
5074  * dependent devices.  If multiple endpoints have intersecting dependent
5075  * devices, unbinding the driver from any one of them will possibly leave
5076  * the others unable to operate.
5077  */
5078 static void domain_context_clear(struct intel_iommu *iommu, struct device *dev)
5079 {
5080         if (!iommu || !dev || !dev_is_pci(dev))
5081                 return;
5082 
5083         pci_for_each_dma_alias(to_pci_dev(dev), &domain_context_clear_one_cb, iommu);
5084 }
5085 
5086 static void __dmar_remove_one_dev_info(struct device_domain_info *info)
5087 {
5088         struct dmar_domain *domain;
5089         struct intel_iommu *iommu;
5090         unsigned long flags;
5091 
5092         assert_spin_locked(&device_domain_lock);
5093 
5094         if (WARN_ON(!info))
5095                 return;
5096 
5097         iommu = info->iommu;
5098         domain = info->domain;
5099 
5100         if (info->dev) {
5101                 if (dev_is_pci(info->dev) && sm_supported(iommu))
5102                         intel_pasid_tear_down_entry(iommu, info->dev,
5103                                         PASID_RID2PASID);
5104 
5105                 iommu_disable_dev_iotlb(info);
5106                 domain_context_clear(iommu, info->dev);
5107                 intel_pasid_free_table(info->dev);
5108         }
5109 
5110         unlink_domain_info(info);
5111 
5112         spin_lock_irqsave(&iommu->lock, flags);
5113         domain_detach_iommu(domain, iommu);
5114         spin_unlock_irqrestore(&iommu->lock, flags);
5115 
5116         /* free the private domain */
5117         if (domain->flags & DOMAIN_FLAG_LOSE_CHILDREN &&
5118             !(domain->flags & DOMAIN_FLAG_STATIC_IDENTITY) &&
5119             list_empty(&domain->devices))
5120                 domain_exit(info->domain);
5121 
5122         free_devinfo_mem(info);
5123 }
5124 
5125 static void dmar_remove_one_dev_info(struct device *dev)
5126 {
5127         struct device_domain_info *info;
5128         unsigned long flags;
5129 
5130         spin_lock_irqsave(&device_domain_lock, flags);
5131         info = dev->archdata.iommu;
5132         if (info && info != DEFER_DEVICE_DOMAIN_INFO
5133             && info != DUMMY_DEVICE_DOMAIN_INFO)
5134                 __dmar_remove_one_dev_info(info);
5135         spin_unlock_irqrestore(&device_domain_lock, flags);
5136 }
5137 
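/*
 * Initialize a domain allocated through the IOMMU API: set up its IOVA
 * space and reserved ranges, derive the AGAW from the requested guest
 * width, and allocate the top-level page directory.
 */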
5138 static int md_domain_init(struct dmar_domain *domain, int guest_width)
5139 {
5140         int adjust_width;
5141 
5142         init_iova_domain(&domain->iovad, VTD_PAGE_SIZE, IOVA_START_PFN);
5143         domain_reserve_special_ranges(domain);
5144 
5145         /* calculate AGAW */
5146         domain->gaw = guest_width;
5147         adjust_width = guestwidth_to_adjustwidth(guest_width);
5148         domain->agaw = width_to_agaw(adjust_width);
5149 
5150         domain->iommu_coherency = 0;
5151         domain->iommu_snooping = 0;
5152         domain->iommu_superpage = 0;
5153         domain->max_addr = 0;
5154 
5155         /* always allocate the top pgd */
5156         domain->pgd = (struct dma_pte *)alloc_pgtable_page(domain->nid);
5157         if (!domain->pgd)
5158                 return -ENOMEM;
5159         domain_flush_cache(domain, domain->pgd, PAGE_SIZE);
5160         return 0;
5161 }
5162 
5163 static struct iommu_domain *intel_iommu_domain_alloc(unsigned type)
5164 {
5165         struct dmar_domain *dmar_domain;
5166         struct iommu_domain *domain;
5167 
5168         switch (type) {
5169         case IOMMU_DOMAIN_DMA:
5170         /* fallthrough */
5171         case IOMMU_DOMAIN_UNMANAGED:
5172                 dmar_domain = alloc_domain(0);
5173                 if (!dmar_domain) {
5174                         pr_err("Can't allocate dmar_domain\n");
5175                         return NULL;
5176                 }
5177                 if (md_domain_init(dmar_domain, DEFAULT_DOMAIN_ADDRESS_WIDTH)) {
5178                         pr_err("Domain initialization failed\n");
5179                         domain_exit(dmar_domain);
5180                         return NULL;
5181                 }
5182 
5183                 if (type == IOMMU_DOMAIN_DMA &&
5184                     init_iova_flush_queue(&dmar_domain->iovad,
5185                                           iommu_flush_iova, iova_entry_free)) {
5186                         pr_warn("iova flush queue initialization failed\n");
5187                         intel_iommu_strict = 1;
5188                 }
5189 
5190                 domain_update_iommu_cap(dmar_domain);
5191 
5192                 domain = &dmar_domain->domain;
5193                 domain->geometry.aperture_start = 0;
5194                 domain->geometry.aperture_end   =
5195                                 __DOMAIN_MAX_ADDR(dmar_domain->gaw);
5196                 domain->geometry.force_aperture = true;
5197 
5198                 return domain;
5199         case IOMMU_DOMAIN_IDENTITY:
5200                 return &si_domain->domain;
5201         default:
5202                 return NULL;
5203         }
5204 
5205         return NULL;
5206 }
5207 
5208 static void intel_iommu_domain_free(struct iommu_domain *domain)
5209 {
5210         if (domain != &si_domain->domain)
5211                 domain_exit(to_dmar_domain(domain));
5212 }
5213 
5214 /*
5215  * Check whether a @domain could be attached to the @dev through the
5216  * aux-domain attach/detach APIs.
5217  */
5218 static inline bool
5219 is_aux_domain(struct device *dev, struct iommu_domain *domain)
5220 {
5221         struct device_domain_info *info = dev->archdata.iommu;
5222 
5223         return info && info->auxd_enabled &&
5224                         domain->type == IOMMU_DOMAIN_UNMANAGED;
5225 }
5226 
5227 static void auxiliary_link_device(struct dmar_domain *domain,
5228                                   struct device *dev)
5229 {
5230         struct device_domain_info *info = dev->archdata.iommu;
5231 
5232         assert_spin_locked(&device_domain_lock);
5233         if (WARN_ON(!info))
5234                 return;
5235 
5236         domain->auxd_refcnt++;
5237         list_add(&domain->auxd, &info->auxiliary_domains);
5238 }
5239 
5240 static void auxiliary_unlink_device(struct dmar_domain *domain,
5241                                     struct device *dev)
5242 {
5243         struct device_domain_info *info = dev->archdata.iommu;
5244 
5245         assert_spin_locked(&device_domain_lock);
5246         if (WARN_ON(!info))
5247                 return;
5248 
5249         list_del(&domain->auxd);
5250         domain->auxd_refcnt--;
5251 
5252         if (!domain->auxd_refcnt && domain->default_pasid > 0)
5253                 intel_pasid_free_id(domain->default_pasid);
5254 }
5255 
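/*
 * Attach @domain to @dev as an auxiliary domain: allocate a default PASID
 * for the domain if it does not have one yet, attach the domain to the
 * IOMMU and install a second-level PASID entry for it.
 */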
5256 static int aux_domain_add_dev(struct dmar_domain *domain,
5257                               struct device *dev)
5258 {
5259         int ret;
5260         u8 bus, devfn;
5261         unsigned long flags;
5262         struct intel_iommu *iommu;
5263 
5264         iommu = device_to_iommu(dev, &bus, &devfn);
5265         if (!iommu)
5266                 return -ENODEV;
5267 
5268         if (domain->default_pasid <= 0) {
5269                 int pasid;
5270 
5271                 pasid = intel_pasid_alloc_id(domain, PASID_MIN,
5272                                              pci_max_pasids(to_pci_dev(dev)),
5273                                              GFP_KERNEL);
5274                 if (pasid <= 0) {
5275                         pr_err("Can't allocate default pasid\n");
5276                         return -ENODEV;
5277                 }
5278                 domain->default_pasid = pasid;
5279         }
5280 
5281         spin_lock_irqsave(&device_domain_lock, flags);
5282         /*
5283          * iommu->lock must be held to attach domain to iommu and set up the
5284          * pasid entry for second level translation.
5285          */
5286         spin_lock(&iommu->lock);
5287         ret = domain_attach_iommu(domain, iommu);
5288         if (ret)
5289                 goto attach_failed;
5290 
5291         /* Set up the PASID entry for mediated devices: */
5292         ret = intel_pasid_setup_second_level(iommu, domain, dev,
5293                                              domain->default_pasid);
5294         if (ret)
5295                 goto table_failed;
5296         spin_unlock(&iommu->lock);
5297 
5298         auxiliary_link_device(domain, dev);
5299 
5300         spin_unlock_irqrestore(&device_domain_lock, flags);
5301 
5302         return 0;
5303 
5304 table_failed:
5305         domain_detach_iommu(domain, iommu);
5306 attach_failed:
5307         spin_unlock(&iommu->lock);
5308         spin_unlock_irqrestore(&device_domain_lock, flags);
5309         if (!domain->auxd_refcnt && domain->default_pasid > 0)
5310                 intel_pasid_free_id(domain->default_pasid);
5311 
5312         return ret;
5313 }
5314 
5315 static void aux_domain_remove_dev(struct dmar_domain *domain,
5316                                   struct device *dev)
5317 {
5318         struct device_domain_info *info;
5319         struct intel_iommu *iommu;
5320         unsigned long flags;
5321 
5322         if (!is_aux_domain(dev, &domain->domain))
5323                 return;
5324 
5325         spin_lock_irqsave(&device_domain_lock, flags);
5326         info = dev->archdata.iommu;
5327         iommu = info->iommu;
5328 
5329         auxiliary_unlink_device(domain, dev);
5330 
5331         spin_lock(&iommu->lock);
5332         intel_pasid_tear_down_entry(iommu, dev, domain->default_pasid);
5333         domain_detach_iommu(domain, iommu);
5334         spin_unlock(&iommu->lock);
5335 
5336         spin_unlock_irqrestore(&device_domain_lock, flags);
5337 }
5338 
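/*
 * Validate that the IOMMU behind @dev can address everything already mapped
 * in the domain, and trim the domain's page-table levels down to this
 * IOMMU's AGAW before the actual attach.
 */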
5339 static int prepare_domain_attach_device(struct iommu_domain *domain,
5340                                         struct device *dev)
5341 {
5342         struct dmar_domain *dmar_domain = to_dmar_domain(domain);
5343         struct intel_iommu *iommu;
5344         int addr_width;
5345         u8 bus, devfn;
5346 
5347         iommu = device_to_iommu(dev, &bus, &devfn);
5348         if (!iommu)
5349                 return -ENODEV;
5350 
5351         /* check if this iommu agaw is sufficient for max mapped address */
5352         addr_width = agaw_to_width(iommu->agaw);
5353         if (addr_width > cap_mgaw(iommu->cap))
5354                 addr_width = cap_mgaw(iommu->cap);
5355 
5356         if (dmar_domain->max_addr > (1LL << addr_width)) {
5357                 dev_err(dev, "%s: iommu width (%d) is not "
5358                         "sufficient for the mapped address (%llx)\n",
5359                         __func__, addr_width, dmar_domain->max_addr);
5360                 return -EFAULT;
5361         }
5362         dmar_domain->gaw = addr_width;
5363 
5364         /*
5365          * Knock out extra levels of page tables if necessary
5366          */
5367         while (iommu->agaw < dmar_domain->agaw) {
5368                 struct dma_pte *pte;
5369 
5370                 pte = dmar_domain->pgd;
5371                 if (dma_pte_present(pte)) {
5372                         dmar_domain->pgd = (struct dma_pte *)
5373                                 phys_to_virt(dma_pte_addr(pte));
5374                         free_pgtable_page(pte);
5375                 }
5376                 dmar_domain->agaw--;
5377         }
5378 
5379         return 0;
5380 }
5381 
5382 static int intel_iommu_attach_device(struct iommu_domain *domain,
5383                                      struct device *dev)
5384 {
5385         int ret;
5386 
5387         if (domain->type == IOMMU_DOMAIN_UNMANAGED &&
5388             device_is_rmrr_locked(dev)) {
5389                 dev_warn(dev, "Device is ineligible for IOMMU domain attach due to platform RMRR requirement.  Contact your platform vendor.\n");
5390                 return -EPERM;
5391         }
5392 
5393         if (is_aux_domain(dev, domain))
5394                 return -EPERM;
5395 
5396         /* normally dev is not mapped */
5397         if (unlikely(domain_context_mapped(dev))) {
5398                 struct dmar_domain *old_domain;
5399 
5400                 old_domain = find_domain(dev);
5401                 if (old_domain)
5402                         dmar_remove_one_dev_info(dev);
5403         }
5404 
5405         ret = prepare_domain_attach_device(domain, dev);
5406         if (ret)
5407                 return ret;
5408 
5409         return domain_add_dev_info(to_dmar_domain(domain), dev);
5410 }
5411 
5412 static int intel_iommu_aux_attach_device(struct iommu_domain *domain,
5413                                          struct device *dev)
5414 {
5415         int ret;
5416 
5417         if (!is_aux_domain(dev, domain))
5418                 return -EPERM;
5419 
5420         ret = prepare_domain_attach_device(domain, dev);
5421         if (ret)
5422                 return ret;
5423 
5424         return aux_domain_add_dev(to_dmar_domain(domain), dev);
5425 }
5426 
5427 static void intel_iommu_detach_device(struct iommu_domain *domain,
5428                                       struct device *dev)
5429 {
5430         dmar_remove_one_dev_info(dev);
5431 }
5432 
5433 static void intel_iommu_aux_detach_device(struct iommu_domain *domain,
5434                                           struct device *dev)
5435 {
5436         aux_domain_remove_dev(to_dmar_domain(domain), dev);
5437 }
5438 
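/*
 * IOMMU API map callback: translate IOMMU_* protection flags into DMA PTE
 * bits, grow max_addr if needed (within the domain's address width), and
 * install the mapping via domain_pfn_mapping().
 */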
5439 static int intel_iommu_map(struct iommu_domain *domain,
5440                            unsigned long iova, phys_addr_t hpa,
5441                            size_t size, int iommu_prot)
5442 {
5443         struct dmar_domain *dmar_domain = to_dmar_domain(domain);
5444         u64 max_addr;
5445         int prot = 0;
5446         int ret;
5447 
5448         if (iommu_prot & IOMMU_READ)
5449                 prot |= DMA_PTE_READ;
5450         if (iommu_prot & IOMMU_WRITE)
5451                 prot |= DMA_PTE_WRITE;
5452         if ((iommu_prot & IOMMU_CACHE) && dmar_domain->iommu_snooping)
5453                 prot |= DMA_PTE_SNP;
5454 
5455         max_addr = iova + size;
5456         if (dmar_domain->max_addr < max_addr) {
5457                 u64 end;
5458 
5459                 /* check if minimum agaw is sufficient for mapped address */
5460                 end = __DOMAIN_MAX_ADDR(dmar_domain->gaw) + 1;
5461                 if (end < max_addr) {
5462                         pr_err("%s: iommu width (%d) is not "
5463                                "sufficient for the mapped address (%llx)\n",
5464                                __func__, dmar_domain->gaw, max_addr);
5465                         return -EFAULT;
5466                 }
5467                 dmar_domain->max_addr = max_addr;
5468         }
5469         /* Round up size to next multiple of PAGE_SIZE, if it and
5470          * the low bits of hpa would take us onto the next page */
5471         size = aligned_nrpages(hpa, size);
5472         ret = domain_pfn_mapping(dmar_domain, iova >> VTD_PAGE_SHIFT,
5473                                  hpa >> VTD_PAGE_SHIFT, size, prot);
5474         return ret;
5475 }
5476 
5477 static size_t intel_iommu_unmap(struct iommu_domain *domain,
5478                                 unsigned long iova, size_t size,
5479                                 struct iommu_iotlb_gather *gather)
5480 {
5481         struct dmar_domain *dmar_domain = to_dmar_domain(domain);
5482         struct page *freelist = NULL;
5483         unsigned long start_pfn, last_pfn;
5484         unsigned int npages;
5485         int iommu_id, level = 0;
5486 
5487         /* Cope with horrid API which requires us to unmap more than the
5488          * size argument if it happens to be a large-page mapping. */
5489         BUG_ON(!pfn_to_dma_pte(dmar_domain, iova >> VTD_PAGE_SHIFT, &level));
5490 
5491         if (size < VTD_PAGE_SIZE << level_to_offset_bits(level))
5492                 size = VTD_PAGE_SIZE << level_to_offset_bits(level);
5493 
5494         start_pfn = iova >> VTD_PAGE_SHIFT;
5495         last_pfn = (iova + size - 1) >> VTD_PAGE_SHIFT;
5496 
5497         freelist = domain_unmap(dmar_domain, start_pfn, last_pfn);
5498 
5499         npages = last_pfn - start_pfn + 1;
5500 
5501         for_each_domain_iommu(iommu_id, dmar_domain)
5502                 iommu_flush_iotlb_psi(g_iommus[iommu_id], dmar_domain,
5503                                       start_pfn, npages, !freelist, 0);
5504 
5505         dma_free_pagelist(freelist);
5506 
5507         if (dmar_domain->max_addr == iova + size)
5508                 dmar_domain->max_addr = iova;
5509 
5510         return size;
5511 }
5512 
5513 static phys_addr_t intel_iommu_iova_to_phys(struct iommu_domain *domain,
5514                                             dma_addr_t iova)
5515 {
5516         struct dmar_domain *dmar_domain = to_dmar_domain(domain);
5517         struct dma_pte *pte;
5518         int level = 0;
5519         u64 phys = 0;
5520 
5521         pte = pfn_to_dma_pte(dmar_domain, iova >> VTD_PAGE_SHIFT, &level);
5522         if (pte && dma_pte_present(pte))
5523                 phys = dma_pte_addr(pte) +
5524                         (iova & (BIT_MASK(level_to_offset_bits(level) +
5525                                                 VTD_PAGE_SHIFT) - 1));
5526 
5527         return phys;
5528 }
5529 
5530 static inline bool scalable_mode_support(void)
5531 {
5532         struct dmar_drhd_unit *drhd;
5533         struct intel_iommu *iommu;
5534         bool ret = true;
5535 
5536         rcu_read_lock();
5537         for_each_active_iommu(iommu, drhd) {
5538                 if (!sm_supported(iommu)) {
5539                         ret = false;
5540                         break;
5541                 }
5542         }
5543         rcu_read_unlock();
5544 
5545         return ret;
5546 }
5547 
5548 static inline bool iommu_pasid_support(void)
5549 {
5550         struct dmar_drhd_unit *drhd;
5551         struct intel_iommu *iommu;
5552         bool ret = true;
5553 
5554         rcu_read_lock();
5555         for_each_active_iommu(iommu, drhd) {
5556                 if (!pasid_supported(iommu)) {
5557                         ret = false;
5558                         break;
5559                 }
5560         }
5561         rcu_read_unlock();
5562 
5563         return ret;
5564 }
5565 
5566 static bool intel_iommu_capable(enum iommu_cap cap)
5567 {
5568         if (cap == IOMMU_CAP_CACHE_COHERENCY)
5569                 return domain_update_iommu_snooping(NULL) == 1;
5570         if (cap == IOMMU_CAP_INTR_REMAP)
5571                 return irq_remapping_enabled == 1;
5572 
5573         return false;
5574 }
5575 
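/*
 * iommu_ops add_device callback: link the device to its IOMMU and join an
 * IOMMU group; if the default domain type chosen by the core conflicts with
 * what device_def_domain_type() wants, fall back to a private identity or
 * private DMA domain for this device, and switch to the bounce-page DMA ops
 * for devices that need bouncing.
 */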
5576 static int intel_iommu_add_device(struct device *dev)
5577 {
5578         struct dmar_domain *dmar_domain;
5579         struct iommu_domain *domain;
5580         struct intel_iommu *iommu;
5581         struct iommu_group *group;
5582         u8 bus, devfn;
5583         int ret;
5584 
5585         iommu = device_to_iommu(dev, &bus, &devfn);
5586         if (!iommu)
5587                 return -ENODEV;
5588 
5589         iommu_device_link(&iommu->iommu, dev);
5590 
5591         if (translation_pre_enabled(iommu))
5592                 dev->archdata.iommu = DEFER_DEVICE_DOMAIN_INFO;
5593 
5594         group = iommu_group_get_for_dev(dev);
5595 
5596         if (IS_ERR(group)) {
5597                 ret = PTR_ERR(group);
5598                 goto unlink;
5599         }
5600 
5601         iommu_group_put(group);
5602 
5603         domain = iommu_get_domain_for_dev(dev);
5604         dmar_domain = to_dmar_domain(domain);
5605         if (domain->type == IOMMU_DOMAIN_DMA) {
5606                 if (device_def_domain_type(dev) == IOMMU_DOMAIN_IDENTITY) {
5607                         ret = iommu_request_dm_for_dev(dev);
5608                         if (ret) {
5609                                 dmar_remove_one_dev_info(dev);
5610                                 dmar_domain->flags |= DOMAIN_FLAG_LOSE_CHILDREN;
5611                                 domain_add_dev_info(si_domain, dev);
5612                                 dev_info(dev,
5613                                          "Device uses a private identity domain.\n");
5614                         }
5615                 }
5616         } else {
5617                 if (device_def_domain_type(dev) == IOMMU_DOMAIN_DMA) {
5618                         ret = iommu_request_dma_domain_for_dev(dev);
5619                         if (ret) {
5620                                 dmar_remove_one_dev_info(dev);
5621                                 dmar_domain->flags |= DOMAIN_FLAG_LOSE_CHILDREN;
5622                                 if (!get_private_domain_for_dev(dev)) {
5623                                         dev_warn(dev,
5624                                                  "Failed to get a private domain.\n");
5625                                         ret = -ENOMEM;
5626                                         goto unlink;
5627                                 }
5628 
5629                                 dev_info(dev,
5630                                          "Device uses a private dma domain.\n");
5631                         }
5632                 }
5633         }
5634 
5635         if (device_needs_bounce(dev)) {
5636                 dev_info(dev, "Use Intel IOMMU bounce page dma_ops\n");
5637                 set_dma_ops(dev, &bounce_dma_ops);
5638         }
5639 
5640         return 0;
5641 
5642 unlink:
5643         iommu_device_unlink(&iommu->iommu, dev);
5644         return ret;
5645 }
5646 
5647 static void intel_iommu_remove_device(struct device *dev)
5648 {
5649         struct intel_iommu *iommu;
5650         u8 bus, devfn;
5651 
5652         iommu = device_to_iommu(dev, &bus, &devfn);
5653         if (!iommu)
5654                 return;
5655 
5656         dmar_remove_one_dev_info(dev);
5657 
5658         iommu_group_remove_device(dev);
5659 
5660         iommu_device_unlink(&iommu->iommu, dev);
5661 
5662         if (device_needs_bounce(dev))
5663                 set_dma_ops(dev, NULL);
5664 }
5665 
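/*
 * Report reserved regions for @device: any RMRR that covers it (direct or
 * relaxable), the legacy ISA range for ISA bridges when the floppy
 * workaround is configured, and the MSI window covering the IO-APIC range.
 */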
5666 static void intel_iommu_get_resv_regions(struct device *device,
5667                                          struct list_head *head)
5668 {
5669         int prot = DMA_PTE_READ | DMA_PTE_WRITE;
5670         struct iommu_resv_region *reg;
5671         struct dmar_rmrr_unit *rmrr;
5672         struct device *i_dev;
5673         int i;
5674 
5675         down_read(&dmar_global_lock);
5676         for_each_rmrr_units(rmrr) {
5677                 for_each_active_dev_scope(rmrr->devices, rmrr->devices_cnt,
5678                                           i, i_dev) {
5679                         struct iommu_resv_region *resv;
5680                         enum iommu_resv_type type;
5681                         size_t length;
5682 
5683                         if (i_dev != device &&
5684                             !is_downstream_to_pci_bridge(device, i_dev))
5685                                 continue;
5686 
5687                         length = rmrr->end_address - rmrr->base_address + 1;
5688 
5689                         type = device_rmrr_is_relaxable(device) ?
5690                                 IOMMU_RESV_DIRECT_RELAXABLE : IOMMU_RESV_DIRECT;
5691 
5692                         resv = iommu_alloc_resv_region(rmrr->base_address,
5693                                                        length, prot, type);
5694                         if (!resv)
5695                                 break;
5696 
5697                         list_add_tail(&resv->list, head);
5698                 }
5699         }
5700         up_read(&dmar_global_lock);
5701 
5702 #ifdef CONFIG_INTEL_IOMMU_FLOPPY_WA
5703         if (dev_is_pci(device)) {
5704                 struct pci_dev *pdev = to_pci_dev(device);
5705 
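                     /*
                      * For legacy ISA bridges, reserve a relaxable direct
                      * mapping of the low 16MB (1UL << 24) so legacy floppy
                      * DMA keeps working.
                      */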
5706                 if ((pdev->class >> 8) == PCI_CLASS_BRIDGE_ISA) {
5707                         reg = iommu_alloc_resv_region(0, 1UL << 24, prot,
5708                                                    IOMMU_RESV_DIRECT_RELAXABLE);
5709                         if (reg)
5710                                 list_add_tail(&reg->list, head);
5711                 }
5712         }
5713 #endif /* CONFIG_INTEL_IOMMU_FLOPPY_WA */
5714 
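             /*
              * Always report the IOAPIC/MSI interrupt range as a reserved
              * MSI region.
              */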
5715         reg = iommu_alloc_resv_region(IOAPIC_RANGE_START,
5716                                       IOAPIC_RANGE_END - IOAPIC_RANGE_START + 1,
5717                                       0, IOMMU_RESV_MSI);
5718         if (!reg)
5719                 return;
5720         list_add_tail(&reg->list, head);
5721 }
5722 
5723 static void intel_iommu_put_resv_regions(struct device *dev,
5724                                          struct list_head *head)
5725 {
5726         struct iommu_resv_region *entry, *next;
5727 
5728         list_for_each_entry_safe(entry, next, head, list)
5729                 kfree(entry);
5730 }
5731 
5732 int intel_iommu_enable_pasid(struct intel_iommu *iommu, struct device *dev)
5733 {
5734         struct device_domain_info *info;
5735         struct context_entry *context;
5736         struct dmar_domain *domain;
5737         unsigned long flags;
5738         u64 ctx_lo;
5739         int ret;
5740 
5741         domain = find_domain(dev);
5742         if (!domain)
5743                 return -EINVAL;
5744 
5745         spin_lock_irqsave(&device_domain_lock, flags);
5746         spin_lock(&iommu->lock);
5747 
5748         ret = -EINVAL;
5749         info = dev->archdata.iommu;
5750         if (!info || !info->pasid_supported)
5751                 goto out;
5752 
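             /* Fetch the context entry for this device without allocating one. */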
5753         context = iommu_context_addr(iommu, info->bus, info->devfn, 0);
5754         if (WARN_ON(!context))
5755                 goto out;
5756 
5757         ctx_lo = context[0].lo;
5758 
5759         if (!(ctx_lo & CONTEXT_PASIDE)) {
5760                 ctx_lo |= CONTEXT_PASIDE;
5761                 context[0].lo = ctx_lo;
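                     /* Order the context-entry update before the cache flush below. */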
5762                 wmb();
5763                 iommu->flush.flush_context(iommu,
5764                                            domain->iommu_did[iommu->seq_id],
5765                                            PCI_DEVID(info->bus, info->devfn),
5766                                            DMA_CCMD_MASK_NOBIT,
5767                                            DMA_CCMD_DEVICE_INVL);
5768         }
5769 
5770         /* Enable PASID support in the device, if it wasn't already */
5771         if (!info->pasid_enabled)
5772                 iommu_enable_dev_iotlb(info);
5773 
5774         ret = 0;
5775 
5776  out:
5777         spin_unlock(&iommu->lock);
5778         spin_unlock_irqrestore(&device_domain_lock, flags);
5779 
5780         return ret;
5781 }
5782 
5783 static void intel_iommu_apply_resv_region(struct device *dev,
5784                                           struct iommu_domain *domain,
5785                                           struct iommu_resv_region *region)
5786 {
5787         struct dmar_domain *dmar_domain = to_dmar_domain(domain);
5788         unsigned long start, end;
5789 
5790         start = IOVA_PFN(region->start);
5791         end   = IOVA_PFN(region->start + region->length - 1);
5792 
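             /*
              * Carve the region out of the domain's IOVA allocator so that
              * DMA API allocations never land inside a reserved range.
              */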
5793         WARN_ON_ONCE(!reserve_iova(&dmar_domain->iovad, start, end));
5794 }
5795 
5796 static struct iommu_group *intel_iommu_device_group(struct device *dev)
5797 {
5798         if (dev_is_pci(dev))
5799                 return pci_device_group(dev);
5800         return generic_device_group(dev);
5801 }
5802 
5803 #ifdef CONFIG_INTEL_IOMMU_SVM
5804 struct intel_iommu *intel_svm_device_to_iommu(struct device *dev)
5805 {
5806         struct intel_iommu *iommu;
5807         u8 bus, devfn;
5808 
5809         if (iommu_dummy(dev)) {
5810                 dev_warn(dev,
5811                          "No IOMMU translation for device; cannot enable SVM\n");
5812                 return NULL;
5813         }
5814 
5815         iommu = device_to_iommu(dev, &bus, &devfn);
5816         if (!iommu) {
5817                 dev_err(dev, "No IOMMU for device; cannot enable SVM\n");
5818                 return NULL;
5819         }
5820 
5821         return iommu;
5822 }
5823 #endif /* CONFIG_INTEL_IOMMU_SVM */
5824 
5825 static int intel_iommu_enable_auxd(struct device *dev)
5826 {
5827         struct device_domain_info *info;
5828         struct intel_iommu *iommu;
5829         unsigned long flags;
5830         u8 bus, devfn;
5831         int ret;
5832 
5833         iommu = device_to_iommu(dev, &bus, &devfn);
5834         if (!iommu || dmar_disabled)
5835                 return -EINVAL;
5836 
5837         if (!sm_supported(iommu) || !pasid_supported(iommu))
5838                 return -EINVAL;
5839 
5840         ret = intel_iommu_enable_pasid(iommu, dev);
5841         if (ret)
5842                 return -ENODEV;
5843 
5844         spin_lock_irqsave(&device_domain_lock, flags);
5845         info = dev->archdata.iommu;
5846         info->auxd_enabled = 1;
5847         spin_unlock_irqrestore(&device_domain_lock, flags);
5848 
5849         return 0;
5850 }
5851 
5852 static int intel_iommu_disable_auxd(struct device *dev)
5853 {
5854         struct device_domain_info *info;
5855         unsigned long flags;
5856 
5857         spin_lock_irqsave(&device_domain_lock, flags);
5858         info = dev->archdata.iommu;
5859         if (!WARN_ON(!info))
5860                 info->auxd_enabled = 0;
5861         spin_unlock_irqrestore(&device_domain_lock, flags);
5862 
5863         return 0;
5864 }
5865 
5866 /*
5867  * A PCI Express Designated Vendor-Specific Extended Capability is defined
5868  * in section 3.7 of the Intel Scalable I/O Virtualization technical spec
5869  * so that system software and tools can detect endpoint devices that
5870  * support Intel Scalable I/O Virtualization without a host driver
5871  * dependency.
5872  *
5873  * Returns the config space offset of the matching extended capability
5874  * structure, or 0 if the device does not support it.
5875  */
5876 static int siov_find_pci_dvsec(struct pci_dev *pdev)
5877 {
5878         int pos;
5879         u16 vendor, id;
5880 
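             /*
              * 0x23 is the PCI Express DVSEC extended capability ID. Each
              * DVSEC carries a vendor ID at offset 4 and a DVSEC ID at
              * offset 8; a vendor ID of Intel with DVSEC ID 5 marks the
              * Scalable IOV capability described in the comment above.
              */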
5881         pos = pci_find_next_ext_capability(pdev, 0, 0x23);
5882         while (pos) {
5883                 pci_read_config_word(pdev, pos + 4, &vendor);
5884                 pci_read_config_word(pdev, pos + 8, &id);
5885                 if (vendor == PCI_VENDOR_ID_INTEL && id == 5)
5886                         return pos;
5887 
5888                 pos = pci_find_next_ext_capability(pdev, pos, 0x23);
5889         }
5890 
5891         return 0;
5892 }
5893 
5894 static bool
5895 intel_iommu_dev_has_feat(struct device *dev, enum iommu_dev_features feat)
5896 {
5897         if (feat == IOMMU_DEV_FEAT_AUX) {
5898                 int ret;
5899 
5900                 if (!dev_is_pci(dev) || dmar_disabled ||
5901                     !scalable_mode_support() || !iommu_pasid_support())
5902                         return false;
5903 
5904                 ret = pci_pasid_features(to_pci_dev(dev));
5905                 if (ret < 0)
5906                         return false;
5907 
5908                 return !!siov_find_pci_dvsec(to_pci_dev(dev));
5909         }
5910 
5911         return false;
5912 }
5913 
5914 static int
5915 intel_iommu_dev_enable_feat(struct device *dev, enum iommu_dev_features feat)
5916 {
5917         if (feat == IOMMU_DEV_FEAT_AUX)
5918                 return intel_iommu_enable_auxd(dev);
5919 
5920         return -ENODEV;
5921 }
5922 
5923 static int
5924 intel_iommu_dev_disable_feat(struct device *dev, enum iommu_dev_features feat)
5925 {
5926         if (feat == IOMMU_DEV_FEAT_AUX)
5927                 return intel_iommu_disable_auxd(dev);
5928 
5929         return -ENODEV;
5930 }
5931 
5932 static bool
5933 intel_iommu_dev_feat_enabled(struct device *dev, enum iommu_dev_features feat)
5934 {
5935         struct device_domain_info *info = dev->archdata.iommu;
5936 
5937         if (feat == IOMMU_DEV_FEAT_AUX)
5938                 return scalable_mode_support() && info && info->auxd_enabled;
5939 
5940         return false;
5941 }
5942 
5943 static int
5944 intel_iommu_aux_get_pasid(struct iommu_domain *domain, struct device *dev)
5945 {
5946         struct dmar_domain *dmar_domain = to_dmar_domain(domain);
5947 
5948         return dmar_domain->default_pasid > 0 ?
5949                         dmar_domain->default_pasid : -EINVAL;
5950 }
5951 
5952 static bool intel_iommu_is_attach_deferred(struct iommu_domain *domain,
5953                                            struct device *dev)
5954 {
5955         return dev->archdata.iommu == DEFER_DEVICE_DOMAIN_INFO;
5956 }
5957 
5958 const struct iommu_ops intel_iommu_ops = {
5959         .capable                = intel_iommu_capable,
5960         .domain_alloc           = intel_iommu_domain_alloc,
5961         .domain_free            = intel_iommu_domain_free,
5962         .attach_dev             = intel_iommu_attach_device,
5963         .detach_dev             = intel_iommu_detach_device,
5964         .aux_attach_dev         = intel_iommu_aux_attach_device,
5965         .aux_detach_dev         = intel_iommu_aux_detach_device,
5966         .aux_get_pasid          = intel_iommu_aux_get_pasid,
5967         .map                    = intel_iommu_map,
5968         .unmap                  = intel_iommu_unmap,
5969         .iova_to_phys           = intel_iommu_iova_to_phys,
5970         .add_device             = intel_iommu_add_device,
5971         .remove_device          = intel_iommu_remove_device,
5972         .get_resv_regions       = intel_iommu_get_resv_regions,
5973         .put_resv_regions       = intel_iommu_put_resv_regions,
5974         .apply_resv_region      = intel_iommu_apply_resv_region,
5975         .device_group           = intel_iommu_device_group,
5976         .dev_has_feat           = intel_iommu_dev_has_feat,
5977         .dev_feat_enabled       = intel_iommu_dev_feat_enabled,
5978         .dev_enable_feat        = intel_iommu_dev_enable_feat,
5979         .dev_disable_feat       = intel_iommu_dev_disable_feat,
5980         .is_attach_deferred     = intel_iommu_is_attach_deferred,
5981         .pgsize_bitmap          = INTEL_IOMMU_PGSIZES,
5982 };
5983 
5984 static void quirk_iommu_igfx(struct pci_dev *dev)
5985 {
5986         pci_info(dev, "Disabling IOMMU for graphics on this chipset\n");
5987         dmar_map_gfx = 0;
5988 }
5989 
5990 /* G4x/GM45 integrated gfx dmar support is totally busted. */
5991 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2a40, quirk_iommu_igfx);
5992 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e00, quirk_iommu_igfx);
5993 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e10, quirk_iommu_igfx);
5994 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e20, quirk_iommu_igfx);
5995 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e30, quirk_iommu_igfx);
5996 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e40, quirk_iommu_igfx);
5997 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e90, quirk_iommu_igfx);
5998 
5999 /* Broadwell igfx malfunctions with dmar */
6000 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x1606, quirk_iommu_igfx);
6001 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x160B, quirk_iommu_igfx);
6002 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x160E, quirk_iommu_igfx);
6003 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x1602, quirk_iommu_igfx);
6004 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x160A, quirk_iommu_igfx);
6005 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x160D, quirk_iommu_igfx);
6006 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x1616, quirk_iommu_igfx);
6007 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x161B, quirk_iommu_igfx);
6008 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x161E, quirk_iommu_igfx);
6009 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x1612, quirk_iommu_igfx);
6010 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x161A, quirk_iommu_igfx);
6011 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x161D, quirk_iommu_igfx);
6012 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x1626, quirk_iommu_igfx);
6013 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x162B, quirk_iommu_igfx);
6014 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x162E, quirk_iommu_igfx);
6015 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x1622, quirk_iommu_igfx);
6016 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x162A, quirk_iommu_igfx);
6017 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x162D, quirk_iommu_igfx);
6018 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x1636, quirk_iommu_igfx);
6019 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x163B, quirk_iommu_igfx);
6020 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x163E, quirk_iommu_igfx);
6021 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x1632, quirk_iommu_igfx);
6022 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x163A, quirk_iommu_igfx);
6023 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x163D, quirk_iommu_igfx);
6024 
6025 static void quirk_iommu_rwbf(struct pci_dev *dev)
6026 {
6027         /*
6028          * Mobile 4 Series Chipset neglects to set RWBF capability,
6029          * but needs it. Same seems to hold for the desktop versions.
6030          */
6031         pci_info(dev, "Forcing write-buffer flush capability\n");
6032         rwbf_quirk = 1;
6033 }
6034 
6035 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2a40, quirk_iommu_rwbf);
6036 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e00, quirk_iommu_rwbf);
6037 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e10, quirk_iommu_rwbf);
6038 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e20, quirk_iommu_rwbf);
6039 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e30, quirk_iommu_rwbf);
6040 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e40, quirk_iommu_rwbf);
6041 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e90, quirk_iommu_rwbf);
6042 
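/*
 * GGC is the GMCH graphics control register read from the host bridge; the
 * values below encode the graphics stolen memory size and whether the BIOS
 * reserved space for a VT (shadow GTT) mapping.
 */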
6043 #define GGC 0x52
6044 #define GGC_MEMORY_SIZE_MASK    (0xf << 8)
6045 #define GGC_MEMORY_SIZE_NONE    (0x0 << 8)
6046 #define GGC_MEMORY_SIZE_1M      (0x1 << 8)
6047 #define GGC_MEMORY_SIZE_2M      (0x3 << 8)
6048 #define GGC_MEMORY_VT_ENABLED   (0x8 << 8)
6049 #define GGC_MEMORY_SIZE_2M_VT   (0x9 << 8)
6050 #define GGC_MEMORY_SIZE_3M_VT   (0xa << 8)
6051 #define GGC_MEMORY_SIZE_4M_VT   (0xb << 8)
6052 
6053 static void quirk_calpella_no_shadow_gtt(struct pci_dev *dev)
6054 {
6055         unsigned short ggc;
6056 
6057         if (pci_read_config_word(dev, GGC, &ggc))
6058                 return;
6059 
6060         if (!(ggc & GGC_MEMORY_VT_ENABLED)) {
6061                 pci_info(dev, "BIOS has allocated no shadow GTT; disabling IOMMU for graphics\n");
6062                 dmar_map_gfx = 0;
6063         } else if (dmar_map_gfx) {
6064                 /* we have to ensure the gfx device is idle before we flush */
6065                 pci_info(dev, "Disabling batched IOTLB flush on Ironlake\n");
6066                 intel_iommu_strict = 1;
6067         }
6068 }
6069 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x0040, quirk_calpella_no_shadow_gtt);
6070 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x0044, quirk_calpella_no_shadow_gtt);
6071 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x0062, quirk_calpella_no_shadow_gtt);
6072 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x006a, quirk_calpella_no_shadow_gtt);
6073 
6074 /* On Tylersburg chipsets, some BIOSes have been known to enable the
6075    ISOCH DMAR unit for the Azalia sound device, but not give it any
6076    TLB entries, which causes it to deadlock. Check for that.  We do
6077    this in a function called from init_dmars(), instead of in a PCI
6078    quirk, because we don't want to print the obnoxious "BIOS broken"
6079    message if VT-d is actually disabled.
6080 */
6081 static void __init check_tylersburg_isoch(void)
6082 {
6083         struct pci_dev *pdev;
6084         uint32_t vtisochctrl;
6085 
6086         /* If there's no Azalia in the system anyway, forget it. */
6087         pdev = pci_get_device(PCI_VENDOR_ID_INTEL, 0x3a3e, NULL);
6088         if (!pdev)
6089                 return;
6090         pci_dev_put(pdev);
6091 
6092         /* System Management Registers. Might be hidden, in which case
6093            we can't do the sanity check. But that's OK, because the
6094            known-broken BIOSes _don't_ actually hide it, so far. */
6095         pdev = pci_get_device(PCI_VENDOR_ID_INTEL, 0x342e, NULL);
6096         if (!pdev)
6097                 return;
6098 
6099         if (pci_read_config_dword(pdev, 0x188, &vtisochctrl)) {
6100                 pci_dev_put(pdev);
6101                 return;
6102         }
6103 
6104         pci_dev_put(pdev);
6105 
6106         /* If Azalia DMA is routed to the non-isoch DMAR unit, fine. */
6107         if (vtisochctrl & 1)
6108                 return;
6109 
6110         /* Drop all bits other than the number of TLB entries */
6111         vtisochctrl &= 0x1c;
6112 
6113         /* If we have the recommended number of TLB entries (16), fine. */
6114         if (vtisochctrl == 0x10)
6115                 return;
6116 
6117         /* Zero TLB entries? You get to ride the short bus to school. */
6118         if (!vtisochctrl) {
6119                 WARN(1, "Your BIOS is broken; DMA routed to ISOCH DMAR unit but no TLB space.\n"
6120                      "BIOS vendor: %s; Ver: %s; Product Version: %s\n",
6121                      dmi_get_system_info(DMI_BIOS_VENDOR),
6122                      dmi_get_system_info(DMI_BIOS_VERSION),
6123                      dmi_get_system_info(DMI_PRODUCT_VERSION));
6124                 iommu_identity_mapping |= IDENTMAP_AZALIA;
6125                 return;
6126         }
6127 
6128         pr_warn("Recommended number of TLB entries for the ISOCH unit is 16; your BIOS set %d\n",
6129                 vtisochctrl);
6130 }
