1 /*
2  * Copyright © 2006-2014 Intel Corporation.
3  *
4  * This program is free software; you can redistribute it and/or modify it
5  * under the terms and conditions of the GNU General Public License,
6  * version 2, as published by the Free Software Foundation.
7  *
8  * This program is distributed in the hope it will be useful, but WITHOUT
9  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
11  * more details.
12  *
13  * Authors: David Woodhouse <dwmw2@infradead.org>,
14  *          Ashok Raj <ashok.raj@intel.com>,
15  *          Shaohua Li <shaohua.li@intel.com>,
16  *          Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com>,
17  *          Fenghua Yu <fenghua.yu@intel.com>
18  */
19 
20 #include <linux/init.h>
21 #include <linux/bitmap.h>
22 #include <linux/debugfs.h>
23 #include <linux/export.h>
24 #include <linux/slab.h>
25 #include <linux/irq.h>
26 #include <linux/interrupt.h>
27 #include <linux/spinlock.h>
28 #include <linux/pci.h>
29 #include <linux/dmar.h>
30 #include <linux/dma-mapping.h>
31 #include <linux/mempool.h>
32 #include <linux/memory.h>
33 #include <linux/timer.h>
34 #include <linux/iova.h>
35 #include <linux/iommu.h>
36 #include <linux/intel-iommu.h>
37 #include <linux/syscore_ops.h>
38 #include <linux/tboot.h>
39 #include <linux/dmi.h>
40 #include <linux/pci-ats.h>
41 #include <linux/memblock.h>
42 #include <linux/dma-contiguous.h>
43 #include <asm/irq_remapping.h>
44 #include <asm/cacheflush.h>
45 #include <asm/iommu.h>
46 
47 #include "irq_remapping.h"
48 
49 #define ROOT_SIZE		VTD_PAGE_SIZE
50 #define CONTEXT_SIZE		VTD_PAGE_SIZE
51 
52 #define IS_GFX_DEVICE(pdev) ((pdev->class >> 16) == PCI_BASE_CLASS_DISPLAY)
53 #define IS_USB_DEVICE(pdev) ((pdev->class >> 8) == PCI_CLASS_SERIAL_USB)
54 #define IS_ISA_DEVICE(pdev) ((pdev->class >> 8) == PCI_CLASS_BRIDGE_ISA)
55 #define IS_AZALIA(pdev) ((pdev)->vendor == 0x8086 && (pdev)->device == 0x3a3e)
56 
57 #define IOAPIC_RANGE_START	(0xfee00000)
58 #define IOAPIC_RANGE_END	(0xfeefffff)
59 #define IOVA_START_ADDR		(0x1000)
60 
61 #define DEFAULT_DOMAIN_ADDRESS_WIDTH 48
62 
63 #define MAX_AGAW_WIDTH 64
64 #define MAX_AGAW_PFN_WIDTH	(MAX_AGAW_WIDTH - VTD_PAGE_SHIFT)
65 
66 #define __DOMAIN_MAX_PFN(gaw)  ((((uint64_t)1) << (gaw-VTD_PAGE_SHIFT)) - 1)
67 #define __DOMAIN_MAX_ADDR(gaw) ((((uint64_t)1) << gaw) - 1)
68 
69 /* We limit DOMAIN_MAX_PFN to fit in an unsigned long, and DOMAIN_MAX_ADDR
70    to match. That way, we can use 'unsigned long' for PFNs with impunity. */
71 #define DOMAIN_MAX_PFN(gaw)	((unsigned long) min_t(uint64_t, \
72 				__DOMAIN_MAX_PFN(gaw), (unsigned long)-1))
73 #define DOMAIN_MAX_ADDR(gaw)	(((uint64_t)__DOMAIN_MAX_PFN(gaw)) << VTD_PAGE_SHIFT)
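/*
 * Worked example: with the default 48-bit guest address width,
 * __DOMAIN_MAX_PFN(48) is 2^36 - 1 and DOMAIN_MAX_ADDR(48) is
 * 2^48 - 4KiB, i.e. the base of the last 4KiB page in a 48-bit space.
 */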
74 
75 /* IO virtual address start page frame number */
76 #define IOVA_START_PFN		(1)
77 
78 #define IOVA_PFN(addr)		((addr) >> PAGE_SHIFT)
79 #define DMA_32BIT_PFN		IOVA_PFN(DMA_BIT_MASK(32))
80 #define DMA_64BIT_PFN		IOVA_PFN(DMA_BIT_MASK(64))
81 
82 /* page table handling */
83 #define LEVEL_STRIDE		(9)
84 #define LEVEL_MASK		(((u64)1 << LEVEL_STRIDE) - 1)
85 
86 /*
87  * This bitmap is used to advertise the page sizes our hardware supports
88  * to the IOMMU core, which will then use this information to split
89  * physically contiguous memory regions it is mapping into page sizes
90  * that we support.
91  *
92  * Traditionally the IOMMU core just handed us the mappings directly,
93  * after making sure the size is a power-of-two multiple of 4KiB and that the
94  * mapping has natural alignment.
95  *
96  * To retain this behavior, we currently advertise that we support
97  * all page sizes that are a power-of-two multiple of 4KiB.
98  *
99  * If at some point we'd like to utilize the IOMMU core's new behavior,
100  * we could change this to advertise the real page sizes we support.
101  */
102 #define INTEL_IOMMU_PGSIZES	(~0xFFFUL)
103 
104 static inline int agaw_to_level(int agaw)
105 {
106 	return agaw + 2;
107 }
108 
109 static inline int agaw_to_width(int agaw)
110 {
111 	return min_t(int, 30 + agaw * LEVEL_STRIDE, MAX_AGAW_WIDTH);
112 }
113 
114 static inline int width_to_agaw(int width)
115 {
116 	return DIV_ROUND_UP(width - 30, LEVEL_STRIDE);
117 }
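/*
 * The helpers above are inverses of each other: an adjusted guest address
 * width of 30 + 9*agaw bits is handled by an (agaw + 2)-level page table,
 * e.g. agaw 1 is 39-bit/3-level and agaw 2 is 48-bit/4-level.
 */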
118 
119 static inline unsigned int level_to_offset_bits(int level)
120 {
121 	return (level - 1) * LEVEL_STRIDE;
122 }
123 
124 static inline int pfn_level_offset(unsigned long pfn, int level)
125 {
126 	return (pfn >> level_to_offset_bits(level)) & LEVEL_MASK;
127 }
128 
129 static inline unsigned long level_mask(int level)
130 {
131 	return -1UL << level_to_offset_bits(level);
132 }
133 
134 static inline unsigned long level_size(int level)
135 {
136 	return 1UL << level_to_offset_bits(level);
137 }
138 
139 static inline unsigned long align_to_level(unsigned long pfn, int level)
140 {
141 	return (pfn + level_size(level) - 1) & level_mask(level);
142 }
143 
144 static inline unsigned long lvl_to_nr_pages(unsigned int lvl)
145 {
146 	return  1 << min_t(int, (lvl - 1) * LEVEL_STRIDE, MAX_AGAW_PFN_WIDTH);
147 }
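/*
 * lvl_to_nr_pages() is the number of 4KiB VT-d pages covered by a single
 * entry at the given level: level 1 covers 1 page (4KiB), level 2 covers
 * 512 pages (2MiB), level 3 covers 512^2 pages (1GiB), and so on, with
 * the shift capped at MAX_AGAW_PFN_WIDTH.
 */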
148 
149 /* VT-d pages must always be _smaller_ than MM pages. Otherwise things
150    are never going to work. */
151 static inline unsigned long dma_to_mm_pfn(unsigned long dma_pfn)
152 {
153 	return dma_pfn >> (PAGE_SHIFT - VTD_PAGE_SHIFT);
154 }
155 
156 static inline unsigned long mm_to_dma_pfn(unsigned long mm_pfn)
157 {
158 	return mm_pfn << (PAGE_SHIFT - VTD_PAGE_SHIFT);
159 }
160 static inline unsigned long page_to_dma_pfn(struct page *pg)
161 {
162 	return mm_to_dma_pfn(page_to_pfn(pg));
163 }
164 static inline unsigned long virt_to_dma_pfn(void *p)
165 {
166 	return page_to_dma_pfn(virt_to_page(p));
167 }
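/*
 * On x86 PAGE_SHIFT and VTD_PAGE_SHIFT are both 12, so the mm<->dma pfn
 * conversions above are currently no-ops; the shifts only matter if the
 * CPU page size is ever larger than the fixed 4KiB VT-d page size.
 */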
168 
169 /* global iommu list, set NULL for ignored DMAR units */
170 static struct intel_iommu **g_iommus;
171 
172 static void __init check_tylersburg_isoch(void);
173 static int rwbf_quirk;
174 
175 /*
176  * Set to 1 to panic the kernel if VT-d cannot be enabled successfully
177  * (used when the kernel is launched with TXT)
178  */
179 static int force_on = 0;
180 
181 /*
182  * 0: Present
183  * 1-11: Reserved
184  * 12-63: Context Ptr (12 - (haw-1))
185  * 64-127: Reserved
186  */
187 struct root_entry {
188 	u64	lo;
189 	u64	hi;
190 };
191 #define ROOT_ENTRY_NR (VTD_PAGE_SIZE/sizeof(struct root_entry))
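/* A root table is one 4KiB page of 16-byte root entries, so ROOT_ENTRY_NR
 * is 256: one root entry per PCI bus number. */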
192 
193 
194 /*
195  * low 64 bits:
196  * 0: present
197  * 1: fault processing disable
198  * 2-3: translation type
199  * 12-63: address space root
200  * high 64 bits:
201  * 0-2: address width
202  * 3-6: aval
203  * 8-23: domain id
204  */
205 struct context_entry {
206 	u64 lo;
207 	u64 hi;
208 };
209 
210 static inline bool context_present(struct context_entry *context)
211 {
212 	return (context->lo & 1);
213 }
214 static inline void context_set_present(struct context_entry *context)
215 {
216 	context->lo |= 1;
217 }
218 
219 static inline void context_set_fault_enable(struct context_entry *context)
220 {
221 	context->lo &= (((u64)-1) << 2) | 1;
222 }
223 
224 static inline void context_set_translation_type(struct context_entry *context,
225 						unsigned long value)
226 {
227 	context->lo &= (((u64)-1) << 4) | 3;
228 	context->lo |= (value & 3) << 2;
229 }
230 
231 static inline void context_set_address_root(struct context_entry *context,
232 					    unsigned long value)
233 {
234 	context->lo &= ~VTD_PAGE_MASK;
235 	context->lo |= value & VTD_PAGE_MASK;
236 }
237 
238 static inline void context_set_address_width(struct context_entry *context,
239 					     unsigned long value)
240 {
241 	context->hi |= value & 7;
242 }
243 
244 static inline void context_set_domain_id(struct context_entry *context,
245 					 unsigned long value)
246 {
247 	context->hi |= (value & ((1 << 16) - 1)) << 8;
248 }
249 
250 static inline void context_clear_entry(struct context_entry *context)
251 {
252 	context->lo = 0;
253 	context->hi = 0;
254 }
255 
256 /*
257  * 0: readable
258  * 1: writable
259  * 2-6: reserved
260  * 7: super page
261  * 8-10: available
262  * 11: snoop behavior
263  * 12-63: Host physical address
264  */
265 struct dma_pte {
266 	u64 val;
267 };
268 
269 static inline void dma_clear_pte(struct dma_pte *pte)
270 {
271 	pte->val = 0;
272 }
273 
274 static inline u64 dma_pte_addr(struct dma_pte *pte)
275 {
276 #ifdef CONFIG_64BIT
277 	return pte->val & VTD_PAGE_MASK;
278 #else
279 	/* Must have a full atomic 64-bit read */
280 	return  __cmpxchg64(&pte->val, 0ULL, 0ULL) & VTD_PAGE_MASK;
281 #endif
282 }
283 
284 static inline bool dma_pte_present(struct dma_pte *pte)
285 {
286 	return (pte->val & 3) != 0;
287 }
288 
289 static inline bool dma_pte_superpage(struct dma_pte *pte)
290 {
291 	return (pte->val & DMA_PTE_LARGE_PAGE);
292 }
293 
294 static inline int first_pte_in_page(struct dma_pte *pte)
295 {
296 	return !((unsigned long)pte & ~VTD_PAGE_MASK);
297 }
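/*
 * A page-table page holds 512 struct dma_pte entries (4KiB / 8 bytes), so
 * a pte pointer whose low VTD_PAGE_SHIFT bits are clear is the first entry
 * of its page; the PTE walkers below use this to stop at page boundaries.
 */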
298 
299 /*
300  * This domain is a static identity mapping domain.
301  *	1. This domain creates a static 1:1 mapping to all usable memory.
302  *	2. It maps to each iommu if successful.
303  *	3. Each iommu maps to this domain if successful.
304  */
305 static struct dmar_domain *si_domain;
306 static int hw_pass_through = 1;
307 
308 /* This domain represents a virtual machine; more than one device
309  * across iommus may be owned by one domain, e.g. a kvm guest.
310  */
311 #define DOMAIN_FLAG_VIRTUAL_MACHINE	(1 << 0)
312 
313 /* si_domain contains multiple devices */
314 #define DOMAIN_FLAG_STATIC_IDENTITY	(1 << 1)
315 
316 struct dmar_domain {
317 	int	id;			/* domain id */
318 	int	nid;			/* node id */
319 	DECLARE_BITMAP(iommu_bmp, DMAR_UNITS_SUPPORTED);
320 					/* bitmap of iommus this domain uses*/
321 
322 	struct list_head devices;	/* all devices' list */
323 	struct iova_domain iovad;	/* iova's that belong to this domain */
324 
325 	struct dma_pte	*pgd;		/* virtual address */
326 	int		gaw;		/* max guest address width */
327 
328 	/* adjusted guest address width, 0 is level 2 30-bit */
329 	int		agaw;
330 
331 	int		flags;		/* flags to find out type of domain */
332 
333 	int		iommu_coherency;/* indicate coherency of iommu access */
334 	int		iommu_snooping; /* indicate snooping control feature*/
335 	int		iommu_count;	/* reference count of iommu */
336 	int		iommu_superpage;/* Level of superpages supported:
337 					   0 == 4KiB (no superpages), 1 == 2MiB,
338 					   2 == 1GiB, 3 == 512GiB, 4 == 1TiB */
339 	spinlock_t	iommu_lock;	/* protect iommu set in domain */
340 	u64		max_addr;	/* maximum mapped address */
341 
342 	struct iommu_domain domain;	/* generic domain data structure for
343 					   iommu core */
344 };
345 
346 /* PCI domain-device relationship */
347 struct device_domain_info {
348 	struct list_head link;	/* link to domain siblings */
349 	struct list_head global; /* link to global list */
350 	u8 bus;			/* PCI bus number */
351 	u8 devfn;		/* PCI devfn number */
352 	struct device *dev; /* it's NULL for PCIe-to-PCI bridge */
353 	struct intel_iommu *iommu; /* IOMMU used by this device */
354 	struct dmar_domain *domain; /* pointer to domain */
355 };
356 
357 struct dmar_rmrr_unit {
358 	struct list_head list;		/* list of rmrr units	*/
359 	struct acpi_dmar_header *hdr;	/* ACPI header		*/
360 	u64	base_address;		/* reserved base address*/
361 	u64	end_address;		/* reserved end address */
362 	struct dmar_dev_scope *devices;	/* target devices */
363 	int	devices_cnt;		/* target device count */
364 };
365 
366 struct dmar_atsr_unit {
367 	struct list_head list;		/* list of ATSR units */
368 	struct acpi_dmar_header *hdr;	/* ACPI header */
369 	struct dmar_dev_scope *devices;	/* target devices */
370 	int devices_cnt;		/* target device count */
371 	u8 include_all:1;		/* include all ports */
372 };
373 
374 static LIST_HEAD(dmar_atsr_units);
375 static LIST_HEAD(dmar_rmrr_units);
376 
377 #define for_each_rmrr_units(rmrr) \
378 	list_for_each_entry(rmrr, &dmar_rmrr_units, list)
379 
380 static void flush_unmaps_timeout(unsigned long data);
381 
382 static DEFINE_TIMER(unmap_timer,  flush_unmaps_timeout, 0, 0);
383 
384 #define HIGH_WATER_MARK 250
385 struct deferred_flush_tables {
386 	int next;
387 	struct iova *iova[HIGH_WATER_MARK];
388 	struct dmar_domain *domain[HIGH_WATER_MARK];
389 	struct page *freelist[HIGH_WATER_MARK];
390 };
391 
392 static struct deferred_flush_tables *deferred_flush;
393 
394 /* bitmap for indexing intel_iommus */
395 static int g_num_of_iommus;
396 
397 static DEFINE_SPINLOCK(async_umap_flush_lock);
398 static LIST_HEAD(unmaps_to_do);
399 
400 static int timer_on;
401 static long list_size;
402 
403 static void domain_exit(struct dmar_domain *domain);
404 static void domain_remove_dev_info(struct dmar_domain *domain);
405 static void domain_remove_one_dev_info(struct dmar_domain *domain,
406 				       struct device *dev);
407 static void iommu_detach_dependent_devices(struct intel_iommu *iommu,
408 					   struct device *dev);
409 static int domain_detach_iommu(struct dmar_domain *domain,
410 			       struct intel_iommu *iommu);
411 
412 #ifdef CONFIG_INTEL_IOMMU_DEFAULT_ON
413 int dmar_disabled = 0;
414 #else
415 int dmar_disabled = 1;
416 #endif /*CONFIG_INTEL_IOMMU_DEFAULT_ON*/
417 
418 int intel_iommu_enabled = 0;
419 EXPORT_SYMBOL_GPL(intel_iommu_enabled);
420 
421 static int dmar_map_gfx = 1;
422 static int dmar_forcedac;
423 static int intel_iommu_strict;
424 static int intel_iommu_superpage = 1;
425 static int intel_iommu_ecs = 1;
426 
427 /* We only actually use ECS when PASID support (on the new bit 40)
428  * is also advertised. Some early implementations — the ones with
429  * PASID support on bit 28 — have issues even when we *only* use
430  * extended root/context tables. */
431 #define ecs_enabled(iommu) (intel_iommu_ecs && ecap_ecs(iommu->ecap) && \
432 			    ecap_pasid(iommu->ecap))
433 
434 int intel_iommu_gfx_mapped;
435 EXPORT_SYMBOL_GPL(intel_iommu_gfx_mapped);
436 
437 #define DUMMY_DEVICE_DOMAIN_INFO ((struct device_domain_info *)(-1))
438 static DEFINE_SPINLOCK(device_domain_lock);
439 static LIST_HEAD(device_domain_list);
440 
441 static const struct iommu_ops intel_iommu_ops;
442 
443 /* Convert a generic 'struct iommu_domain' to a private 'struct dmar_domain' */
444 static struct dmar_domain *to_dmar_domain(struct iommu_domain *dom)
445 {
446 	return container_of(dom, struct dmar_domain, domain);
447 }
448 
449 static int __init intel_iommu_setup(char *str)
450 {
451 	if (!str)
452 		return -EINVAL;
453 	while (*str) {
454 		if (!strncmp(str, "on", 2)) {
455 			dmar_disabled = 0;
456 			printk(KERN_INFO "Intel-IOMMU: enabled\n");
457 		} else if (!strncmp(str, "off", 3)) {
458 			dmar_disabled = 1;
459 			printk(KERN_INFO "Intel-IOMMU: disabled\n");
460 		} else if (!strncmp(str, "igfx_off", 8)) {
461 			dmar_map_gfx = 0;
462 			printk(KERN_INFO
463 				"Intel-IOMMU: disable GFX device mapping\n");
464 		} else if (!strncmp(str, "forcedac", 8)) {
465 			printk(KERN_INFO
466 				"Intel-IOMMU: Forcing DAC for PCI devices\n");
467 			dmar_forcedac = 1;
468 		} else if (!strncmp(str, "strict", 6)) {
469 			printk(KERN_INFO
470 				"Intel-IOMMU: disable batched IOTLB flush\n");
471 			intel_iommu_strict = 1;
472 		} else if (!strncmp(str, "sp_off", 6)) {
473 			printk(KERN_INFO
474 				"Intel-IOMMU: disable supported super page\n");
475 			intel_iommu_superpage = 0;
476 		} else if (!strncmp(str, "ecs_off", 7)) {
477 			printk(KERN_INFO
478 				"Intel-IOMMU: disable extended context table support\n");
479 			intel_iommu_ecs = 0;
480 		}
481 
482 		str += strcspn(str, ",");
483 		while (*str == ',')
484 			str++;
485 	}
486 	return 0;
487 }
488 __setup("intel_iommu=", intel_iommu_setup);
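/*
 * Options are comma separated and may be combined; for example booting
 * with "intel_iommu=on,strict,sp_off" enables the IOMMU, disables batched
 * IOTLB flushing and disables superpage support.
 */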
489 
490 static struct kmem_cache *iommu_domain_cache;
491 static struct kmem_cache *iommu_devinfo_cache;
492 
493 static inline void *alloc_pgtable_page(int node)
494 {
495 	struct page *page;
496 	void *vaddr = NULL;
497 
498 	page = alloc_pages_node(node, GFP_ATOMIC | __GFP_ZERO, 0);
499 	if (page)
500 		vaddr = page_address(page);
501 	return vaddr;
502 }
503 
504 static inline void free_pgtable_page(void *vaddr)
505 {
506 	free_page((unsigned long)vaddr);
507 }
508 
509 static inline void *alloc_domain_mem(void)
510 {
511 	return kmem_cache_alloc(iommu_domain_cache, GFP_ATOMIC);
512 }
513 
514 static void free_domain_mem(void *vaddr)
515 {
516 	kmem_cache_free(iommu_domain_cache, vaddr);
517 }
518 
519 static inline void *alloc_devinfo_mem(void)
520 {
521 	return kmem_cache_alloc(iommu_devinfo_cache, GFP_ATOMIC);
522 }
523 
524 static inline void free_devinfo_mem(void *vaddr)
525 {
526 	kmem_cache_free(iommu_devinfo_cache, vaddr);
527 }
528 
529 static inline int domain_type_is_vm(struct dmar_domain *domain)
530 {
531 	return domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE;
532 }
533 
534 static inline int domain_type_is_vm_or_si(struct dmar_domain *domain)
535 {
536 	return domain->flags & (DOMAIN_FLAG_VIRTUAL_MACHINE |
537 				DOMAIN_FLAG_STATIC_IDENTITY);
538 }
539 
540 static inline int domain_pfn_supported(struct dmar_domain *domain,
541 				       unsigned long pfn)
542 {
543 	int addr_width = agaw_to_width(domain->agaw) - VTD_PAGE_SHIFT;
544 
545 	return !(addr_width < BITS_PER_LONG && pfn >> addr_width);
546 }
547 
548 static int __iommu_calculate_agaw(struct intel_iommu *iommu, int max_gaw)
549 {
550 	unsigned long sagaw;
551 	int agaw = -1;
552 
553 	sagaw = cap_sagaw(iommu->cap);
554 	for (agaw = width_to_agaw(max_gaw);
555 	     agaw >= 0; agaw--) {
556 		if (test_bit(agaw, &sagaw))
557 			break;
558 	}
559 
560 	return agaw;
561 }
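/*
 * The SAGAW field is scanned downwards from the requested width, so the
 * result is the largest supported agaw that does not exceed max_gaw, or
 * -1 if none of the advertised widths fit.
 */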
562 
563 /*
564  * Calculate max SAGAW for each iommu.
565  */
566 int iommu_calculate_max_sagaw(struct intel_iommu *iommu)
567 {
568 	return __iommu_calculate_agaw(iommu, MAX_AGAW_WIDTH);
569 }
570 
571 /*
572  * Calculate the agaw for each iommu.
573  * "SAGAW" may differ across iommus, so use a default agaw and fall back
574  * to a smaller supported agaw for iommus that don't support the default.
575  */
576 int iommu_calculate_agaw(struct intel_iommu *iommu)
577 {
578 	return __iommu_calculate_agaw(iommu, DEFAULT_DOMAIN_ADDRESS_WIDTH);
579 }
580 
581 /* This function only returns a single iommu in a domain */
582 static struct intel_iommu *domain_get_iommu(struct dmar_domain *domain)
583 {
584 	int iommu_id;
585 
586 	/* si_domain and vm domain should not get here. */
587 	BUG_ON(domain_type_is_vm_or_si(domain));
588 	iommu_id = find_first_bit(domain->iommu_bmp, g_num_of_iommus);
589 	if (iommu_id < 0 || iommu_id >= g_num_of_iommus)
590 		return NULL;
591 
592 	return g_iommus[iommu_id];
593 }
594 
595 static void domain_update_iommu_coherency(struct dmar_domain *domain)
596 {
597 	struct dmar_drhd_unit *drhd;
598 	struct intel_iommu *iommu;
599 	bool found = false;
600 	int i;
601 
602 	domain->iommu_coherency = 1;
603 
604 	for_each_set_bit(i, domain->iommu_bmp, g_num_of_iommus) {
605 		found = true;
606 		if (!ecap_coherent(g_iommus[i]->ecap)) {
607 			domain->iommu_coherency = 0;
608 			break;
609 		}
610 	}
611 	if (found)
612 		return;
613 
614 	/* No hardware attached; use lowest common denominator */
615 	rcu_read_lock();
616 	for_each_active_iommu(iommu, drhd) {
617 		if (!ecap_coherent(iommu->ecap)) {
618 			domain->iommu_coherency = 0;
619 			break;
620 		}
621 	}
622 	rcu_read_unlock();
623 }
624 
625 static int domain_update_iommu_snooping(struct intel_iommu *skip)
626 {
627 	struct dmar_drhd_unit *drhd;
628 	struct intel_iommu *iommu;
629 	int ret = 1;
630 
631 	rcu_read_lock();
632 	for_each_active_iommu(iommu, drhd) {
633 		if (iommu != skip) {
634 			if (!ecap_sc_support(iommu->ecap)) {
635 				ret = 0;
636 				break;
637 			}
638 		}
639 	}
640 	rcu_read_unlock();
641 
642 	return ret;
643 }
644 
645 static int domain_update_iommu_superpage(struct intel_iommu *skip)
646 {
647 	struct dmar_drhd_unit *drhd;
648 	struct intel_iommu *iommu;
649 	int mask = 0xf;
650 
651 	if (!intel_iommu_superpage) {
652 		return 0;
653 	}
654 
655 	/* set iommu_superpage to the smallest common denominator */
656 	rcu_read_lock();
657 	for_each_active_iommu(iommu, drhd) {
658 		if (iommu != skip) {
659 			mask &= cap_super_page_val(iommu->cap);
660 			if (!mask)
661 				break;
662 		}
663 	}
664 	rcu_read_unlock();
665 
666 	return fls(mask);
667 }
668 
669 /* Some capabilities may be different across iommus */
670 static void domain_update_iommu_cap(struct dmar_domain *domain)
671 {
672 	domain_update_iommu_coherency(domain);
673 	domain->iommu_snooping = domain_update_iommu_snooping(NULL);
674 	domain->iommu_superpage = domain_update_iommu_superpage(NULL);
675 }
676 
677 static inline struct context_entry *iommu_context_addr(struct intel_iommu *iommu,
678 						       u8 bus, u8 devfn, int alloc)
679 {
680 	struct root_entry *root = &iommu->root_entry[bus];
681 	struct context_entry *context;
682 	u64 *entry;
683 
684 	entry = &root->lo;
685 	if (ecs_enabled(iommu)) {
686 		if (devfn >= 0x80) {
687 			devfn -= 0x80;
688 			entry = &root->hi;
689 		}
690 		devfn *= 2;
691 	}
692 	if (*entry & 1)
693 		context = phys_to_virt(*entry & VTD_PAGE_MASK);
694 	else {
695 		unsigned long phy_addr;
696 		if (!alloc)
697 			return NULL;
698 
699 		context = alloc_pgtable_page(iommu->node);
700 		if (!context)
701 			return NULL;
702 
703 		__iommu_flush_cache(iommu, (void *)context, CONTEXT_SIZE);
704 		phy_addr = virt_to_phys((void *)context);
705 		*entry = phy_addr | 1;
706 		__iommu_flush_cache(iommu, entry, sizeof(*entry));
707 	}
708 	return &context[devfn];
709 }
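/*
 * With extended context support (ECS) each extended context entry takes
 * two legacy-sized slots, so a bus's 256 devfns are split across two
 * context tables: root->lo covers devfn 0x00-0x7f, root->hi covers
 * 0x80-0xff, and the index is doubled to land on the right slot pair.
 */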
710 
711 static int iommu_dummy(struct device *dev)
712 {
713 	return dev->archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO;
714 }
715 
716 static struct intel_iommu *device_to_iommu(struct device *dev, u8 *bus, u8 *devfn)
717 {
718 	struct dmar_drhd_unit *drhd = NULL;
719 	struct intel_iommu *iommu;
720 	struct device *tmp;
721 	struct pci_dev *ptmp, *pdev = NULL;
722 	u16 segment = 0;
723 	int i;
724 
725 	if (iommu_dummy(dev))
726 		return NULL;
727 
728 	if (dev_is_pci(dev)) {
729 		pdev = to_pci_dev(dev);
730 		segment = pci_domain_nr(pdev->bus);
731 	} else if (has_acpi_companion(dev))
732 		dev = &ACPI_COMPANION(dev)->dev;
733 
734 	rcu_read_lock();
735 	for_each_active_iommu(iommu, drhd) {
736 		if (pdev && segment != drhd->segment)
737 			continue;
738 
739 		for_each_active_dev_scope(drhd->devices,
740 					  drhd->devices_cnt, i, tmp) {
741 			if (tmp == dev) {
742 				*bus = drhd->devices[i].bus;
743 				*devfn = drhd->devices[i].devfn;
744 				goto out;
745 			}
746 
747 			if (!pdev || !dev_is_pci(tmp))
748 				continue;
749 
750 			ptmp = to_pci_dev(tmp);
751 			if (ptmp->subordinate &&
752 			    ptmp->subordinate->number <= pdev->bus->number &&
753 			    ptmp->subordinate->busn_res.end >= pdev->bus->number)
754 				goto got_pdev;
755 		}
756 
757 		if (pdev && drhd->include_all) {
758 		got_pdev:
759 			*bus = pdev->bus->number;
760 			*devfn = pdev->devfn;
761 			goto out;
762 		}
763 	}
764 	iommu = NULL;
765  out:
766 	rcu_read_unlock();
767 
768 	return iommu;
769 }
770 
771 static void domain_flush_cache(struct dmar_domain *domain,
772 			       void *addr, int size)
773 {
774 	if (!domain->iommu_coherency)
775 		clflush_cache_range(addr, size);
776 }
777 
778 static int device_context_mapped(struct intel_iommu *iommu, u8 bus, u8 devfn)
779 {
780 	struct context_entry *context;
781 	int ret = 0;
782 	unsigned long flags;
783 
784 	spin_lock_irqsave(&iommu->lock, flags);
785 	context = iommu_context_addr(iommu, bus, devfn, 0);
786 	if (context)
787 		ret = context_present(context);
788 	spin_unlock_irqrestore(&iommu->lock, flags);
789 	return ret;
790 }
791 
792 static void clear_context_table(struct intel_iommu *iommu, u8 bus, u8 devfn)
793 {
794 	struct context_entry *context;
795 	unsigned long flags;
796 
797 	spin_lock_irqsave(&iommu->lock, flags);
798 	context = iommu_context_addr(iommu, bus, devfn, 0);
799 	if (context) {
800 		context_clear_entry(context);
801 		__iommu_flush_cache(iommu, context, sizeof(*context));
802 	}
803 	spin_unlock_irqrestore(&iommu->lock, flags);
804 }
805 
806 static void free_context_table(struct intel_iommu *iommu)
807 {
808 	int i;
809 	unsigned long flags;
810 	struct context_entry *context;
811 
812 	spin_lock_irqsave(&iommu->lock, flags);
813 	if (!iommu->root_entry) {
814 		goto out;
815 	}
816 	for (i = 0; i < ROOT_ENTRY_NR; i++) {
817 		context = iommu_context_addr(iommu, i, 0, 0);
818 		if (context)
819 			free_pgtable_page(context);
820 
821 		if (!ecs_enabled(iommu))
822 			continue;
823 
824 		context = iommu_context_addr(iommu, i, 0x80, 0);
825 		if (context)
826 			free_pgtable_page(context);
827 
828 	}
829 	free_pgtable_page(iommu->root_entry);
830 	iommu->root_entry = NULL;
831 out:
832 	spin_unlock_irqrestore(&iommu->lock, flags);
833 }
834 
835 static struct dma_pte *pfn_to_dma_pte(struct dmar_domain *domain,
836 				      unsigned long pfn, int *target_level)
837 {
838 	struct dma_pte *parent, *pte = NULL;
839 	int level = agaw_to_level(domain->agaw);
840 	int offset;
841 
842 	BUG_ON(!domain->pgd);
843 
844 	if (!domain_pfn_supported(domain, pfn))
845 		/* Address beyond IOMMU's addressing capabilities. */
846 		return NULL;
847 
848 	parent = domain->pgd;
849 
850 	while (1) {
851 		void *tmp_page;
852 
853 		offset = pfn_level_offset(pfn, level);
854 		pte = &parent[offset];
855 		if (!*target_level && (dma_pte_superpage(pte) || !dma_pte_present(pte)))
856 			break;
857 		if (level == *target_level)
858 			break;
859 
860 		if (!dma_pte_present(pte)) {
861 			uint64_t pteval;
862 
863 			tmp_page = alloc_pgtable_page(domain->nid);
864 
865 			if (!tmp_page)
866 				return NULL;
867 
868 			domain_flush_cache(domain, tmp_page, VTD_PAGE_SIZE);
869 			pteval = ((uint64_t)virt_to_dma_pfn(tmp_page) << VTD_PAGE_SHIFT) | DMA_PTE_READ | DMA_PTE_WRITE;
870 			if (cmpxchg64(&pte->val, 0ULL, pteval))
871 				/* Someone else set it while we were thinking; use theirs. */
872 				free_pgtable_page(tmp_page);
873 			else
874 				domain_flush_cache(domain, pte, sizeof(*pte));
875 		}
876 		if (level == 1)
877 			break;
878 
879 		parent = phys_to_virt(dma_pte_addr(pte));
880 		level--;
881 	}
882 
883 	if (!*target_level)
884 		*target_level = level;
885 
886 	return pte;
887 }
888 
889 
890 /* return address's pte at specific level */
891 static struct dma_pte *dma_pfn_level_pte(struct dmar_domain *domain,
892 					 unsigned long pfn,
893 					 int level, int *large_page)
894 {
895 	struct dma_pte *parent, *pte = NULL;
896 	int total = agaw_to_level(domain->agaw);
897 	int offset;
898 
899 	parent = domain->pgd;
900 	while (level <= total) {
901 		offset = pfn_level_offset(pfn, total);
902 		pte = &parent[offset];
903 		if (level == total)
904 			return pte;
905 
906 		if (!dma_pte_present(pte)) {
907 			*large_page = total;
908 			break;
909 		}
910 
911 		if (dma_pte_superpage(pte)) {
912 			*large_page = total;
913 			return pte;
914 		}
915 
916 		parent = phys_to_virt(dma_pte_addr(pte));
917 		total--;
918 	}
919 	return NULL;
920 }
921 
922 /* clear last level pte, a tlb flush should be followed */
923 static void dma_pte_clear_range(struct dmar_domain *domain,
924 				unsigned long start_pfn,
925 				unsigned long last_pfn)
926 {
927 	unsigned int large_page = 1;
928 	struct dma_pte *first_pte, *pte;
929 
930 	BUG_ON(!domain_pfn_supported(domain, start_pfn));
931 	BUG_ON(!domain_pfn_supported(domain, last_pfn));
932 	BUG_ON(start_pfn > last_pfn);
933 
934 	/* we don't need lock here; nobody else touches the iova range */
935 	do {
936 		large_page = 1;
937 		first_pte = pte = dma_pfn_level_pte(domain, start_pfn, 1, &large_page);
938 		if (!pte) {
939 			start_pfn = align_to_level(start_pfn + 1, large_page + 1);
940 			continue;
941 		}
942 		do {
943 			dma_clear_pte(pte);
944 			start_pfn += lvl_to_nr_pages(large_page);
945 			pte++;
946 		} while (start_pfn <= last_pfn && !first_pte_in_page(pte));
947 
948 		domain_flush_cache(domain, first_pte,
949 				   (void *)pte - (void *)first_pte);
950 
951 	} while (start_pfn && start_pfn <= last_pfn);
952 }
953 
954 static void dma_pte_free_level(struct dmar_domain *domain, int level,
955 			       struct dma_pte *pte, unsigned long pfn,
956 			       unsigned long start_pfn, unsigned long last_pfn)
957 {
958 	pfn = max(start_pfn, pfn);
959 	pte = &pte[pfn_level_offset(pfn, level)];
960 
961 	do {
962 		unsigned long level_pfn;
963 		struct dma_pte *level_pte;
964 
965 		if (!dma_pte_present(pte) || dma_pte_superpage(pte))
966 			goto next;
967 
968 		level_pfn = pfn & level_mask(level - 1);
969 		level_pte = phys_to_virt(dma_pte_addr(pte));
970 
971 		if (level > 2)
972 			dma_pte_free_level(domain, level - 1, level_pte,
973 					   level_pfn, start_pfn, last_pfn);
974 
975 		/* If range covers entire pagetable, free it */
976 		if (!(start_pfn > level_pfn ||
977 		      last_pfn < level_pfn + level_size(level) - 1)) {
978 			dma_clear_pte(pte);
979 			domain_flush_cache(domain, pte, sizeof(*pte));
980 			free_pgtable_page(level_pte);
981 		}
982 next:
983 		pfn += level_size(level);
984 	} while (!first_pte_in_page(++pte) && pfn <= last_pfn);
985 }
986 
987 /* free page table pages. last level pte should already be cleared */
988 static void dma_pte_free_pagetable(struct dmar_domain *domain,
989 				   unsigned long start_pfn,
990 				   unsigned long last_pfn)
991 {
992 	BUG_ON(!domain_pfn_supported(domain, start_pfn));
993 	BUG_ON(!domain_pfn_supported(domain, last_pfn));
994 	BUG_ON(start_pfn > last_pfn);
995 
996 	dma_pte_clear_range(domain, start_pfn, last_pfn);
997 
998 	/* We don't need lock here; nobody else touches the iova range */
999 	dma_pte_free_level(domain, agaw_to_level(domain->agaw),
1000 			   domain->pgd, 0, start_pfn, last_pfn);
1001 
1002 	/* free pgd */
1003 	if (start_pfn == 0 && last_pfn == DOMAIN_MAX_PFN(domain->gaw)) {
1004 		free_pgtable_page(domain->pgd);
1005 		domain->pgd = NULL;
1006 	}
1007 }
1008 
1009 /* When a page at a given level is being unlinked from its parent, we don't
1010    need to *modify* it at all. All we need to do is make a list of all the
1011    pages which can be freed just as soon as we've flushed the IOTLB and we
1012    know the hardware page-walk will no longer touch them.
1013    The 'pte' argument is the *parent* PTE, pointing to the page that is to
1014    be freed. */
1015 static struct page *dma_pte_list_pagetables(struct dmar_domain *domain,
1016 					    int level, struct dma_pte *pte,
1017 					    struct page *freelist)
1018 {
1019 	struct page *pg;
1020 
1021 	pg = pfn_to_page(dma_pte_addr(pte) >> PAGE_SHIFT);
1022 	pg->freelist = freelist;
1023 	freelist = pg;
1024 
1025 	if (level == 1)
1026 		return freelist;
1027 
1028 	pte = page_address(pg);
1029 	do {
1030 		if (dma_pte_present(pte) && !dma_pte_superpage(pte))
1031 			freelist = dma_pte_list_pagetables(domain, level - 1,
1032 							   pte, freelist);
1033 		pte++;
1034 	} while (!first_pte_in_page(pte));
1035 
1036 	return freelist;
1037 }
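/*
 * The freelist is threaded through page->freelist: each page-table page
 * to be released is pushed on the front of the list, and the whole chain
 * is handed to dma_free_pagelist() once the IOTLB flush has completed.
 */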
1038 
1039 static struct page *dma_pte_clear_level(struct dmar_domain *domain, int level,
1040 					struct dma_pte *pte, unsigned long pfn,
1041 					unsigned long start_pfn,
1042 					unsigned long last_pfn,
1043 					struct page *freelist)
1044 {
1045 	struct dma_pte *first_pte = NULL, *last_pte = NULL;
1046 
1047 	pfn = max(start_pfn, pfn);
1048 	pte = &pte[pfn_level_offset(pfn, level)];
1049 
1050 	do {
1051 		unsigned long level_pfn;
1052 
1053 		if (!dma_pte_present(pte))
1054 			goto next;
1055 
1056 		level_pfn = pfn & level_mask(level);
1057 
1058 		/* If range covers entire pagetable, free it */
1059 		if (start_pfn <= level_pfn &&
1060 		    last_pfn >= level_pfn + level_size(level) - 1) {
1061 			/* These subordinate page tables are going away entirely. Don't
1062 			   bother to clear them; we're just going to *free* them. */
1063 			if (level > 1 && !dma_pte_superpage(pte))
1064 				freelist = dma_pte_list_pagetables(domain, level - 1, pte, freelist);
1065 
1066 			dma_clear_pte(pte);
1067 			if (!first_pte)
1068 				first_pte = pte;
1069 			last_pte = pte;
1070 		} else if (level > 1) {
1071 			/* Recurse down into a level that isn't *entirely* obsolete */
1072 			freelist = dma_pte_clear_level(domain, level - 1,
1073 						       phys_to_virt(dma_pte_addr(pte)),
1074 						       level_pfn, start_pfn, last_pfn,
1075 						       freelist);
1076 		}
1077 next:
1078 		pfn += level_size(level);
1079 	} while (!first_pte_in_page(++pte) && pfn <= last_pfn);
1080 
1081 	if (first_pte)
1082 		domain_flush_cache(domain, first_pte,
1083 				   (void *)++last_pte - (void *)first_pte);
1084 
1085 	return freelist;
1086 }
1087 
1088 /* We can't just free the pages because the IOMMU may still be walking
1089    the page tables, and may have cached the intermediate levels. The
1090    pages can only be freed after the IOTLB flush has been done. */
1091 struct page *domain_unmap(struct dmar_domain *domain,
1092 			  unsigned long start_pfn,
1093 			  unsigned long last_pfn)
1094 {
1095 	struct page *freelist = NULL;
1096 
1097 	BUG_ON(!domain_pfn_supported(domain, start_pfn));
1098 	BUG_ON(!domain_pfn_supported(domain, last_pfn));
1099 	BUG_ON(start_pfn > last_pfn);
1100 
1101 	/* we don't need lock here; nobody else touches the iova range */
1102 	freelist = dma_pte_clear_level(domain, agaw_to_level(domain->agaw),
1103 				       domain->pgd, 0, start_pfn, last_pfn, NULL);
1104 
1105 	/* free pgd */
1106 	if (start_pfn == 0 && last_pfn == DOMAIN_MAX_PFN(domain->gaw)) {
1107 		struct page *pgd_page = virt_to_page(domain->pgd);
1108 		pgd_page->freelist = freelist;
1109 		freelist = pgd_page;
1110 
1111 		domain->pgd = NULL;
1112 	}
1113 
1114 	return freelist;
1115 }
1116 
1117 void dma_free_pagelist(struct page *freelist)
1118 {
1119 	struct page *pg;
1120 
1121 	while ((pg = freelist)) {
1122 		freelist = pg->freelist;
1123 		free_pgtable_page(page_address(pg));
1124 	}
1125 }
1126 
1127 /* iommu handling */
1128 static int iommu_alloc_root_entry(struct intel_iommu *iommu)
1129 {
1130 	struct root_entry *root;
1131 	unsigned long flags;
1132 
1133 	root = (struct root_entry *)alloc_pgtable_page(iommu->node);
1134 	if (!root) {
1135 		pr_err("IOMMU: allocating root entry for %s failed\n",
1136 			iommu->name);
1137 		return -ENOMEM;
1138 	}
1139 
1140 	__iommu_flush_cache(iommu, root, ROOT_SIZE);
1141 
1142 	spin_lock_irqsave(&iommu->lock, flags);
1143 	iommu->root_entry = root;
1144 	spin_unlock_irqrestore(&iommu->lock, flags);
1145 
1146 	return 0;
1147 }
1148 
1149 static void iommu_set_root_entry(struct intel_iommu *iommu)
1150 {
1151 	u64 addr;
1152 	u32 sts;
1153 	unsigned long flag;
1154 
1155 	addr = virt_to_phys(iommu->root_entry);
1156 	if (ecs_enabled(iommu))
1157 		addr |= DMA_RTADDR_RTT;
1158 
1159 	raw_spin_lock_irqsave(&iommu->register_lock, flag);
1160 	dmar_writeq(iommu->reg + DMAR_RTADDR_REG, addr);
1161 
1162 	writel(iommu->gcmd | DMA_GCMD_SRTP, iommu->reg + DMAR_GCMD_REG);
1163 
1164 	/* Make sure hardware completes it */
1165 	IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
1166 		      readl, (sts & DMA_GSTS_RTPS), sts);
1167 
1168 	raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
1169 }
1170 
1171 static void iommu_flush_write_buffer(struct intel_iommu *iommu)
1172 {
1173 	u32 val;
1174 	unsigned long flag;
1175 
1176 	if (!rwbf_quirk && !cap_rwbf(iommu->cap))
1177 		return;
1178 
1179 	raw_spin_lock_irqsave(&iommu->register_lock, flag);
1180 	writel(iommu->gcmd | DMA_GCMD_WBF, iommu->reg + DMAR_GCMD_REG);
1181 
1182 	/* Make sure hardware completes it */
1183 	IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
1184 		      readl, (!(val & DMA_GSTS_WBFS)), val);
1185 
1186 	raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
1187 }
1188 
1189 /* return value determines if we need a write buffer flush */
1190 static void __iommu_flush_context(struct intel_iommu *iommu,
1191 				  u16 did, u16 source_id, u8 function_mask,
1192 				  u64 type)
1193 {
1194 	u64 val = 0;
1195 	unsigned long flag;
1196 
1197 	switch (type) {
1198 	case DMA_CCMD_GLOBAL_INVL:
1199 		val = DMA_CCMD_GLOBAL_INVL;
1200 		break;
1201 	case DMA_CCMD_DOMAIN_INVL:
1202 		val = DMA_CCMD_DOMAIN_INVL|DMA_CCMD_DID(did);
1203 		break;
1204 	case DMA_CCMD_DEVICE_INVL:
1205 		val = DMA_CCMD_DEVICE_INVL|DMA_CCMD_DID(did)
1206 			| DMA_CCMD_SID(source_id) | DMA_CCMD_FM(function_mask);
1207 		break;
1208 	default:
1209 		BUG();
1210 	}
1211 	val |= DMA_CCMD_ICC;
1212 
1213 	raw_spin_lock_irqsave(&iommu->register_lock, flag);
1214 	dmar_writeq(iommu->reg + DMAR_CCMD_REG, val);
1215 
1216 	/* Make sure hardware completes it */
1217 	IOMMU_WAIT_OP(iommu, DMAR_CCMD_REG,
1218 		dmar_readq, (!(val & DMA_CCMD_ICC)), val);
1219 
1220 	raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
1221 }
1222 
1223 /* return value determines if we need a write buffer flush */
1224 static void __iommu_flush_iotlb(struct intel_iommu *iommu, u16 did,
1225 				u64 addr, unsigned int size_order, u64 type)
1226 {
1227 	int tlb_offset = ecap_iotlb_offset(iommu->ecap);
1228 	u64 val = 0, val_iva = 0;
1229 	unsigned long flag;
1230 
1231 	switch (type) {
1232 	case DMA_TLB_GLOBAL_FLUSH:
1233 		/* a global flush doesn't need to set IVA_REG */
1234 		val = DMA_TLB_GLOBAL_FLUSH|DMA_TLB_IVT;
1235 		break;
1236 	case DMA_TLB_DSI_FLUSH:
1237 		val = DMA_TLB_DSI_FLUSH|DMA_TLB_IVT|DMA_TLB_DID(did);
1238 		break;
1239 	case DMA_TLB_PSI_FLUSH:
1240 		val = DMA_TLB_PSI_FLUSH|DMA_TLB_IVT|DMA_TLB_DID(did);
1241 		/* IH bit is passed in as part of address */
1242 		val_iva = size_order | addr;
1243 		break;
1244 	default:
1245 		BUG();
1246 	}
1247 	/* Note: set drain read/write */
1248 #if 0
1249 	/*
1250 	 * This is probably just to be extra safe; it looks like we can
1251 	 * ignore it without any impact.
1252 	 */
1253 	if (cap_read_drain(iommu->cap))
1254 		val |= DMA_TLB_READ_DRAIN;
1255 #endif
1256 	if (cap_write_drain(iommu->cap))
1257 		val |= DMA_TLB_WRITE_DRAIN;
1258 
1259 	raw_spin_lock_irqsave(&iommu->register_lock, flag);
1260 	/* Note: Only uses first TLB reg currently */
1261 	if (val_iva)
1262 		dmar_writeq(iommu->reg + tlb_offset, val_iva);
1263 	dmar_writeq(iommu->reg + tlb_offset + 8, val);
1264 
1265 	/* Make sure hardware completes it */
1266 	IOMMU_WAIT_OP(iommu, tlb_offset + 8,
1267 		dmar_readq, (!(val & DMA_TLB_IVT)), val);
1268 
1269 	raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
1270 
1271 	/* check IOTLB invalidation granularity */
1272 	if (DMA_TLB_IAIG(val) == 0)
1273 		printk(KERN_ERR"IOMMU: flush IOTLB failed\n");
1274 	if (DMA_TLB_IAIG(val) != DMA_TLB_IIRG(type))
1275 		pr_debug("IOMMU: tlb flush request %Lx, actual %Lx\n",
1276 			(unsigned long long)DMA_TLB_IIRG(type),
1277 			(unsigned long long)DMA_TLB_IAIG(val));
1278 }
1279 
1280 static struct device_domain_info *
iommu_support_dev_iotlb(struct dmar_domain * domain,struct intel_iommu * iommu,u8 bus,u8 devfn)1281 iommu_support_dev_iotlb (struct dmar_domain *domain, struct intel_iommu *iommu,
1282 			 u8 bus, u8 devfn)
1283 {
1284 	bool found = false;
1285 	unsigned long flags;
1286 	struct device_domain_info *info;
1287 	struct pci_dev *pdev;
1288 
1289 	if (!ecap_dev_iotlb_support(iommu->ecap))
1290 		return NULL;
1291 
1292 	if (!iommu->qi)
1293 		return NULL;
1294 
1295 	spin_lock_irqsave(&device_domain_lock, flags);
1296 	list_for_each_entry(info, &domain->devices, link)
1297 		if (info->iommu == iommu && info->bus == bus &&
1298 		    info->devfn == devfn) {
1299 			found = true;
1300 			break;
1301 		}
1302 	spin_unlock_irqrestore(&device_domain_lock, flags);
1303 
1304 	if (!found || !info->dev || !dev_is_pci(info->dev))
1305 		return NULL;
1306 
1307 	pdev = to_pci_dev(info->dev);
1308 
1309 	if (!pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_ATS))
1310 		return NULL;
1311 
1312 	if (!dmar_find_matched_atsr_unit(pdev))
1313 		return NULL;
1314 
1315 	return info;
1316 }
1317 
1318 static void iommu_enable_dev_iotlb(struct device_domain_info *info)
1319 {
1320 	if (!info || !dev_is_pci(info->dev))
1321 		return;
1322 
1323 	pci_enable_ats(to_pci_dev(info->dev), VTD_PAGE_SHIFT);
1324 }
1325 
1326 static void iommu_disable_dev_iotlb(struct device_domain_info *info)
1327 {
1328 	if (!info->dev || !dev_is_pci(info->dev) ||
1329 	    !pci_ats_enabled(to_pci_dev(info->dev)))
1330 		return;
1331 
1332 	pci_disable_ats(to_pci_dev(info->dev));
1333 }
1334 
1335 static void iommu_flush_dev_iotlb(struct dmar_domain *domain,
1336 				  u64 addr, unsigned mask)
1337 {
1338 	u16 sid, qdep;
1339 	unsigned long flags;
1340 	struct device_domain_info *info;
1341 
1342 	spin_lock_irqsave(&device_domain_lock, flags);
1343 	list_for_each_entry(info, &domain->devices, link) {
1344 		struct pci_dev *pdev;
1345 		if (!info->dev || !dev_is_pci(info->dev))
1346 			continue;
1347 
1348 		pdev = to_pci_dev(info->dev);
1349 		if (!pci_ats_enabled(pdev))
1350 			continue;
1351 
1352 		sid = info->bus << 8 | info->devfn;
1353 		qdep = pci_ats_queue_depth(pdev);
1354 		qi_flush_dev_iotlb(info->iommu, sid, qdep, addr, mask);
1355 	}
1356 	spin_unlock_irqrestore(&device_domain_lock, flags);
1357 }
1358 
1359 static void iommu_flush_iotlb_psi(struct intel_iommu *iommu, u16 did,
1360 				  unsigned long pfn, unsigned int pages, int ih, int map)
1361 {
1362 	unsigned int mask = ilog2(__roundup_pow_of_two(pages));
1363 	uint64_t addr = (uint64_t)pfn << VTD_PAGE_SHIFT;
1364 
1365 	BUG_ON(pages == 0);
1366 
1367 	if (ih)
1368 		ih = 1 << 6;
1369 	/*
1370 	 * Fall back to a domain-selective flush if there is no PSI support or
1371 	 * the size is too big.
1372 	 * PSI requires the page count to be a power of two, and the base
1373 	 * address to be naturally aligned to that size.
1374 	 */
1375 	if (!cap_pgsel_inv(iommu->cap) || mask > cap_max_amask_val(iommu->cap))
1376 		iommu->flush.flush_iotlb(iommu, did, 0, 0,
1377 						DMA_TLB_DSI_FLUSH);
1378 	else
1379 		iommu->flush.flush_iotlb(iommu, did, addr | ih, mask,
1380 						DMA_TLB_PSI_FLUSH);
1381 
1382 	/*
1383 	 * In caching mode, changes of pages from non-present to present require
1384 	 * flush. However, device IOTLB doesn't need to be flushed in this case.
1385 	 */
1386 	if (!cap_caching_mode(iommu->cap) || !map)
1387 		iommu_flush_dev_iotlb(iommu->domains[did], addr, mask);
1388 }
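/*
 * The PSI address mask is the log2 of the page count rounded up to a
 * power of two, e.g. flushing 3 pages gives mask 2, a 16KiB naturally
 * aligned region.  If the mask exceeds cap_max_amask_val(), the function
 * above falls back to a domain-selective flush.
 */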
1389 
1390 static void iommu_disable_protect_mem_regions(struct intel_iommu *iommu)
1391 {
1392 	u32 pmen;
1393 	unsigned long flags;
1394 
1395 	raw_spin_lock_irqsave(&iommu->register_lock, flags);
1396 	pmen = readl(iommu->reg + DMAR_PMEN_REG);
1397 	pmen &= ~DMA_PMEN_EPM;
1398 	writel(pmen, iommu->reg + DMAR_PMEN_REG);
1399 
1400 	/* wait for the protected region status bit to clear */
1401 	IOMMU_WAIT_OP(iommu, DMAR_PMEN_REG,
1402 		readl, !(pmen & DMA_PMEN_PRS), pmen);
1403 
1404 	raw_spin_unlock_irqrestore(&iommu->register_lock, flags);
1405 }
1406 
1407 static void iommu_enable_translation(struct intel_iommu *iommu)
1408 {
1409 	u32 sts;
1410 	unsigned long flags;
1411 
1412 	raw_spin_lock_irqsave(&iommu->register_lock, flags);
1413 	iommu->gcmd |= DMA_GCMD_TE;
1414 	writel(iommu->gcmd, iommu->reg + DMAR_GCMD_REG);
1415 
1416 	/* Make sure hardware completes it */
1417 	IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
1418 		      readl, (sts & DMA_GSTS_TES), sts);
1419 
1420 	raw_spin_unlock_irqrestore(&iommu->register_lock, flags);
1421 }
1422 
1423 static void iommu_disable_translation(struct intel_iommu *iommu)
1424 {
1425 	u32 sts;
1426 	unsigned long flag;
1427 
1428 	raw_spin_lock_irqsave(&iommu->register_lock, flag);
1429 	iommu->gcmd &= ~DMA_GCMD_TE;
1430 	writel(iommu->gcmd, iommu->reg + DMAR_GCMD_REG);
1431 
1432 	/* Make sure hardware completes it */
1433 	IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
1434 		      readl, (!(sts & DMA_GSTS_TES)), sts);
1435 
1436 	raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
1437 }
1438 
1439 
1440 static int iommu_init_domains(struct intel_iommu *iommu)
1441 {
1442 	unsigned long ndomains;
1443 	unsigned long nlongs;
1444 
1445 	ndomains = cap_ndoms(iommu->cap);
1446 	pr_debug("IOMMU%d: Number of Domains supported <%ld>\n",
1447 		 iommu->seq_id, ndomains);
1448 	nlongs = BITS_TO_LONGS(ndomains);
1449 
1450 	spin_lock_init(&iommu->lock);
1451 
1452 	/* TBD: there might be 64K domains;
1453 	 * consider a different allocation scheme for future chips.
1454 	 */
1455 	iommu->domain_ids = kcalloc(nlongs, sizeof(unsigned long), GFP_KERNEL);
1456 	if (!iommu->domain_ids) {
1457 		pr_err("IOMMU%d: allocating domain id array failed\n",
1458 		       iommu->seq_id);
1459 		return -ENOMEM;
1460 	}
1461 	iommu->domains = kcalloc(ndomains, sizeof(struct dmar_domain *),
1462 			GFP_KERNEL);
1463 	if (!iommu->domains) {
1464 		pr_err("IOMMU%d: allocating domain array failed\n",
1465 		       iommu->seq_id);
1466 		kfree(iommu->domain_ids);
1467 		iommu->domain_ids = NULL;
1468 		return -ENOMEM;
1469 	}
1470 
1471 	/*
1472 	 * if Caching mode is set, then invalid translations are tagged
1473 	 * with domainid 0. Hence we need to pre-allocate it.
1474 	 */
1475 	if (cap_caching_mode(iommu->cap))
1476 		set_bit(0, iommu->domain_ids);
1477 	return 0;
1478 }
1479 
1480 static void disable_dmar_iommu(struct intel_iommu *iommu)
1481 {
1482 	struct dmar_domain *domain;
1483 	int i;
1484 
1485 	if ((iommu->domains) && (iommu->domain_ids)) {
1486 		for_each_set_bit(i, iommu->domain_ids, cap_ndoms(iommu->cap)) {
1487 			/*
1488 			 * Domain id 0 is reserved for invalid translation
1489 			 * if hardware supports caching mode.
1490 			 */
1491 			if (cap_caching_mode(iommu->cap) && i == 0)
1492 				continue;
1493 
1494 			domain = iommu->domains[i];
1495 			clear_bit(i, iommu->domain_ids);
1496 			if (domain_detach_iommu(domain, iommu) == 0 &&
1497 			    !domain_type_is_vm(domain))
1498 				domain_exit(domain);
1499 		}
1500 	}
1501 
1502 	if (iommu->gcmd & DMA_GCMD_TE)
1503 		iommu_disable_translation(iommu);
1504 }
1505 
1506 static void free_dmar_iommu(struct intel_iommu *iommu)
1507 {
1508 	if ((iommu->domains) && (iommu->domain_ids)) {
1509 		kfree(iommu->domains);
1510 		kfree(iommu->domain_ids);
1511 		iommu->domains = NULL;
1512 		iommu->domain_ids = NULL;
1513 	}
1514 
1515 	g_iommus[iommu->seq_id] = NULL;
1516 
1517 	/* free context mapping */
1518 	free_context_table(iommu);
1519 }
1520 
1521 static struct dmar_domain *alloc_domain(int flags)
1522 {
1523 	/* domain id for virtual machine, it won't be set in context */
1524 	static atomic_t vm_domid = ATOMIC_INIT(0);
1525 	struct dmar_domain *domain;
1526 
1527 	domain = alloc_domain_mem();
1528 	if (!domain)
1529 		return NULL;
1530 
1531 	memset(domain, 0, sizeof(*domain));
1532 	domain->nid = -1;
1533 	domain->flags = flags;
1534 	spin_lock_init(&domain->iommu_lock);
1535 	INIT_LIST_HEAD(&domain->devices);
1536 	if (flags & DOMAIN_FLAG_VIRTUAL_MACHINE)
1537 		domain->id = atomic_inc_return(&vm_domid);
1538 
1539 	return domain;
1540 }
1541 
1542 static int __iommu_attach_domain(struct dmar_domain *domain,
1543 				 struct intel_iommu *iommu)
1544 {
1545 	int num;
1546 	unsigned long ndomains;
1547 
1548 	ndomains = cap_ndoms(iommu->cap);
1549 	num = find_first_zero_bit(iommu->domain_ids, ndomains);
1550 	if (num < ndomains) {
1551 		set_bit(num, iommu->domain_ids);
1552 		iommu->domains[num] = domain;
1553 	} else {
1554 		num = -ENOSPC;
1555 	}
1556 
1557 	return num;
1558 }
1559 
1560 static int iommu_attach_domain(struct dmar_domain *domain,
1561 			       struct intel_iommu *iommu)
1562 {
1563 	int num;
1564 	unsigned long flags;
1565 
1566 	spin_lock_irqsave(&iommu->lock, flags);
1567 	num = __iommu_attach_domain(domain, iommu);
1568 	spin_unlock_irqrestore(&iommu->lock, flags);
1569 	if (num < 0)
1570 		pr_err("IOMMU: no free domain ids\n");
1571 
1572 	return num;
1573 }
1574 
1575 static int iommu_attach_vm_domain(struct dmar_domain *domain,
1576 				  struct intel_iommu *iommu)
1577 {
1578 	int num;
1579 	unsigned long ndomains;
1580 
1581 	ndomains = cap_ndoms(iommu->cap);
1582 	for_each_set_bit(num, iommu->domain_ids, ndomains)
1583 		if (iommu->domains[num] == domain)
1584 			return num;
1585 
1586 	return __iommu_attach_domain(domain, iommu);
1587 }
1588 
1589 static void iommu_detach_domain(struct dmar_domain *domain,
1590 				struct intel_iommu *iommu)
1591 {
1592 	unsigned long flags;
1593 	int num, ndomains;
1594 
1595 	spin_lock_irqsave(&iommu->lock, flags);
1596 	if (domain_type_is_vm_or_si(domain)) {
1597 		ndomains = cap_ndoms(iommu->cap);
1598 		for_each_set_bit(num, iommu->domain_ids, ndomains) {
1599 			if (iommu->domains[num] == domain) {
1600 				clear_bit(num, iommu->domain_ids);
1601 				iommu->domains[num] = NULL;
1602 				break;
1603 			}
1604 		}
1605 	} else {
1606 		clear_bit(domain->id, iommu->domain_ids);
1607 		iommu->domains[domain->id] = NULL;
1608 	}
1609 	spin_unlock_irqrestore(&iommu->lock, flags);
1610 }
1611 
1612 static void domain_attach_iommu(struct dmar_domain *domain,
1613 			       struct intel_iommu *iommu)
1614 {
1615 	unsigned long flags;
1616 
1617 	spin_lock_irqsave(&domain->iommu_lock, flags);
1618 	if (!test_and_set_bit(iommu->seq_id, domain->iommu_bmp)) {
1619 		domain->iommu_count++;
1620 		if (domain->iommu_count == 1)
1621 			domain->nid = iommu->node;
1622 		domain_update_iommu_cap(domain);
1623 	}
1624 	spin_unlock_irqrestore(&domain->iommu_lock, flags);
1625 }
1626 
1627 static int domain_detach_iommu(struct dmar_domain *domain,
1628 			       struct intel_iommu *iommu)
1629 {
1630 	unsigned long flags;
1631 	int count = INT_MAX;
1632 
1633 	spin_lock_irqsave(&domain->iommu_lock, flags);
1634 	if (test_and_clear_bit(iommu->seq_id, domain->iommu_bmp)) {
1635 		count = --domain->iommu_count;
1636 		domain_update_iommu_cap(domain);
1637 	}
1638 	spin_unlock_irqrestore(&domain->iommu_lock, flags);
1639 
1640 	return count;
1641 }
1642 
1643 static struct iova_domain reserved_iova_list;
1644 static struct lock_class_key reserved_rbtree_key;
1645 
1646 static int dmar_init_reserved_ranges(void)
1647 {
1648 	struct pci_dev *pdev = NULL;
1649 	struct iova *iova;
1650 	int i;
1651 
1652 	init_iova_domain(&reserved_iova_list, VTD_PAGE_SIZE, IOVA_START_PFN,
1653 			DMA_32BIT_PFN);
1654 
1655 	lockdep_set_class(&reserved_iova_list.iova_rbtree_lock,
1656 		&reserved_rbtree_key);
1657 
1658 	/* IOAPIC ranges shouldn't be accessed by DMA */
1659 	iova = reserve_iova(&reserved_iova_list, IOVA_PFN(IOAPIC_RANGE_START),
1660 		IOVA_PFN(IOAPIC_RANGE_END));
1661 	if (!iova) {
1662 		printk(KERN_ERR "Reserve IOAPIC range failed\n");
1663 		return -ENODEV;
1664 	}
1665 
1666 	/* Reserve all PCI MMIO to avoid peer-to-peer access */
1667 	for_each_pci_dev(pdev) {
1668 		struct resource *r;
1669 
1670 		for (i = 0; i < PCI_NUM_RESOURCES; i++) {
1671 			r = &pdev->resource[i];
1672 			if (!r->flags || !(r->flags & IORESOURCE_MEM))
1673 				continue;
1674 			iova = reserve_iova(&reserved_iova_list,
1675 					    IOVA_PFN(r->start),
1676 					    IOVA_PFN(r->end));
1677 			if (!iova) {
1678 				printk(KERN_ERR "Reserve iova failed\n");
1679 				return -ENODEV;
1680 			}
1681 		}
1682 	}
1683 	return 0;
1684 }
1685 
1686 static void domain_reserve_special_ranges(struct dmar_domain *domain)
1687 {
1688 	copy_reserved_iova(&reserved_iova_list, &domain->iovad);
1689 }
1690 
1691 static inline int guestwidth_to_adjustwidth(int gaw)
1692 {
1693 	int agaw;
1694 	int r = (gaw - 12) % 9;
1695 
1696 	if (r == 0)
1697 		agaw = gaw;
1698 	else
1699 		agaw = gaw + 9 - r;
1700 	if (agaw > 64)
1701 		agaw = 64;
1702 	return agaw;
1703 }
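/*
 * The adjusted width rounds the guest width up to the next value of the
 * form 12 + 9*n so that it maps to a whole number of page-table levels,
 * e.g. gaw 36 becomes 39 while gaw 48 stays 48; results are clamped to 64.
 */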
1704 
1705 static int domain_init(struct dmar_domain *domain, int guest_width)
1706 {
1707 	struct intel_iommu *iommu;
1708 	int adjust_width, agaw;
1709 	unsigned long sagaw;
1710 
1711 	init_iova_domain(&domain->iovad, VTD_PAGE_SIZE, IOVA_START_PFN,
1712 			DMA_32BIT_PFN);
1713 	domain_reserve_special_ranges(domain);
1714 
1715 	/* calculate AGAW */
1716 	iommu = domain_get_iommu(domain);
1717 	if (guest_width > cap_mgaw(iommu->cap))
1718 		guest_width = cap_mgaw(iommu->cap);
1719 	domain->gaw = guest_width;
1720 	adjust_width = guestwidth_to_adjustwidth(guest_width);
1721 	agaw = width_to_agaw(adjust_width);
1722 	sagaw = cap_sagaw(iommu->cap);
1723 	if (!test_bit(agaw, &sagaw)) {
1724 		/* hardware doesn't support it, choose a bigger one */
1725 		pr_debug("IOMMU: hardware doesn't support agaw %d\n", agaw);
1726 		agaw = find_next_bit(&sagaw, 5, agaw);
1727 		if (agaw >= 5)
1728 			return -ENODEV;
1729 	}
1730 	domain->agaw = agaw;
1731 
1732 	if (ecap_coherent(iommu->ecap))
1733 		domain->iommu_coherency = 1;
1734 	else
1735 		domain->iommu_coherency = 0;
1736 
1737 	if (ecap_sc_support(iommu->ecap))
1738 		domain->iommu_snooping = 1;
1739 	else
1740 		domain->iommu_snooping = 0;
1741 
1742 	if (intel_iommu_superpage)
1743 		domain->iommu_superpage = fls(cap_super_page_val(iommu->cap));
1744 	else
1745 		domain->iommu_superpage = 0;
1746 
1747 	domain->nid = iommu->node;
1748 
1749 	/* always allocate the top pgd */
1750 	domain->pgd = (struct dma_pte *)alloc_pgtable_page(domain->nid);
1751 	if (!domain->pgd)
1752 		return -ENOMEM;
1753 	__iommu_flush_cache(iommu, domain->pgd, PAGE_SIZE);
1754 	return 0;
1755 }
1756 
1757 static void domain_exit(struct dmar_domain *domain)
1758 {
1759 	struct dmar_drhd_unit *drhd;
1760 	struct intel_iommu *iommu;
1761 	struct page *freelist = NULL;
1762 
1763 	/* Domain 0 is reserved, so don't process it */
1764 	if (!domain)
1765 		return;
1766 
1767 	/* Flush any lazy unmaps that may reference this domain */
1768 	if (!intel_iommu_strict)
1769 		flush_unmaps_timeout(0);
1770 
1771 	/* remove associated devices */
1772 	domain_remove_dev_info(domain);
1773 
1774 	/* destroy iovas */
1775 	put_iova_domain(&domain->iovad);
1776 
1777 	freelist = domain_unmap(domain, 0, DOMAIN_MAX_PFN(domain->gaw));
1778 
1779 	/* clear attached or cached domains */
1780 	rcu_read_lock();
1781 	for_each_active_iommu(iommu, drhd)
1782 		if (domain_type_is_vm(domain) ||
1783 		    test_bit(iommu->seq_id, domain->iommu_bmp))
1784 			iommu_detach_domain(domain, iommu);
1785 	rcu_read_unlock();
1786 
1787 	dma_free_pagelist(freelist);
1788 
1789 	free_domain_mem(domain);
1790 }
1791 
1792 static int domain_context_mapping_one(struct dmar_domain *domain,
1793 				      struct intel_iommu *iommu,
1794 				      u8 bus, u8 devfn, int translation)
1795 {
1796 	struct context_entry *context;
1797 	unsigned long flags;
1798 	struct dma_pte *pgd;
1799 	int id;
1800 	int agaw;
1801 	struct device_domain_info *info = NULL;
1802 
1803 	pr_debug("Set context mapping for %02x:%02x.%d\n",
1804 		bus, PCI_SLOT(devfn), PCI_FUNC(devfn));
1805 
1806 	BUG_ON(!domain->pgd);
1807 	BUG_ON(translation != CONTEXT_TT_PASS_THROUGH &&
1808 	       translation != CONTEXT_TT_MULTI_LEVEL);
1809 
1810 	spin_lock_irqsave(&iommu->lock, flags);
1811 	context = iommu_context_addr(iommu, bus, devfn, 1);
1812 	spin_unlock_irqrestore(&iommu->lock, flags);
1813 	if (!context)
1814 		return -ENOMEM;
1815 	spin_lock_irqsave(&iommu->lock, flags);
1816 	if (context_present(context)) {
1817 		spin_unlock_irqrestore(&iommu->lock, flags);
1818 		return 0;
1819 	}
1820 
1821 	id = domain->id;
1822 	pgd = domain->pgd;
1823 
1824 	if (domain_type_is_vm_or_si(domain)) {
1825 		if (domain_type_is_vm(domain)) {
1826 			id = iommu_attach_vm_domain(domain, iommu);
1827 			if (id < 0) {
1828 				spin_unlock_irqrestore(&iommu->lock, flags);
1829 				pr_err("IOMMU: no free domain ids\n");
1830 				return -EFAULT;
1831 			}
1832 		}
1833 
1834 		/* Skip the top levels of the page tables for an
1835 		 * iommu which has a smaller agaw than the default.
1836 		 * Unnecessary for PT mode.
1837 		 */
1838 		if (translation != CONTEXT_TT_PASS_THROUGH) {
1839 			for (agaw = domain->agaw; agaw != iommu->agaw; agaw--) {
1840 				pgd = phys_to_virt(dma_pte_addr(pgd));
1841 				if (!dma_pte_present(pgd)) {
1842 					spin_unlock_irqrestore(&iommu->lock, flags);
1843 					return -ENOMEM;
1844 				}
1845 			}
1846 		}
1847 	}
1848 
1849 	context_set_domain_id(context, id);
1850 
1851 	if (translation != CONTEXT_TT_PASS_THROUGH) {
1852 		info = iommu_support_dev_iotlb(domain, iommu, bus, devfn);
1853 		translation = info ? CONTEXT_TT_DEV_IOTLB :
1854 				     CONTEXT_TT_MULTI_LEVEL;
1855 	}
1856 	/*
1857 	 * In pass through mode, AW must be programmed to indicate the largest
1858 	 * AGAW value supported by hardware. And ASR is ignored by hardware.
1859 	 */
1860 	if (unlikely(translation == CONTEXT_TT_PASS_THROUGH))
1861 		context_set_address_width(context, iommu->msagaw);
1862 	else {
1863 		context_set_address_root(context, virt_to_phys(pgd));
1864 		context_set_address_width(context, iommu->agaw);
1865 	}
1866 
1867 	context_set_translation_type(context, translation);
1868 	context_set_fault_enable(context);
1869 	context_set_present(context);
1870 	domain_flush_cache(domain, context, sizeof(*context));
1871 
1872 	/*
1873 	 * It's a non-present to present mapping. If hardware doesn't cache
1874 	 * non-present entries we only need to flush the write-buffer. If the
1875 	 * hardware _does_ cache non-present entries, then it does so in the special
1876 	 * domain #0, which we have to flush:
1877 	 */
1878 	if (cap_caching_mode(iommu->cap)) {
1879 		iommu->flush.flush_context(iommu, 0,
1880 					   (((u16)bus) << 8) | devfn,
1881 					   DMA_CCMD_MASK_NOBIT,
1882 					   DMA_CCMD_DEVICE_INVL);
1883 		iommu->flush.flush_iotlb(iommu, id, 0, 0, DMA_TLB_DSI_FLUSH);
1884 	} else {
1885 		iommu_flush_write_buffer(iommu);
1886 	}
1887 	iommu_enable_dev_iotlb(info);
1888 	spin_unlock_irqrestore(&iommu->lock, flags);
1889 
1890 	domain_attach_iommu(domain, iommu);
1891 
1892 	return 0;
1893 }
1894 
1895 struct domain_context_mapping_data {
1896 	struct dmar_domain *domain;
1897 	struct intel_iommu *iommu;
1898 	int translation;
1899 };
1900 
1901 static int domain_context_mapping_cb(struct pci_dev *pdev,
1902 				     u16 alias, void *opaque)
1903 {
1904 	struct domain_context_mapping_data *data = opaque;
1905 
1906 	return domain_context_mapping_one(data->domain, data->iommu,
1907 					  PCI_BUS_NUM(alias), alias & 0xff,
1908 					  data->translation);
1909 }
1910 
1911 static int
1912 domain_context_mapping(struct dmar_domain *domain, struct device *dev,
1913 		       int translation)
1914 {
1915 	struct intel_iommu *iommu;
1916 	u8 bus, devfn;
1917 	struct domain_context_mapping_data data;
1918 
1919 	iommu = device_to_iommu(dev, &bus, &devfn);
1920 	if (!iommu)
1921 		return -ENODEV;
1922 
1923 	if (!dev_is_pci(dev))
1924 		return domain_context_mapping_one(domain, iommu, bus, devfn,
1925 						  translation);
1926 
1927 	data.domain = domain;
1928 	data.iommu = iommu;
1929 	data.translation = translation;
1930 
1931 	return pci_for_each_dma_alias(to_pci_dev(dev),
1932 				      &domain_context_mapping_cb, &data);
1933 }
1934 
1935 static int domain_context_mapped_cb(struct pci_dev *pdev,
1936 				    u16 alias, void *opaque)
1937 {
1938 	struct intel_iommu *iommu = opaque;
1939 
1940 	return !device_context_mapped(iommu, PCI_BUS_NUM(alias), alias & 0xff);
1941 }
1942 
1943 static int domain_context_mapped(struct device *dev)
1944 {
1945 	struct intel_iommu *iommu;
1946 	u8 bus, devfn;
1947 
1948 	iommu = device_to_iommu(dev, &bus, &devfn);
1949 	if (!iommu)
1950 		return -ENODEV;
1951 
1952 	if (!dev_is_pci(dev))
1953 		return device_context_mapped(iommu, bus, devfn);
1954 
1955 	return !pci_for_each_dma_alias(to_pci_dev(dev),
1956 				       domain_context_mapped_cb, iommu);
1957 }
1958 
1959 /* Returns a number of VTD pages, but aligned to MM page size */
1960 static inline unsigned long aligned_nrpages(unsigned long host_addr,
1961 					    size_t size)
1962 {
1963 	host_addr &= ~PAGE_MASK;
1964 	return PAGE_ALIGN(host_addr + size) >> VTD_PAGE_SHIFT;
1965 }
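
/*
 * Editorial worked example (assuming 4KiB MM pages and 4KiB VT-d
 * pages): host_addr = 0x1200, size = 0x2200.  The in-page offset is
 * 0x200, so PAGE_ALIGN(0x200 + 0x2200) = 0x3000, i.e. three VT-d pages
 * are needed even though the raw size is just over two pages.
 */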
1966 
1967 /* Return largest possible superpage level for a given mapping */
1968 static inline int hardware_largepage_caps(struct dmar_domain *domain,
1969 					  unsigned long iov_pfn,
1970 					  unsigned long phy_pfn,
1971 					  unsigned long pages)
1972 {
1973 	int support, level = 1;
1974 	unsigned long pfnmerge;
1975 
1976 	support = domain->iommu_superpage;
1977 
1978 	/* To use a large page, the virtual *and* physical addresses
1979 	   must be aligned to 2MiB/1GiB/etc. Lower bits set in either
1980 	   of them will mean we have to use smaller pages. So just
1981 	   merge them and check both at once. */
1982 	pfnmerge = iov_pfn | phy_pfn;
1983 
1984 	while (support && !(pfnmerge & ~VTD_STRIDE_MASK)) {
1985 		pages >>= VTD_STRIDE_SHIFT;
1986 		if (!pages)
1987 			break;
1988 		pfnmerge >>= VTD_STRIDE_SHIFT;
1989 		level++;
1990 		support--;
1991 	}
1992 	return level;
1993 }
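
/*
 * Editorial worked example (hypothetical values): with one level of
 * superpage support (2MiB), iov_pfn = 0x400, phy_pfn = 0x800 and
 * pages = 512, the merged pfn 0xc00 has its low nine bits clear and
 * 512 pages cover a full 2MiB stride, so the function returns level 2.
 * If either pfn were misaligned, or fewer than 512 pages remained, it
 * would return 1 and ordinary 4KiB PTEs would be used.
 */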
1994 
1995 static int __domain_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
1996 			    struct scatterlist *sg, unsigned long phys_pfn,
1997 			    unsigned long nr_pages, int prot)
1998 {
1999 	struct dma_pte *first_pte = NULL, *pte = NULL;
2000 	phys_addr_t uninitialized_var(pteval);
2001 	unsigned long sg_res = 0;
2002 	unsigned int largepage_lvl = 0;
2003 	unsigned long lvl_pages = 0;
2004 
2005 	BUG_ON(!domain_pfn_supported(domain, iov_pfn + nr_pages - 1));
2006 
2007 	if ((prot & (DMA_PTE_READ|DMA_PTE_WRITE)) == 0)
2008 		return -EINVAL;
2009 
2010 	prot &= DMA_PTE_READ | DMA_PTE_WRITE | DMA_PTE_SNP;
2011 
2012 	if (!sg) {
2013 		sg_res = nr_pages;
2014 		pteval = ((phys_addr_t)phys_pfn << VTD_PAGE_SHIFT) | prot;
2015 	}
2016 
2017 	while (nr_pages > 0) {
2018 		uint64_t tmp;
2019 
2020 		if (!sg_res) {
2021 			sg_res = aligned_nrpages(sg->offset, sg->length);
2022 			sg->dma_address = ((dma_addr_t)iov_pfn << VTD_PAGE_SHIFT) + sg->offset;
2023 			sg->dma_length = sg->length;
2024 			pteval = page_to_phys(sg_page(sg)) | prot;
2025 			phys_pfn = pteval >> VTD_PAGE_SHIFT;
2026 		}
2027 
2028 		if (!pte) {
2029 			largepage_lvl = hardware_largepage_caps(domain, iov_pfn, phys_pfn, sg_res);
2030 
2031 			first_pte = pte = pfn_to_dma_pte(domain, iov_pfn, &largepage_lvl);
2032 			if (!pte)
2033 				return -ENOMEM;
2034 			/* It is a large page */
2035 			if (largepage_lvl > 1) {
2036 				unsigned long nr_superpages, end_pfn;
2037 
2038 				pteval |= DMA_PTE_LARGE_PAGE;
2039 				lvl_pages = lvl_to_nr_pages(largepage_lvl);
2040 
2041 				nr_superpages = sg_res / lvl_pages;
2042 				end_pfn = iov_pfn + nr_superpages * lvl_pages - 1;
2043 
2044 				/*
2045 				 * Ensure that old small page tables are
2046 				 * removed to make room for superpage(s).
2047 				 */
2048 				dma_pte_free_pagetable(domain, iov_pfn, end_pfn);
2049 			} else {
2050 				pteval &= ~(uint64_t)DMA_PTE_LARGE_PAGE;
2051 			}
2052 
2053 		}
2054 		/* We don't need a lock here; nobody else
2055 		 * touches this iova range
2056 		 */
2057 		tmp = cmpxchg64_local(&pte->val, 0ULL, pteval);
2058 		if (tmp) {
2059 			static int dumps = 5;
2060 			printk(KERN_CRIT "ERROR: DMA PTE for vPFN 0x%lx already set (to %llx not %llx)\n",
2061 			       iov_pfn, tmp, (unsigned long long)pteval);
2062 			if (dumps) {
2063 				dumps--;
2064 				debug_dma_dump_mappings(NULL);
2065 			}
2066 			WARN_ON(1);
2067 		}
2068 
2069 		lvl_pages = lvl_to_nr_pages(largepage_lvl);
2070 
2071 		BUG_ON(nr_pages < lvl_pages);
2072 		BUG_ON(sg_res < lvl_pages);
2073 
2074 		nr_pages -= lvl_pages;
2075 		iov_pfn += lvl_pages;
2076 		phys_pfn += lvl_pages;
2077 		pteval += lvl_pages * VTD_PAGE_SIZE;
2078 		sg_res -= lvl_pages;
2079 
2080 		/* If the next PTE would be the first in a new page, then we
2081 		   need to flush the cache on the entries we've just written.
2082 		   And then we'll need to recalculate 'pte', so clear it and
2083 		   let it get set again in the if (!pte) block above.
2084 
2085 		   If we're done (!nr_pages) we need to flush the cache too.
2086 
2087 		   Also if we've been setting superpages, we may need to
2088 		   recalculate 'pte' and switch back to smaller pages for the
2089 		   end of the mapping, if the trailing size is not enough to
2090 		   use another superpage (i.e. sg_res < lvl_pages). */
2091 		pte++;
2092 		if (!nr_pages || first_pte_in_page(pte) ||
2093 		    (largepage_lvl > 1 && sg_res < lvl_pages)) {
2094 			domain_flush_cache(domain, first_pte,
2095 					   (void *)pte - (void *)first_pte);
2096 			pte = NULL;
2097 		}
2098 
2099 		if (!sg_res && nr_pages)
2100 			sg = sg_next(sg);
2101 	}
2102 	return 0;
2103 }
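
/*
 * Editorial worked example: mapping 513 contiguous pages whose iov and
 * phys pfns are 2MiB aligned proceeds in two rounds.  The first round
 * sees 513 pages, picks level 2, frees any old 4KiB tables under that
 * range and writes a single 2MiB PTE covering 512 pages.  Because
 * sg_res then drops below lvl_pages, the PTE pointer is reset; the
 * second round recomputes the level as 1 and maps the final page with
 * a 4KiB PTE.
 */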
2104 
2105 static inline int domain_sg_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
2106 				    struct scatterlist *sg, unsigned long nr_pages,
2107 				    int prot)
2108 {
2109 	return __domain_mapping(domain, iov_pfn, sg, 0, nr_pages, prot);
2110 }
2111 
2112 static inline int domain_pfn_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
2113 				     unsigned long phys_pfn, unsigned long nr_pages,
2114 				     int prot)
2115 {
2116 	return __domain_mapping(domain, iov_pfn, NULL, phys_pfn, nr_pages, prot);
2117 }
2118 
2119 static void iommu_detach_dev(struct intel_iommu *iommu, u8 bus, u8 devfn)
2120 {
2121 	if (!iommu)
2122 		return;
2123 
2124 	clear_context_table(iommu, bus, devfn);
2125 	iommu->flush.flush_context(iommu, 0, 0, 0,
2126 					   DMA_CCMD_GLOBAL_INVL);
2127 	iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH);
2128 }
2129 
2130 static inline void unlink_domain_info(struct device_domain_info *info)
2131 {
2132 	assert_spin_locked(&device_domain_lock);
2133 	list_del(&info->link);
2134 	list_del(&info->global);
2135 	if (info->dev)
2136 		info->dev->archdata.iommu = NULL;
2137 }
2138 
2139 static void domain_remove_dev_info(struct dmar_domain *domain)
2140 {
2141 	struct device_domain_info *info, *tmp;
2142 	unsigned long flags;
2143 
2144 	spin_lock_irqsave(&device_domain_lock, flags);
2145 	list_for_each_entry_safe(info, tmp, &domain->devices, link) {
2146 		unlink_domain_info(info);
2147 		spin_unlock_irqrestore(&device_domain_lock, flags);
2148 
2149 		iommu_disable_dev_iotlb(info);
2150 		iommu_detach_dev(info->iommu, info->bus, info->devfn);
2151 
2152 		if (domain_type_is_vm(domain)) {
2153 			iommu_detach_dependent_devices(info->iommu, info->dev);
2154 			domain_detach_iommu(domain, info->iommu);
2155 		}
2156 
2157 		free_devinfo_mem(info);
2158 		spin_lock_irqsave(&device_domain_lock, flags);
2159 	}
2160 	spin_unlock_irqrestore(&device_domain_lock, flags);
2161 }
2162 
2163 /*
2164  * find_domain
2165  * Note: we use struct device->archdata.iommu to store the info
2166  */
2167 static struct dmar_domain *find_domain(struct device *dev)
2168 {
2169 	struct device_domain_info *info;
2170 
2171 	/* No lock here, assumes no domain exit in normal case */
2172 	info = dev->archdata.iommu;
2173 	if (info)
2174 		return info->domain;
2175 	return NULL;
2176 }
2177 
2178 static inline struct device_domain_info *
2179 dmar_search_domain_by_dev_info(int segment, int bus, int devfn)
2180 {
2181 	struct device_domain_info *info;
2182 
2183 	list_for_each_entry(info, &device_domain_list, global)
2184 		if (info->iommu->segment == segment && info->bus == bus &&
2185 		    info->devfn == devfn)
2186 			return info;
2187 
2188 	return NULL;
2189 }
2190 
2191 static struct dmar_domain *dmar_insert_dev_info(struct intel_iommu *iommu,
2192 						int bus, int devfn,
2193 						struct device *dev,
2194 						struct dmar_domain *domain)
2195 {
2196 	struct dmar_domain *found = NULL;
2197 	struct device_domain_info *info;
2198 	unsigned long flags;
2199 
2200 	info = alloc_devinfo_mem();
2201 	if (!info)
2202 		return NULL;
2203 
2204 	info->bus = bus;
2205 	info->devfn = devfn;
2206 	info->dev = dev;
2207 	info->domain = domain;
2208 	info->iommu = iommu;
2209 
2210 	spin_lock_irqsave(&device_domain_lock, flags);
2211 	if (dev)
2212 		found = find_domain(dev);
2213 	else {
2214 		struct device_domain_info *info2;
2215 		info2 = dmar_search_domain_by_dev_info(iommu->segment, bus, devfn);
2216 		if (info2)
2217 			found = info2->domain;
2218 	}
2219 	if (found) {
2220 		spin_unlock_irqrestore(&device_domain_lock, flags);
2221 		free_devinfo_mem(info);
2222 		/* Caller must free the original domain */
2223 		return found;
2224 	}
2225 
2226 	list_add(&info->link, &domain->devices);
2227 	list_add(&info->global, &device_domain_list);
2228 	if (dev)
2229 		dev->archdata.iommu = info;
2230 	spin_unlock_irqrestore(&device_domain_lock, flags);
2231 
2232 	return domain;
2233 }
2234 
2235 static int get_last_alias(struct pci_dev *pdev, u16 alias, void *opaque)
2236 {
2237 	*(u16 *)opaque = alias;
2238 	return 0;
2239 }
2240 
2241 /* domain is initialized */
2242 static struct dmar_domain *get_domain_for_dev(struct device *dev, int gaw)
2243 {
2244 	struct dmar_domain *domain, *tmp;
2245 	struct intel_iommu *iommu;
2246 	struct device_domain_info *info;
2247 	u16 dma_alias;
2248 	unsigned long flags;
2249 	u8 bus, devfn;
2250 
2251 	domain = find_domain(dev);
2252 	if (domain)
2253 		return domain;
2254 
2255 	iommu = device_to_iommu(dev, &bus, &devfn);
2256 	if (!iommu)
2257 		return NULL;
2258 
2259 	if (dev_is_pci(dev)) {
2260 		struct pci_dev *pdev = to_pci_dev(dev);
2261 
2262 		pci_for_each_dma_alias(pdev, get_last_alias, &dma_alias);
2263 
2264 		spin_lock_irqsave(&device_domain_lock, flags);
2265 		info = dmar_search_domain_by_dev_info(pci_domain_nr(pdev->bus),
2266 						      PCI_BUS_NUM(dma_alias),
2267 						      dma_alias & 0xff);
2268 		if (info) {
2269 			iommu = info->iommu;
2270 			domain = info->domain;
2271 		}
2272 		spin_unlock_irqrestore(&device_domain_lock, flags);
2273 
2274 		/* DMA alias already has a domain, use it */
2275 		if (info)
2276 			goto found_domain;
2277 	}
2278 
2279 	/* Allocate and initialize new domain for the device */
2280 	domain = alloc_domain(0);
2281 	if (!domain)
2282 		return NULL;
2283 	domain->id = iommu_attach_domain(domain, iommu);
2284 	if (domain->id < 0) {
2285 		free_domain_mem(domain);
2286 		return NULL;
2287 	}
2288 	domain_attach_iommu(domain, iommu);
2289 	if (domain_init(domain, gaw)) {
2290 		domain_exit(domain);
2291 		return NULL;
2292 	}
2293 
2294 	/* register PCI DMA alias device */
2295 	if (dev_is_pci(dev)) {
2296 		tmp = dmar_insert_dev_info(iommu, PCI_BUS_NUM(dma_alias),
2297 					   dma_alias & 0xff, NULL, domain);
2298 
2299 		if (!tmp || tmp != domain) {
2300 			domain_exit(domain);
2301 			domain = tmp;
2302 		}
2303 
2304 		if (!domain)
2305 			return NULL;
2306 	}
2307 
2308 found_domain:
2309 	tmp = dmar_insert_dev_info(iommu, bus, devfn, dev, domain);
2310 
2311 	if (!tmp || tmp != domain) {
2312 		domain_exit(domain);
2313 		domain = tmp;
2314 	}
2315 
2316 	return domain;
2317 }
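
/*
 * Editorial note: the DMA alias handling above lets, for example, a
 * conventional PCI device behind a PCIe-to-PCI bridge share a domain
 * with its alias (typically the bridge's requester ID).  get_last_alias()
 * keeps the last alias reported by pci_for_each_dma_alias(), and a
 * second device resolving to the same alias finds and reuses the
 * existing domain instead of allocating a new one.
 */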
2318 
2319 static int iommu_identity_mapping;
2320 #define IDENTMAP_ALL		1
2321 #define IDENTMAP_GFX		2
2322 #define IDENTMAP_AZALIA		4
2323 
2324 static int iommu_domain_identity_map(struct dmar_domain *domain,
2325 				     unsigned long long start,
2326 				     unsigned long long end)
2327 {
2328 	unsigned long first_vpfn = start >> VTD_PAGE_SHIFT;
2329 	unsigned long last_vpfn = end >> VTD_PAGE_SHIFT;
2330 
2331 	if (!reserve_iova(&domain->iovad, dma_to_mm_pfn(first_vpfn),
2332 			  dma_to_mm_pfn(last_vpfn))) {
2333 		printk(KERN_ERR "IOMMU: reserve iova failed\n");
2334 		return -ENOMEM;
2335 	}
2336 
2337 	pr_debug("Mapping reserved region %llx-%llx for domain %d\n",
2338 		 start, end, domain->id);
2339 	/*
2340 	 * RMRR range might have overlap with physical memory range,
2341 	 * clear it first
2342 	 */
2343 	dma_pte_clear_range(domain, first_vpfn, last_vpfn);
2344 
2345 	return domain_pfn_mapping(domain, first_vpfn, first_vpfn,
2346 				  last_vpfn - first_vpfn + 1,
2347 				  DMA_PTE_READ|DMA_PTE_WRITE);
2348 }
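
/*
 * Editorial worked example (assuming 4KiB pages): an RMRR covering
 * 0x0 - 0xffffff (the low 16MiB) gives first_vpfn = 0 and
 * last_vpfn = 0xfff, so 0x1000 page frames are reserved in the
 * domain's iova allocator and then mapped 1:1 with read/write
 * permission, after any PTEs already covering the range are cleared.
 */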
2349 
2350 static int iommu_prepare_identity_map(struct device *dev,
2351 				      unsigned long long start,
2352 				      unsigned long long end)
2353 {
2354 	struct dmar_domain *domain;
2355 	int ret;
2356 
2357 	domain = get_domain_for_dev(dev, DEFAULT_DOMAIN_ADDRESS_WIDTH);
2358 	if (!domain)
2359 		return -ENOMEM;
2360 
2361 	/* For _hardware_ passthrough, don't bother. But for software
2362 	   passthrough, we do it anyway -- it may indicate a memory
2363 	   range which is reserved in E820, and so didn't get set
2364 	   up to start with in si_domain */
2365 	if (domain == si_domain && hw_pass_through) {
2366 		printk("Ignoring identity map for HW passthrough device %s [0x%Lx - 0x%Lx]\n",
2367 		       dev_name(dev), start, end);
2368 		return 0;
2369 	}
2370 
2371 	printk(KERN_INFO
2372 	       "IOMMU: Setting identity map for device %s [0x%Lx - 0x%Lx]\n",
2373 	       dev_name(dev), start, end);
2374 
2375 	if (end < start) {
2376 		WARN(1, "Your BIOS is broken; RMRR ends before it starts!\n"
2377 			"BIOS vendor: %s; Ver: %s; Product Version: %s\n",
2378 			dmi_get_system_info(DMI_BIOS_VENDOR),
2379 			dmi_get_system_info(DMI_BIOS_VERSION),
2380 		     dmi_get_system_info(DMI_PRODUCT_VERSION));
2381 		ret = -EIO;
2382 		goto error;
2383 	}
2384 
2385 	if (end >> agaw_to_width(domain->agaw)) {
2386 		WARN(1, "Your BIOS is broken; RMRR exceeds permitted address width (%d bits)\n"
2387 		     "BIOS vendor: %s; Ver: %s; Product Version: %s\n",
2388 		     agaw_to_width(domain->agaw),
2389 		     dmi_get_system_info(DMI_BIOS_VENDOR),
2390 		     dmi_get_system_info(DMI_BIOS_VERSION),
2391 		     dmi_get_system_info(DMI_PRODUCT_VERSION));
2392 		ret = -EIO;
2393 		goto error;
2394 	}
2395 
2396 	ret = iommu_domain_identity_map(domain, start, end);
2397 	if (ret)
2398 		goto error;
2399 
2400 	/* context entry init */
2401 	ret = domain_context_mapping(domain, dev, CONTEXT_TT_MULTI_LEVEL);
2402 	if (ret)
2403 		goto error;
2404 
2405 	return 0;
2406 
2407  error:
2408 	domain_exit(domain);
2409 	return ret;
2410 }
2411 
2412 static inline int iommu_prepare_rmrr_dev(struct dmar_rmrr_unit *rmrr,
2413 					 struct device *dev)
2414 {
2415 	if (dev->archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO)
2416 		return 0;
2417 	return iommu_prepare_identity_map(dev, rmrr->base_address,
2418 					  rmrr->end_address);
2419 }
2420 
2421 #ifdef CONFIG_INTEL_IOMMU_FLOPPY_WA
2422 static inline void iommu_prepare_isa(void)
2423 {
2424 	struct pci_dev *pdev;
2425 	int ret;
2426 
2427 	pdev = pci_get_class(PCI_CLASS_BRIDGE_ISA << 8, NULL);
2428 	if (!pdev)
2429 		return;
2430 
2431 	printk(KERN_INFO "IOMMU: Prepare 0-16MiB unity mapping for LPC\n");
2432 	ret = iommu_prepare_identity_map(&pdev->dev, 0, 16*1024*1024 - 1);
2433 
2434 	if (ret)
2435 		printk(KERN_ERR "IOMMU: Failed to create 0-16MiB identity map; "
2436 		       "floppy might not work\n");
2437 
2438 	pci_dev_put(pdev);
2439 }
2440 #else
2441 static inline void iommu_prepare_isa(void)
2442 {
2443 	return;
2444 }
2445 #endif /* !CONFIG_INTEL_IOMMU_FLOPPY_WA */
2446 
2447 static int md_domain_init(struct dmar_domain *domain, int guest_width);
2448 
2449 static int __init si_domain_init(int hw)
2450 {
2451 	struct dmar_drhd_unit *drhd;
2452 	struct intel_iommu *iommu;
2453 	int nid, ret = 0;
2454 	bool first = true;
2455 
2456 	si_domain = alloc_domain(DOMAIN_FLAG_STATIC_IDENTITY);
2457 	if (!si_domain)
2458 		return -EFAULT;
2459 
2460 	for_each_active_iommu(iommu, drhd) {
2461 		ret = iommu_attach_domain(si_domain, iommu);
2462 		if (ret < 0) {
2463 			domain_exit(si_domain);
2464 			return -EFAULT;
2465 		} else if (first) {
2466 			si_domain->id = ret;
2467 			first = false;
2468 		} else if (si_domain->id != ret) {
2469 			domain_exit(si_domain);
2470 			return -EFAULT;
2471 		}
2472 		domain_attach_iommu(si_domain, iommu);
2473 	}
2474 
2475 	if (md_domain_init(si_domain, DEFAULT_DOMAIN_ADDRESS_WIDTH)) {
2476 		domain_exit(si_domain);
2477 		return -EFAULT;
2478 	}
2479 
2480 	pr_debug("IOMMU: identity mapping domain is domain %d\n",
2481 		 si_domain->id);
2482 
2483 	if (hw)
2484 		return 0;
2485 
2486 	for_each_online_node(nid) {
2487 		unsigned long start_pfn, end_pfn;
2488 		int i;
2489 
2490 		for_each_mem_pfn_range(i, nid, &start_pfn, &end_pfn, NULL) {
2491 			ret = iommu_domain_identity_map(si_domain,
2492 					PFN_PHYS(start_pfn), PFN_PHYS(end_pfn));
2493 			if (ret)
2494 				return ret;
2495 		}
2496 	}
2497 
2498 	return 0;
2499 }
2500 
2501 static int identity_mapping(struct device *dev)
2502 {
2503 	struct device_domain_info *info;
2504 
2505 	if (likely(!iommu_identity_mapping))
2506 		return 0;
2507 
2508 	info = dev->archdata.iommu;
2509 	if (info && info != DUMMY_DEVICE_DOMAIN_INFO)
2510 		return (info->domain == si_domain);
2511 
2512 	return 0;
2513 }
2514 
2515 static int domain_add_dev_info(struct dmar_domain *domain,
2516 			       struct device *dev, int translation)
2517 {
2518 	struct dmar_domain *ndomain;
2519 	struct intel_iommu *iommu;
2520 	u8 bus, devfn;
2521 	int ret;
2522 
2523 	iommu = device_to_iommu(dev, &bus, &devfn);
2524 	if (!iommu)
2525 		return -ENODEV;
2526 
2527 	ndomain = dmar_insert_dev_info(iommu, bus, devfn, dev, domain);
2528 	if (ndomain != domain)
2529 		return -EBUSY;
2530 
2531 	ret = domain_context_mapping(domain, dev, translation);
2532 	if (ret) {
2533 		domain_remove_one_dev_info(domain, dev);
2534 		return ret;
2535 	}
2536 
2537 	return 0;
2538 }
2539 
2540 static bool device_has_rmrr(struct device *dev)
2541 {
2542 	struct dmar_rmrr_unit *rmrr;
2543 	struct device *tmp;
2544 	int i;
2545 
2546 	rcu_read_lock();
2547 	for_each_rmrr_units(rmrr) {
2548 		/*
2549 		 * Return TRUE if this RMRR contains the device that
2550 		 * is passed in.
2551 		 */
2552 		for_each_active_dev_scope(rmrr->devices,
2553 					  rmrr->devices_cnt, i, tmp)
2554 			if (tmp == dev) {
2555 				rcu_read_unlock();
2556 				return true;
2557 			}
2558 	}
2559 	rcu_read_unlock();
2560 	return false;
2561 }
2562 
2563 /*
2564  * There are a couple cases where we need to restrict the functionality of
2565  * devices associated with RMRRs.  The first is when evaluating a device for
2566  * identity mapping because problems exist when devices are moved in and out
2567  * of domains and their respective RMRR information is lost.  This means that
2568  * a device with associated RMRRs will never be in a "passthrough" domain.
2569  * The second is use of the device through the IOMMU API.  This interface
2570  * expects to have full control of the IOVA space for the device.  We cannot
2571  * satisfy both the requirement that RMRR access is maintained and have an
2572  * unencumbered IOVA space.  We also have no ability to quiesce the device's
2573  * use of the RMRR space or even inform the IOMMU API user of the restriction.
2574  * We therefore prevent devices associated with an RMRR from participating in
2575  * the IOMMU API, which eliminates them from device assignment.
2576  *
2577  * In both cases we assume that PCI USB devices with RMRRs have them largely
2578  * for historical reasons and that the RMRR space is not actively used post
2579  * boot.  This exclusion may change if vendors begin to abuse it.
2580  *
2581  * The same exception is made for graphics devices, with the requirement that
2582  * any use of the RMRR regions will be torn down before assigning the device
2583  * to a guest.
2584  */
2585 static bool device_is_rmrr_locked(struct device *dev)
2586 {
2587 	if (!device_has_rmrr(dev))
2588 		return false;
2589 
2590 	if (dev_is_pci(dev)) {
2591 		struct pci_dev *pdev = to_pci_dev(dev);
2592 
2593 		if (IS_USB_DEVICE(pdev) || IS_GFX_DEVICE(pdev))
2594 			return false;
2595 	}
2596 
2597 	return true;
2598 }
2599 
2600 static int iommu_should_identity_map(struct device *dev, int startup)
2601 {
2602 
2603 	if (dev_is_pci(dev)) {
2604 		struct pci_dev *pdev = to_pci_dev(dev);
2605 
2606 		if (device_is_rmrr_locked(dev))
2607 			return 0;
2608 
2609 		if ((iommu_identity_mapping & IDENTMAP_AZALIA) && IS_AZALIA(pdev))
2610 			return 1;
2611 
2612 		if ((iommu_identity_mapping & IDENTMAP_GFX) && IS_GFX_DEVICE(pdev))
2613 			return 1;
2614 
2615 		if (!(iommu_identity_mapping & IDENTMAP_ALL))
2616 			return 0;
2617 
2618 		/*
2619 		 * We want to start off with all devices in the 1:1 domain, and
2620 		 * take them out later if we find they can't access all of memory.
2621 		 *
2622 		 * However, we can't do this for PCI devices behind bridges,
2623 		 * because all PCI devices behind the same bridge will end up
2624 		 * with the same source-id on their transactions.
2625 		 *
2626 		 * Practically speaking, we can't change things around for these
2627 		 * devices at run-time, because we can't be sure there'll be no
2628 		 * DMA transactions in flight for any of their siblings.
2629 		 *
2630 		 * So PCI devices (unless they're on the root bus) as well as
2631 		 * their parent PCI-PCI or PCIe-PCI bridges must be left _out_ of
2632 		 * the 1:1 domain, just in _case_ one of their siblings turns out
2633 		 * not to be able to map all of memory.
2634 		 */
2635 		if (!pci_is_pcie(pdev)) {
2636 			if (!pci_is_root_bus(pdev->bus))
2637 				return 0;
2638 			if (pdev->class >> 8 == PCI_CLASS_BRIDGE_PCI)
2639 				return 0;
2640 		} else if (pci_pcie_type(pdev) == PCI_EXP_TYPE_PCI_BRIDGE)
2641 			return 0;
2642 	} else {
2643 		if (device_has_rmrr(dev))
2644 			return 0;
2645 	}
2646 
2647 	/*
2648 	 * At boot time, we don't yet know if devices will be 64-bit capable.
2649 	 * Assume that they will — if they turn out not to be, then we can
2650 	 * take them out of the 1:1 domain later.
2651 	 */
2652 	if (!startup) {
2653 		/*
2654 		 * If the device's dma_mask is less than the system's memory
2655 		 * size then this is not a candidate for identity mapping.
2656 		 */
2657 		u64 dma_mask = *dev->dma_mask;
2658 
2659 		if (dev->coherent_dma_mask &&
2660 		    dev->coherent_dma_mask < dma_mask)
2661 			dma_mask = dev->coherent_dma_mask;
2662 
2663 		return dma_mask >= dma_get_required_mask(dev);
2664 	}
2665 
2666 	return 1;
2667 }
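
/*
 * Editorial note: at run time (startup == 0) the decision reduces to a
 * mask comparison.  For instance, on a machine with more than 4GiB of
 * RAM, dma_get_required_mask() exceeds 32 bits, so a device whose
 * dma_mask (or smaller coherent_dma_mask) is only 32 bits is rejected
 * here and falls back to the non-identity, translated path.
 */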
2668 
2669 static int __init dev_prepare_static_identity_mapping(struct device *dev, int hw)
2670 {
2671 	int ret;
2672 
2673 	if (!iommu_should_identity_map(dev, 1))
2674 		return 0;
2675 
2676 	ret = domain_add_dev_info(si_domain, dev,
2677 				  hw ? CONTEXT_TT_PASS_THROUGH :
2678 				       CONTEXT_TT_MULTI_LEVEL);
2679 	if (!ret)
2680 		pr_info("IOMMU: %s identity mapping for device %s\n",
2681 			hw ? "hardware" : "software", dev_name(dev));
2682 	else if (ret == -ENODEV)
2683 		/* device not associated with an iommu */
2684 		ret = 0;
2685 
2686 	return ret;
2687 }
2688 
2689 
2690 static int __init iommu_prepare_static_identity_mapping(int hw)
2691 {
2692 	struct pci_dev *pdev = NULL;
2693 	struct dmar_drhd_unit *drhd;
2694 	struct intel_iommu *iommu;
2695 	struct device *dev;
2696 	int i;
2697 	int ret = 0;
2698 
2699 	ret = si_domain_init(hw);
2700 	if (ret)
2701 		return -EFAULT;
2702 
2703 	for_each_pci_dev(pdev) {
2704 		ret = dev_prepare_static_identity_mapping(&pdev->dev, hw);
2705 		if (ret)
2706 			return ret;
2707 	}
2708 
2709 	for_each_active_iommu(iommu, drhd)
2710 		for_each_active_dev_scope(drhd->devices, drhd->devices_cnt, i, dev) {
2711 			struct acpi_device_physical_node *pn;
2712 			struct acpi_device *adev;
2713 
2714 			if (dev->bus != &acpi_bus_type)
2715 				continue;
2716 
2717 			adev = to_acpi_device(dev);
2718 			mutex_lock(&adev->physical_node_lock);
2719 			list_for_each_entry(pn, &adev->physical_node_list, node) {
2720 				ret = dev_prepare_static_identity_mapping(pn->dev, hw);
2721 				if (ret)
2722 					break;
2723 			}
2724 			mutex_unlock(&adev->physical_node_lock);
2725 			if (ret)
2726 				return ret;
2727 		}
2728 
2729 	return 0;
2730 }
2731 
2732 static void intel_iommu_init_qi(struct intel_iommu *iommu)
2733 {
2734 	/*
2735 	 * Start from a sane iommu hardware state.
2736 	 * If queued invalidation was already initialized by us
2737 	 * (for example, while enabling interrupt-remapping) then
2738 	 * things are already rolling from a sane state.
2739 	 */
2740 	if (!iommu->qi) {
2741 		/*
2742 		 * Clear any previous faults.
2743 		 */
2744 		dmar_fault(-1, iommu);
2745 		/*
2746 		 * Disable queued invalidation if supported and already enabled
2747 		 * before OS handover.
2748 		 */
2749 		dmar_disable_qi(iommu);
2750 	}
2751 
2752 	if (dmar_enable_qi(iommu)) {
2753 		/*
2754 		 * Queued Invalidate not enabled, use Register Based Invalidate
2755 		 */
2756 		iommu->flush.flush_context = __iommu_flush_context;
2757 		iommu->flush.flush_iotlb = __iommu_flush_iotlb;
2758 		pr_info("IOMMU: %s using Register based invalidation\n",
2759 			iommu->name);
2760 	} else {
2761 		iommu->flush.flush_context = qi_flush_context;
2762 		iommu->flush.flush_iotlb = qi_flush_iotlb;
2763 		pr_info("IOMMU: %s using Queued invalidation\n", iommu->name);
2764 	}
2765 }
2766 
2767 static int __init init_dmars(void)
2768 {
2769 	struct dmar_drhd_unit *drhd;
2770 	struct dmar_rmrr_unit *rmrr;
2771 	struct device *dev;
2772 	struct intel_iommu *iommu;
2773 	int i, ret;
2774 
2775 	/*
2776 	 * for each drhd
2777 	 *    allocate root
2778 	 *    initialize and program root entry to not present
2779 	 * endfor
2780 	 */
2781 	for_each_drhd_unit(drhd) {
2782 		/*
2783 		 * lock not needed as this is only incremented in the
2784 		 * single-threaded kernel __init code path; all other
2785 		 * accesses are read-only
2786 		 */
2787 		if (g_num_of_iommus < DMAR_UNITS_SUPPORTED) {
2788 			g_num_of_iommus++;
2789 			continue;
2790 		}
2791 		printk_once(KERN_ERR "intel-iommu: exceeded %d IOMMUs\n",
2792 			  DMAR_UNITS_SUPPORTED);
2793 	}
2794 
2795 	/* Preallocate enough resources for IOMMU hot-addition */
2796 	if (g_num_of_iommus < DMAR_UNITS_SUPPORTED)
2797 		g_num_of_iommus = DMAR_UNITS_SUPPORTED;
2798 
2799 	g_iommus = kcalloc(g_num_of_iommus, sizeof(struct intel_iommu *),
2800 			GFP_KERNEL);
2801 	if (!g_iommus) {
2802 		printk(KERN_ERR "Allocating global iommu array failed\n");
2803 		ret = -ENOMEM;
2804 		goto error;
2805 	}
2806 
2807 	deferred_flush = kzalloc(g_num_of_iommus *
2808 		sizeof(struct deferred_flush_tables), GFP_KERNEL);
2809 	if (!deferred_flush) {
2810 		ret = -ENOMEM;
2811 		goto free_g_iommus;
2812 	}
2813 
2814 	for_each_active_iommu(iommu, drhd) {
2815 		g_iommus[iommu->seq_id] = iommu;
2816 
2817 		ret = iommu_init_domains(iommu);
2818 		if (ret)
2819 			goto free_iommu;
2820 
2821 		/*
2822 		 * TBD:
2823 		 * we could share the same root & context tables
2824 		 * among all IOMMU's. Need to Split it later.
2825 		 */
2826 		ret = iommu_alloc_root_entry(iommu);
2827 		if (ret)
2828 			goto free_iommu;
2829 		if (!ecap_pass_through(iommu->ecap))
2830 			hw_pass_through = 0;
2831 	}
2832 
2833 	for_each_active_iommu(iommu, drhd)
2834 		intel_iommu_init_qi(iommu);
2835 
2836 	if (iommu_pass_through)
2837 		iommu_identity_mapping |= IDENTMAP_ALL;
2838 
2839 #ifdef CONFIG_INTEL_IOMMU_BROKEN_GFX_WA
2840 	iommu_identity_mapping |= IDENTMAP_GFX;
2841 #endif
2842 
2843 	check_tylersburg_isoch();
2844 
2845 	/*
2846 	 * If pass through is not set or not enabled, setup context entries for
2847 	 * identity mappings for rmrr, gfx, and isa and may fall back to static
2848 	 * identity mapping if iommu_identity_mapping is set.
2849 	 */
2850 	if (iommu_identity_mapping) {
2851 		ret = iommu_prepare_static_identity_mapping(hw_pass_through);
2852 		if (ret) {
2853 			printk(KERN_CRIT "Failed to setup IOMMU pass-through\n");
2854 			goto free_iommu;
2855 		}
2856 	}
2857 	/*
2858 	 * For each rmrr
2859 	 *   for each dev attached to rmrr
2860 	 *   do
2861 	 *     locate drhd for dev, alloc domain for dev
2862 	 *     allocate free domain
2863 	 *     allocate page table entries for rmrr
2864 	 *     if context not allocated for bus
2865 	 *           allocate and init context
2866 	 *           set present in root table for this bus
2867 	 *     init context with domain, translation etc
2868 	 *    endfor
2869 	 * endfor
2870 	 */
2871 	printk(KERN_INFO "IOMMU: Setting RMRR:\n");
2872 	for_each_rmrr_units(rmrr) {
2873 		/* some BIOSes list non-existent devices in the DMAR table. */
2874 		for_each_active_dev_scope(rmrr->devices, rmrr->devices_cnt,
2875 					  i, dev) {
2876 			ret = iommu_prepare_rmrr_dev(rmrr, dev);
2877 			if (ret)
2878 				printk(KERN_ERR
2879 				       "IOMMU: mapping reserved region failed\n");
2880 		}
2881 	}
2882 
2883 	iommu_prepare_isa();
2884 
2885 	/*
2886 	 * for each drhd
2887 	 *   enable fault log
2888 	 *   global invalidate context cache
2889 	 *   global invalidate iotlb
2890 	 *   enable translation
2891 	 */
2892 	for_each_iommu(iommu, drhd) {
2893 		if (drhd->ignored) {
2894 			/*
2895 			 * we always have to disable PMRs or DMA may fail on
2896 			 * this device
2897 			 */
2898 			if (force_on)
2899 				iommu_disable_protect_mem_regions(iommu);
2900 			continue;
2901 		}
2902 
2903 		iommu_flush_write_buffer(iommu);
2904 
2905 		ret = dmar_set_interrupt(iommu);
2906 		if (ret)
2907 			goto free_iommu;
2908 
2909 		iommu_set_root_entry(iommu);
2910 
2911 		iommu->flush.flush_context(iommu, 0, 0, 0, DMA_CCMD_GLOBAL_INVL);
2912 		iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH);
2913 		iommu_enable_translation(iommu);
2914 		iommu_disable_protect_mem_regions(iommu);
2915 	}
2916 
2917 	return 0;
2918 
2919 free_iommu:
2920 	for_each_active_iommu(iommu, drhd) {
2921 		disable_dmar_iommu(iommu);
2922 		free_dmar_iommu(iommu);
2923 	}
2924 	kfree(deferred_flush);
2925 free_g_iommus:
2926 	kfree(g_iommus);
2927 error:
2928 	return ret;
2929 }
2930 
2931 /* This takes a number of _MM_ pages, not VTD pages */
2932 static struct iova *intel_alloc_iova(struct device *dev,
2933 				     struct dmar_domain *domain,
2934 				     unsigned long nrpages, uint64_t dma_mask)
2935 {
2936 	struct iova *iova = NULL;
2937 
2938 	/* Restrict dma_mask to the width that the iommu can handle */
2939 	dma_mask = min_t(uint64_t, DOMAIN_MAX_ADDR(domain->gaw), dma_mask);
2940 
2941 	if (!dmar_forcedac && dma_mask > DMA_BIT_MASK(32)) {
2942 		/*
2943 		 * First try to allocate an io virtual address in
2944 		 * DMA_BIT_MASK(32) and if that fails then try allocating
2945 		 * from higher range
2946 		 */
2947 		iova = alloc_iova(&domain->iovad, nrpages,
2948 				  IOVA_PFN(DMA_BIT_MASK(32)), 1);
2949 		if (iova)
2950 			return iova;
2951 	}
2952 	iova = alloc_iova(&domain->iovad, nrpages, IOVA_PFN(dma_mask), 1);
2953 	if (unlikely(!iova)) {
2954 		printk(KERN_ERR "Allocating %ld-page iova for %s failed",
2955 		       nrpages, dev_name(dev));
2956 		return NULL;
2957 	}
2958 
2959 	return iova;
2960 }
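
/*
 * Editorial note: for a 64-bit capable device (and with dmar_forcedac
 * not set) the allocator first tries to place the IOVA below 4GiB,
 * which avoids 64-bit dual-address-cycle addressing on the bus unless
 * the low range is exhausted; only then does it retry against the full
 * mask.  A NULL return here simply propagates as a mapping failure to
 * the caller.
 */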
2961 
2962 static struct dmar_domain *__get_valid_domain_for_dev(struct device *dev)
2963 {
2964 	struct dmar_domain *domain;
2965 	int ret;
2966 
2967 	domain = get_domain_for_dev(dev, DEFAULT_DOMAIN_ADDRESS_WIDTH);
2968 	if (!domain) {
2969 		printk(KERN_ERR "Allocating domain for %s failed",
2970 		       dev_name(dev));
2971 		return NULL;
2972 	}
2973 
2974 	/* make sure context mapping is ok */
2975 	if (unlikely(!domain_context_mapped(dev))) {
2976 		ret = domain_context_mapping(domain, dev, CONTEXT_TT_MULTI_LEVEL);
2977 		if (ret) {
2978 			printk(KERN_ERR "Domain context map for %s failed",
2979 			       dev_name(dev));
2980 			return NULL;
2981 		}
2982 	}
2983 
2984 	return domain;
2985 }
2986 
2987 static inline struct dmar_domain *get_valid_domain_for_dev(struct device *dev)
2988 {
2989 	struct device_domain_info *info;
2990 
2991 	/* No lock here, assumes no domain exit in normal case */
2992 	info = dev->archdata.iommu;
2993 	if (likely(info))
2994 		return info->domain;
2995 
2996 	return __get_valid_domain_for_dev(dev);
2997 }
2998 
2999 /* Check if the dev needs to go through non-identity map and unmap process.*/
3000 static int iommu_no_mapping(struct device *dev)
3001 {
3002 	int found;
3003 
3004 	if (iommu_dummy(dev))
3005 		return 1;
3006 
3007 	if (!iommu_identity_mapping)
3008 		return 0;
3009 
3010 	found = identity_mapping(dev);
3011 	if (found) {
3012 		if (iommu_should_identity_map(dev, 0))
3013 			return 1;
3014 		else {
3015 			/*
3016 			 * A 32 bit DMA device is removed from si_domain and falls
3017 			 * back to non-identity mapping.
3018 			 */
3019 			domain_remove_one_dev_info(si_domain, dev);
3020 			printk(KERN_INFO "32bit %s uses non-identity mapping\n",
3021 			       dev_name(dev));
3022 			return 0;
3023 		}
3024 	} else {
3025 		/*
3026 		 * In case a 64 bit DMA device is detached from a VM, the
3027 		 * device is put into si_domain for identity mapping.
3028 		 */
3029 		if (iommu_should_identity_map(dev, 0)) {
3030 			int ret;
3031 			ret = domain_add_dev_info(si_domain, dev,
3032 						  hw_pass_through ?
3033 						  CONTEXT_TT_PASS_THROUGH :
3034 						  CONTEXT_TT_MULTI_LEVEL);
3035 			if (!ret) {
3036 				printk(KERN_INFO "64bit %s uses identity mapping\n",
3037 				       dev_name(dev));
3038 				return 1;
3039 			}
3040 		}
3041 	}
3042 
3043 	return 0;
3044 }
3045 
3046 static dma_addr_t __intel_map_single(struct device *dev, phys_addr_t paddr,
3047 				     size_t size, int dir, u64 dma_mask)
3048 {
3049 	struct dmar_domain *domain;
3050 	phys_addr_t start_paddr;
3051 	struct iova *iova;
3052 	int prot = 0;
3053 	int ret;
3054 	struct intel_iommu *iommu;
3055 	unsigned long paddr_pfn = paddr >> PAGE_SHIFT;
3056 
3057 	BUG_ON(dir == DMA_NONE);
3058 
3059 	if (iommu_no_mapping(dev))
3060 		return paddr;
3061 
3062 	domain = get_valid_domain_for_dev(dev);
3063 	if (!domain)
3064 		return 0;
3065 
3066 	iommu = domain_get_iommu(domain);
3067 	size = aligned_nrpages(paddr, size);
3068 
3069 	iova = intel_alloc_iova(dev, domain, dma_to_mm_pfn(size), dma_mask);
3070 	if (!iova)
3071 		goto error;
3072 
3073 	/*
3074 	 * Check if DMAR supports zero-length reads on write only
3075 	 * mappings.
3076 	 */
3077 	if (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL || \
3078 			!cap_zlr(iommu->cap))
3079 		prot |= DMA_PTE_READ;
3080 	if (dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL)
3081 		prot |= DMA_PTE_WRITE;
3082 	/*
3083 	 * paddr to (paddr + size) might span a partial page, so we should map
3084 	 * the whole page.  Note: if two parts of one page are separately
3085 	 * mapped, we might have two guest_addrs mapping to the same host
3086 	 * paddr, but this is not a big problem
3087 	 */
3088 	ret = domain_pfn_mapping(domain, mm_to_dma_pfn(iova->pfn_lo),
3089 				 mm_to_dma_pfn(paddr_pfn), size, prot);
3090 	if (ret)
3091 		goto error;
3092 
3093 	/* it's a non-present to present mapping. Only flush if caching mode */
3094 	if (cap_caching_mode(iommu->cap))
3095 		iommu_flush_iotlb_psi(iommu, domain->id, mm_to_dma_pfn(iova->pfn_lo), size, 0, 1);
3096 	else
3097 		iommu_flush_write_buffer(iommu);
3098 
3099 	start_paddr = (phys_addr_t)iova->pfn_lo << PAGE_SHIFT;
3100 	start_paddr += paddr & ~PAGE_MASK;
3101 	return start_paddr;
3102 
3103 error:
3104 	if (iova)
3105 		__free_iova(&domain->iovad, iova);
3106 	printk(KERN_ERR "Device %s request: %zx@%llx dir %d --- failed\n",
3107 		dev_name(dev), size, (unsigned long long)paddr, dir);
3108 	return 0;
3109 }
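
/*
 * Editorial worked example (hypothetical values): mapping
 * paddr = 0x12345678 with size = 0x100 needs a single page, since
 * aligned_nrpages() sees the in-page offset 0x678.  If the allocator
 * hands back iova pfn_lo = 0xffff0, the PTE maps that frame to host
 * frame 0x12345 and the returned handle is 0xffff0678 -- the original
 * in-page offset is preserved in the DMA address.
 */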
3110 
3111 static dma_addr_t intel_map_page(struct device *dev, struct page *page,
3112 				 unsigned long offset, size_t size,
3113 				 enum dma_data_direction dir,
3114 				 struct dma_attrs *attrs)
3115 {
3116 	return __intel_map_single(dev, page_to_phys(page) + offset, size,
3117 				  dir, *dev->dma_mask);
3118 }
3119 
3120 static void flush_unmaps(void)
3121 {
3122 	int i, j;
3123 
3124 	timer_on = 0;
3125 
3126 	/* just flush them all */
3127 	for (i = 0; i < g_num_of_iommus; i++) {
3128 		struct intel_iommu *iommu = g_iommus[i];
3129 		if (!iommu)
3130 			continue;
3131 
3132 		if (!deferred_flush[i].next)
3133 			continue;
3134 
3135 		/* In caching mode, global flushes make emulation expensive */
3136 		if (!cap_caching_mode(iommu->cap))
3137 			iommu->flush.flush_iotlb(iommu, 0, 0, 0,
3138 					 DMA_TLB_GLOBAL_FLUSH);
3139 		for (j = 0; j < deferred_flush[i].next; j++) {
3140 			unsigned long mask;
3141 			struct iova *iova = deferred_flush[i].iova[j];
3142 			struct dmar_domain *domain = deferred_flush[i].domain[j];
3143 
3144 			/* On real hardware multiple invalidations are expensive */
3145 			if (cap_caching_mode(iommu->cap))
3146 				iommu_flush_iotlb_psi(iommu, domain->id,
3147 					iova->pfn_lo, iova_size(iova),
3148 					!deferred_flush[i].freelist[j], 0);
3149 			else {
3150 				mask = ilog2(mm_to_dma_pfn(iova_size(iova)));
3151 				iommu_flush_dev_iotlb(deferred_flush[i].domain[j],
3152 						(uint64_t)iova->pfn_lo << PAGE_SHIFT, mask);
3153 			}
3154 			__free_iova(&deferred_flush[i].domain[j]->iovad, iova);
3155 			if (deferred_flush[i].freelist[j])
3156 				dma_free_pagelist(deferred_flush[i].freelist[j]);
3157 		}
3158 		deferred_flush[i].next = 0;
3159 	}
3160 
3161 	list_size = 0;
3162 }
3163 
3164 static void flush_unmaps_timeout(unsigned long data)
3165 {
3166 	unsigned long flags;
3167 
3168 	spin_lock_irqsave(&async_umap_flush_lock, flags);
3169 	flush_unmaps();
3170 	spin_unlock_irqrestore(&async_umap_flush_lock, flags);
3171 }
3172 
3173 static void add_unmap(struct dmar_domain *dom, struct iova *iova, struct page *freelist)
3174 {
3175 	unsigned long flags;
3176 	int next, iommu_id;
3177 	struct intel_iommu *iommu;
3178 
3179 	spin_lock_irqsave(&async_umap_flush_lock, flags);
3180 	if (list_size == HIGH_WATER_MARK)
3181 		flush_unmaps();
3182 
3183 	iommu = domain_get_iommu(dom);
3184 	iommu_id = iommu->seq_id;
3185 
3186 	next = deferred_flush[iommu_id].next;
3187 	deferred_flush[iommu_id].domain[next] = dom;
3188 	deferred_flush[iommu_id].iova[next] = iova;
3189 	deferred_flush[iommu_id].freelist[next] = freelist;
3190 	deferred_flush[iommu_id].next++;
3191 
3192 	if (!timer_on) {
3193 		mod_timer(&unmap_timer, jiffies + msecs_to_jiffies(10));
3194 		timer_on = 1;
3195 	}
3196 	list_size++;
3197 	spin_unlock_irqrestore(&async_umap_flush_lock, flags);
3198 }
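
/*
 * Editorial note on the lazy-unmap design: instead of invalidating the
 * IOTLB for every unmap, entries are parked per IOMMU and flushed in a
 * batch either when HIGH_WATER_MARK entries have accumulated or when
 * the 10ms unmap_timer fires, whichever comes first.  The IOVAs (and
 * any freed page-table pages) are only returned to their allocators in
 * flush_unmaps(), after the invalidation, so stale translations can
 * never target reused memory.
 */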
3199 
3200 static void intel_unmap(struct device *dev, dma_addr_t dev_addr)
3201 {
3202 	struct dmar_domain *domain;
3203 	unsigned long start_pfn, last_pfn;
3204 	struct iova *iova;
3205 	struct intel_iommu *iommu;
3206 	struct page *freelist;
3207 
3208 	if (iommu_no_mapping(dev))
3209 		return;
3210 
3211 	domain = find_domain(dev);
3212 	BUG_ON(!domain);
3213 
3214 	iommu = domain_get_iommu(domain);
3215 
3216 	iova = find_iova(&domain->iovad, IOVA_PFN(dev_addr));
3217 	if (WARN_ONCE(!iova, "Driver unmaps unmatched page at PFN %llx\n",
3218 		      (unsigned long long)dev_addr))
3219 		return;
3220 
3221 	start_pfn = mm_to_dma_pfn(iova->pfn_lo);
3222 	last_pfn = mm_to_dma_pfn(iova->pfn_hi + 1) - 1;
3223 
3224 	pr_debug("Device %s unmapping: pfn %lx-%lx\n",
3225 		 dev_name(dev), start_pfn, last_pfn);
3226 
3227 	freelist = domain_unmap(domain, start_pfn, last_pfn);
3228 
3229 	if (intel_iommu_strict) {
3230 		iommu_flush_iotlb_psi(iommu, domain->id, start_pfn,
3231 				      last_pfn - start_pfn + 1, !freelist, 0);
3232 		/* free iova */
3233 		__free_iova(&domain->iovad, iova);
3234 		dma_free_pagelist(freelist);
3235 	} else {
3236 		add_unmap(domain, iova, freelist);
3237 		/*
3238 		 * queue up the release of the unmap to save the 1/6th of the
3239 		 * cpu used up by the iotlb flush operation...
3240 		 */
3241 	}
3242 }
3243 
3244 static void intel_unmap_page(struct device *dev, dma_addr_t dev_addr,
3245 			     size_t size, enum dma_data_direction dir,
3246 			     struct dma_attrs *attrs)
3247 {
3248 	intel_unmap(dev, dev_addr);
3249 }
3250 
3251 static void *intel_alloc_coherent(struct device *dev, size_t size,
3252 				  dma_addr_t *dma_handle, gfp_t flags,
3253 				  struct dma_attrs *attrs)
3254 {
3255 	struct page *page = NULL;
3256 	int order;
3257 
3258 	size = PAGE_ALIGN(size);
3259 	order = get_order(size);
3260 
3261 	if (!iommu_no_mapping(dev))
3262 		flags &= ~(GFP_DMA | GFP_DMA32);
3263 	else if (dev->coherent_dma_mask < dma_get_required_mask(dev)) {
3264 		if (dev->coherent_dma_mask < DMA_BIT_MASK(32))
3265 			flags |= GFP_DMA;
3266 		else
3267 			flags |= GFP_DMA32;
3268 	}
3269 
3270 	if (flags & __GFP_WAIT) {
3271 		unsigned int count = size >> PAGE_SHIFT;
3272 
3273 		page = dma_alloc_from_contiguous(dev, count, order);
3274 		if (page && iommu_no_mapping(dev) &&
3275 		    page_to_phys(page) + size > dev->coherent_dma_mask) {
3276 			dma_release_from_contiguous(dev, page, count);
3277 			page = NULL;
3278 		}
3279 	}
3280 
3281 	if (!page)
3282 		page = alloc_pages(flags, order);
3283 	if (!page)
3284 		return NULL;
3285 	memset(page_address(page), 0, size);
3286 
3287 	*dma_handle = __intel_map_single(dev, page_to_phys(page), size,
3288 					 DMA_BIDIRECTIONAL,
3289 					 dev->coherent_dma_mask);
3290 	if (*dma_handle)
3291 		return page_address(page);
3292 	if (!dma_release_from_contiguous(dev, page, size >> PAGE_SHIFT))
3293 		__free_pages(page, order);
3294 
3295 	return NULL;
3296 }
3297 
3298 static void intel_free_coherent(struct device *dev, size_t size, void *vaddr,
3299 				dma_addr_t dma_handle, struct dma_attrs *attrs)
3300 {
3301 	int order;
3302 	struct page *page = virt_to_page(vaddr);
3303 
3304 	size = PAGE_ALIGN(size);
3305 	order = get_order(size);
3306 
3307 	intel_unmap(dev, dma_handle);
3308 	if (!dma_release_from_contiguous(dev, page, size >> PAGE_SHIFT))
3309 		__free_pages(page, order);
3310 }
3311 
3312 static void intel_unmap_sg(struct device *dev, struct scatterlist *sglist,
3313 			   int nelems, enum dma_data_direction dir,
3314 			   struct dma_attrs *attrs)
3315 {
3316 	intel_unmap(dev, sglist[0].dma_address);
3317 }
3318 
3319 static int intel_nontranslate_map_sg(struct device *hddev,
3320 	struct scatterlist *sglist, int nelems, int dir)
3321 {
3322 	int i;
3323 	struct scatterlist *sg;
3324 
3325 	for_each_sg(sglist, sg, nelems, i) {
3326 		BUG_ON(!sg_page(sg));
3327 		sg->dma_address = page_to_phys(sg_page(sg)) + sg->offset;
3328 		sg->dma_length = sg->length;
3329 	}
3330 	return nelems;
3331 }
3332 
3333 static int intel_map_sg(struct device *dev, struct scatterlist *sglist, int nelems,
3334 			enum dma_data_direction dir, struct dma_attrs *attrs)
3335 {
3336 	int i;
3337 	struct dmar_domain *domain;
3338 	size_t size = 0;
3339 	int prot = 0;
3340 	struct iova *iova = NULL;
3341 	int ret;
3342 	struct scatterlist *sg;
3343 	unsigned long start_vpfn;
3344 	struct intel_iommu *iommu;
3345 
3346 	BUG_ON(dir == DMA_NONE);
3347 	if (iommu_no_mapping(dev))
3348 		return intel_nontranslate_map_sg(dev, sglist, nelems, dir);
3349 
3350 	domain = get_valid_domain_for_dev(dev);
3351 	if (!domain)
3352 		return 0;
3353 
3354 	iommu = domain_get_iommu(domain);
3355 
3356 	for_each_sg(sglist, sg, nelems, i)
3357 		size += aligned_nrpages(sg->offset, sg->length);
3358 
3359 	iova = intel_alloc_iova(dev, domain, dma_to_mm_pfn(size),
3360 				*dev->dma_mask);
3361 	if (!iova) {
3362 		sglist->dma_length = 0;
3363 		return 0;
3364 	}
3365 
3366 	/*
3367 	 * Check if DMAR supports zero-length reads on write only
3368 	 * mappings.
3369 	 */
3370 	if (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL || \
3371 			!cap_zlr(iommu->cap))
3372 		prot |= DMA_PTE_READ;
3373 	if (dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL)
3374 		prot |= DMA_PTE_WRITE;
3375 
3376 	start_vpfn = mm_to_dma_pfn(iova->pfn_lo);
3377 
3378 	ret = domain_sg_mapping(domain, start_vpfn, sglist, size, prot);
3379 	if (unlikely(ret)) {
3380 		dma_pte_free_pagetable(domain, start_vpfn,
3381 				       start_vpfn + size - 1);
3382 		__free_iova(&domain->iovad, iova);
3383 		return 0;
3384 	}
3385 
3386 	/* it's a non-present to present mapping. Only flush if caching mode */
3387 	if (cap_caching_mode(iommu->cap))
3388 		iommu_flush_iotlb_psi(iommu, domain->id, start_vpfn, size, 0, 1);
3389 	else
3390 		iommu_flush_write_buffer(iommu);
3391 
3392 	return nelems;
3393 }
3394 
3395 static int intel_mapping_error(struct device *dev, dma_addr_t dma_addr)
3396 {
3397 	return !dma_addr;
3398 }
3399 
3400 struct dma_map_ops intel_dma_ops = {
3401 	.alloc = intel_alloc_coherent,
3402 	.free = intel_free_coherent,
3403 	.map_sg = intel_map_sg,
3404 	.unmap_sg = intel_unmap_sg,
3405 	.map_page = intel_map_page,
3406 	.unmap_page = intel_unmap_page,
3407 	.mapping_error = intel_mapping_error,
3408 };
3409 
3410 static inline int iommu_domain_cache_init(void)
3411 {
3412 	int ret = 0;
3413 
3414 	iommu_domain_cache = kmem_cache_create("iommu_domain",
3415 					 sizeof(struct dmar_domain),
3416 					 0,
3417 					 SLAB_HWCACHE_ALIGN,
3418 
3419 					 NULL);
3420 	if (!iommu_domain_cache) {
3421 		printk(KERN_ERR "Couldn't create iommu_domain cache\n");
3422 		ret = -ENOMEM;
3423 	}
3424 
3425 	return ret;
3426 }
3427 
3428 static inline int iommu_devinfo_cache_init(void)
3429 {
3430 	int ret = 0;
3431 
3432 	iommu_devinfo_cache = kmem_cache_create("iommu_devinfo",
3433 					 sizeof(struct device_domain_info),
3434 					 0,
3435 					 SLAB_HWCACHE_ALIGN,
3436 					 NULL);
3437 	if (!iommu_devinfo_cache) {
3438 		printk(KERN_ERR "Couldn't create devinfo cache\n");
3439 		ret = -ENOMEM;
3440 	}
3441 
3442 	return ret;
3443 }
3444 
3445 static int __init iommu_init_mempool(void)
3446 {
3447 	int ret;
3448 	ret = iommu_iova_cache_init();
3449 	if (ret)
3450 		return ret;
3451 
3452 	ret = iommu_domain_cache_init();
3453 	if (ret)
3454 		goto domain_error;
3455 
3456 	ret = iommu_devinfo_cache_init();
3457 	if (!ret)
3458 		return ret;
3459 
3460 	kmem_cache_destroy(iommu_domain_cache);
3461 domain_error:
3462 	iommu_iova_cache_destroy();
3463 
3464 	return -ENOMEM;
3465 }
3466 
3467 static void __init iommu_exit_mempool(void)
3468 {
3469 	kmem_cache_destroy(iommu_devinfo_cache);
3470 	kmem_cache_destroy(iommu_domain_cache);
3471 	iommu_iova_cache_destroy();
3472 }
3473 
3474 static void quirk_ioat_snb_local_iommu(struct pci_dev *pdev)
3475 {
3476 	struct dmar_drhd_unit *drhd;
3477 	u32 vtbar;
3478 	int rc;
3479 
3480 	/* We know that this device on this chipset has its own IOMMU.
3481 	 * If we find it under a different IOMMU, then the BIOS is lying
3482 	 * to us. Hope that the IOMMU for this device is actually
3483 	 * disabled, and it needs no translation...
3484 	 */
3485 	rc = pci_bus_read_config_dword(pdev->bus, PCI_DEVFN(0, 0), 0xb0, &vtbar);
3486 	if (rc) {
3487 		/* "can't" happen */
3488 		dev_info(&pdev->dev, "failed to run vt-d quirk\n");
3489 		return;
3490 	}
3491 	vtbar &= 0xffff0000;
3492 
3493 	/* we know that this IOMMU should be at offset 0xa000 from vtbar */
3494 	drhd = dmar_find_matched_drhd_unit(pdev);
3495 	if (WARN_TAINT_ONCE(!drhd || drhd->reg_base_addr - vtbar != 0xa000,
3496 			    TAINT_FIRMWARE_WORKAROUND,
3497 			    "BIOS assigned incorrect VT-d unit for Intel(R) QuickData Technology device\n"))
3498 		pdev->dev.archdata.iommu = DUMMY_DEVICE_DOMAIN_INFO;
3499 }
3500 DECLARE_PCI_FIXUP_ENABLE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_SNB, quirk_ioat_snb_local_iommu);
3501 
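/*
 * Mark DMAR units that need no translation: units whose device scope is
 * empty are ignored outright, and units covering only graphics devices are
 * either bypassed or flagged via intel_iommu_gfx_mapped, depending on the
 * dmar_map_gfx setting.
 */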
3502 static void __init init_no_remapping_devices(void)
3503 {
3504 	struct dmar_drhd_unit *drhd;
3505 	struct device *dev;
3506 	int i;
3507 
3508 	for_each_drhd_unit(drhd) {
3509 		if (!drhd->include_all) {
3510 			for_each_active_dev_scope(drhd->devices,
3511 						  drhd->devices_cnt, i, dev)
3512 				break;
3513 			/* ignore DMAR unit if no devices exist */
3514 			if (i == drhd->devices_cnt)
3515 				drhd->ignored = 1;
3516 		}
3517 	}
3518 
3519 	for_each_active_drhd_unit(drhd) {
3520 		if (drhd->include_all)
3521 			continue;
3522 
3523 		for_each_active_dev_scope(drhd->devices,
3524 					  drhd->devices_cnt, i, dev)
3525 			if (!dev_is_pci(dev) || !IS_GFX_DEVICE(to_pci_dev(dev)))
3526 				break;
3527 		if (i < drhd->devices_cnt)
3528 			continue;
3529 
3530 		/* This IOMMU has *only* gfx devices. Either bypass it or
3531 		   set the gfx_mapped flag, as appropriate */
3532 		if (dmar_map_gfx) {
3533 			intel_iommu_gfx_mapped = 1;
3534 		} else {
3535 			drhd->ignored = 1;
3536 			for_each_active_dev_scope(drhd->devices,
3537 						  drhd->devices_cnt, i, dev)
3538 				dev->archdata.iommu = DUMMY_DEVICE_DOMAIN_INFO;
3539 		}
3540 	}
3541 }
3542 
3543 #ifdef CONFIG_SUSPEND
3544 static int init_iommu_hw(void)
3545 {
3546 	struct dmar_drhd_unit *drhd;
3547 	struct intel_iommu *iommu = NULL;
3548 
3549 	for_each_active_iommu(iommu, drhd)
3550 		if (iommu->qi)
3551 			dmar_reenable_qi(iommu);
3552 
3553 	for_each_iommu(iommu, drhd) {
3554 		if (drhd->ignored) {
3555 			/*
3556 			 * we always have to disable PMRs or DMA may fail on
3557 			 * this device
3558 			 */
3559 			if (force_on)
3560 				iommu_disable_protect_mem_regions(iommu);
3561 			continue;
3562 		}
3563 
3564 		iommu_flush_write_buffer(iommu);
3565 
3566 		iommu_set_root_entry(iommu);
3567 
3568 		iommu->flush.flush_context(iommu, 0, 0, 0,
3569 					   DMA_CCMD_GLOBAL_INVL);
3570 		iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH);
3571 		iommu_enable_translation(iommu);
3572 		iommu_disable_protect_mem_regions(iommu);
3573 	}
3574 
3575 	return 0;
3576 }
3577 
3578 static void iommu_flush_all(void)
3579 {
3580 	struct dmar_drhd_unit *drhd;
3581 	struct intel_iommu *iommu;
3582 
3583 	for_each_active_iommu(iommu, drhd) {
3584 		iommu->flush.flush_context(iommu, 0, 0, 0,
3585 					   DMA_CCMD_GLOBAL_INVL);
3586 		iommu->flush.flush_iotlb(iommu, 0, 0, 0,
3587 					 DMA_TLB_GLOBAL_FLUSH);
3588 	}
3589 }
3590 
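/*
 * Save the fault-event control/data/address registers of every active IOMMU
 * across suspend; iommu_resume() below restores them after re-initializing
 * the hardware.
 */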
3591 static int iommu_suspend(void)
3592 {
3593 	struct dmar_drhd_unit *drhd;
3594 	struct intel_iommu *iommu = NULL;
3595 	unsigned long flag;
3596 
3597 	for_each_active_iommu(iommu, drhd) {
3598 		iommu->iommu_state = kzalloc(sizeof(u32) * MAX_SR_DMAR_REGS,
3599 						 GFP_ATOMIC);
3600 		if (!iommu->iommu_state)
3601 			goto nomem;
3602 	}
3603 
3604 	iommu_flush_all();
3605 
3606 	for_each_active_iommu(iommu, drhd) {
3607 		iommu_disable_translation(iommu);
3608 
3609 		raw_spin_lock_irqsave(&iommu->register_lock, flag);
3610 
3611 		iommu->iommu_state[SR_DMAR_FECTL_REG] =
3612 			readl(iommu->reg + DMAR_FECTL_REG);
3613 		iommu->iommu_state[SR_DMAR_FEDATA_REG] =
3614 			readl(iommu->reg + DMAR_FEDATA_REG);
3615 		iommu->iommu_state[SR_DMAR_FEADDR_REG] =
3616 			readl(iommu->reg + DMAR_FEADDR_REG);
3617 		iommu->iommu_state[SR_DMAR_FEUADDR_REG] =
3618 			readl(iommu->reg + DMAR_FEUADDR_REG);
3619 
3620 		raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
3621 	}
3622 	return 0;
3623 
3624 nomem:
3625 	for_each_active_iommu(iommu, drhd)
3626 		kfree(iommu->iommu_state);
3627 
3628 	return -ENOMEM;
3629 }
3630 
3631 static void iommu_resume(void)
3632 {
3633 	struct dmar_drhd_unit *drhd;
3634 	struct intel_iommu *iommu = NULL;
3635 	unsigned long flag;
3636 
3637 	if (init_iommu_hw()) {
3638 		if (force_on)
3639 			panic("tboot: IOMMU setup failed, DMAR can not resume!\n");
3640 		else
3641 			WARN(1, "IOMMU setup failed, DMAR can not resume!\n");
3642 		return;
3643 	}
3644 
3645 	for_each_active_iommu(iommu, drhd) {
3646 
3647 		raw_spin_lock_irqsave(&iommu->register_lock, flag);
3648 
3649 		writel(iommu->iommu_state[SR_DMAR_FECTL_REG],
3650 			iommu->reg + DMAR_FECTL_REG);
3651 		writel(iommu->iommu_state[SR_DMAR_FEDATA_REG],
3652 			iommu->reg + DMAR_FEDATA_REG);
3653 		writel(iommu->iommu_state[SR_DMAR_FEADDR_REG],
3654 			iommu->reg + DMAR_FEADDR_REG);
3655 		writel(iommu->iommu_state[SR_DMAR_FEUADDR_REG],
3656 			iommu->reg + DMAR_FEUADDR_REG);
3657 
3658 		raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
3659 	}
3660 
3661 	for_each_active_iommu(iommu, drhd)
3662 		kfree(iommu->iommu_state);
3663 }
3664 
3665 static struct syscore_ops iommu_syscore_ops = {
3666 	.resume		= iommu_resume,
3667 	.suspend	= iommu_suspend,
3668 };
3669 
3670 static void __init init_iommu_pm_ops(void)
3671 {
3672 	register_syscore_ops(&iommu_syscore_ops);
3673 }
3674 
3675 #else
3676 static inline void init_iommu_pm_ops(void) {}
3677 #endif	/* CONFIG_SUSPEND */
3678 
3679 
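/*
 * Parse one Reserved Memory Region Reporting (RMRR) structure from the DMAR
 * table and queue it on dmar_rmrr_units together with its device scope.
 */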
3680 int __init dmar_parse_one_rmrr(struct acpi_dmar_header *header, void *arg)
3681 {
3682 	struct acpi_dmar_reserved_memory *rmrr;
3683 	struct dmar_rmrr_unit *rmrru;
3684 
3685 	rmrru = kzalloc(sizeof(*rmrru), GFP_KERNEL);
3686 	if (!rmrru)
3687 		return -ENOMEM;
3688 
3689 	rmrru->hdr = header;
3690 	rmrr = (struct acpi_dmar_reserved_memory *)header;
3691 	rmrru->base_address = rmrr->base_address;
3692 	rmrru->end_address = rmrr->end_address;
3693 	rmrru->devices = dmar_alloc_dev_scope((void *)(rmrr + 1),
3694 				((void *)rmrr) + rmrr->header.length,
3695 				&rmrru->devices_cnt);
3696 	if (rmrru->devices_cnt && rmrru->devices == NULL) {
3697 		kfree(rmrru);
3698 		return -ENOMEM;
3699 	}
3700 
3701 	list_add(&rmrru->list, &dmar_rmrr_units);
3702 
3703 	return 0;
3704 }
3705 
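/*
 * Look up an already-registered ATSR unit that matches the given structure
 * by segment, length and contents.
 */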
3706 static struct dmar_atsr_unit *dmar_find_atsr(struct acpi_dmar_atsr *atsr)
3707 {
3708 	struct dmar_atsr_unit *atsru;
3709 	struct acpi_dmar_atsr *tmp;
3710 
3711 	list_for_each_entry_rcu(atsru, &dmar_atsr_units, list) {
3712 		tmp = (struct acpi_dmar_atsr *)atsru->hdr;
3713 		if (atsr->segment != tmp->segment)
3714 			continue;
3715 		if (atsr->header.length != tmp->header.length)
3716 			continue;
3717 		if (memcmp(atsr, tmp, atsr->header.length) == 0)
3718 			return atsru;
3719 	}
3720 
3721 	return NULL;
3722 }
3723 
3724 int dmar_parse_one_atsr(struct acpi_dmar_header *hdr, void *arg)
3725 {
3726 	struct acpi_dmar_atsr *atsr;
3727 	struct dmar_atsr_unit *atsru;
3728 
3729 	if (system_state != SYSTEM_BOOTING && !intel_iommu_enabled)
3730 		return 0;
3731 
3732 	atsr = container_of(hdr, struct acpi_dmar_atsr, header);
3733 	atsru = dmar_find_atsr(atsr);
3734 	if (atsru)
3735 		return 0;
3736 
3737 	atsru = kzalloc(sizeof(*atsru) + hdr->length, GFP_KERNEL);
3738 	if (!atsru)
3739 		return -ENOMEM;
3740 
3741 	/*
3742 	 * If memory is allocated from slab by ACPI _DSM method, we need to
3743 	 * copy the memory content because the memory buffer will be freed
3744 	 * on return.
3745 	 */
3746 	atsru->hdr = (void *)(atsru + 1);
3747 	memcpy(atsru->hdr, hdr, hdr->length);
3748 	atsru->include_all = atsr->flags & 0x1;
3749 	if (!atsru->include_all) {
3750 		atsru->devices = dmar_alloc_dev_scope((void *)(atsr + 1),
3751 				(void *)atsr + atsr->header.length,
3752 				&atsru->devices_cnt);
3753 		if (atsru->devices_cnt && atsru->devices == NULL) {
3754 			kfree(atsru);
3755 			return -ENOMEM;
3756 		}
3757 	}
3758 
3759 	list_add_rcu(&atsru->list, &dmar_atsr_units);
3760 
3761 	return 0;
3762 }
3763 
3764 static void intel_iommu_free_atsr(struct dmar_atsr_unit *atsru)
3765 {
3766 	dmar_free_dev_scope(&atsru->devices, &atsru->devices_cnt);
3767 	kfree(atsru);
3768 }
3769 
3770 int dmar_release_one_atsr(struct acpi_dmar_header *hdr, void *arg)
3771 {
3772 	struct acpi_dmar_atsr *atsr;
3773 	struct dmar_atsr_unit *atsru;
3774 
3775 	atsr = container_of(hdr, struct acpi_dmar_atsr, header);
3776 	atsru = dmar_find_atsr(atsr);
3777 	if (atsru) {
3778 		list_del_rcu(&atsru->list);
3779 		synchronize_rcu();
3780 		intel_iommu_free_atsr(atsru);
3781 	}
3782 
3783 	return 0;
3784 }
3785 
3786 int dmar_check_one_atsr(struct acpi_dmar_header *hdr, void *arg)
3787 {
3788 	int i;
3789 	struct device *dev;
3790 	struct acpi_dmar_atsr *atsr;
3791 	struct dmar_atsr_unit *atsru;
3792 
3793 	atsr = container_of(hdr, struct acpi_dmar_atsr, header);
3794 	atsru = dmar_find_atsr(atsr);
3795 	if (!atsru)
3796 		return 0;
3797 
3798 	if (!atsru->include_all && atsru->devices && atsru->devices_cnt)
3799 		for_each_active_dev_scope(atsru->devices, atsru->devices_cnt,
3800 					  i, dev)
3801 			return -EBUSY;
3802 
3803 	return 0;
3804 }
3805 
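/*
 * Bring a hot-added DMAR unit's IOMMU online: sanity-check its capabilities
 * against the current global settings, allocate its domain bookkeeping and
 * root entry, and (unless the unit is ignored) enable queued invalidation,
 * the fault interrupt and translation.
 */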
3806 static int intel_iommu_add(struct dmar_drhd_unit *dmaru)
3807 {
3808 	int sp, ret = 0;
3809 	struct intel_iommu *iommu = dmaru->iommu;
3810 
3811 	if (g_iommus[iommu->seq_id])
3812 		return 0;
3813 
3814 	if (hw_pass_through && !ecap_pass_through(iommu->ecap)) {
3815 		pr_warn("IOMMU: %s doesn't support hardware pass through.\n",
3816 			iommu->name);
3817 		return -ENXIO;
3818 	}
3819 	if (!ecap_sc_support(iommu->ecap) &&
3820 	    domain_update_iommu_snooping(iommu)) {
3821 		pr_warn("IOMMU: %s doesn't support snooping.\n",
3822 			iommu->name);
3823 		return -ENXIO;
3824 	}
3825 	sp = domain_update_iommu_superpage(iommu) - 1;
3826 	if (sp >= 0 && !(cap_super_page_val(iommu->cap) & (1 << sp))) {
3827 		pr_warn("IOMMU: %s doesn't support large page.\n",
3828 			iommu->name);
3829 		return -ENXIO;
3830 	}
3831 
3832 	/*
3833 	 * Disable translation if already enabled prior to OS handover.
3834 	 */
3835 	if (iommu->gcmd & DMA_GCMD_TE)
3836 		iommu_disable_translation(iommu);
3837 
3838 	g_iommus[iommu->seq_id] = iommu;
3839 	ret = iommu_init_domains(iommu);
3840 	if (ret == 0)
3841 		ret = iommu_alloc_root_entry(iommu);
3842 	if (ret)
3843 		goto out;
3844 
3845 	if (dmaru->ignored) {
3846 		/*
3847 		 * we always have to disable PMRs or DMA may fail on this device
3848 		 */
3849 		if (force_on)
3850 			iommu_disable_protect_mem_regions(iommu);
3851 		return 0;
3852 	}
3853 
3854 	intel_iommu_init_qi(iommu);
3855 	iommu_flush_write_buffer(iommu);
3856 	ret = dmar_set_interrupt(iommu);
3857 	if (ret)
3858 		goto disable_iommu;
3859 
3860 	iommu_set_root_entry(iommu);
3861 	iommu->flush.flush_context(iommu, 0, 0, 0, DMA_CCMD_GLOBAL_INVL);
3862 	iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH);
3863 	iommu_enable_translation(iommu);
3864 
3865 	if (si_domain) {
3866 		ret = iommu_attach_domain(si_domain, iommu);
3867 		if (ret < 0 || si_domain->id != ret)
3868 			goto disable_iommu;
3869 		domain_attach_iommu(si_domain, iommu);
3870 	}
3871 
3872 	iommu_disable_protect_mem_regions(iommu);
3873 	return 0;
3874 
3875 disable_iommu:
3876 	disable_dmar_iommu(iommu);
3877 out:
3878 	free_dmar_iommu(iommu);
3879 	return ret;
3880 }
3881 
3882 int dmar_iommu_hotplug(struct dmar_drhd_unit *dmaru, bool insert)
3883 {
3884 	int ret = 0;
3885 	struct intel_iommu *iommu = dmaru->iommu;
3886 
3887 	if (!intel_iommu_enabled)
3888 		return 0;
3889 	if (iommu == NULL)
3890 		return -EINVAL;
3891 
3892 	if (insert) {
3893 		ret = intel_iommu_add(dmaru);
3894 	} else {
3895 		disable_dmar_iommu(iommu);
3896 		free_dmar_iommu(iommu);
3897 	}
3898 
3899 	return ret;
3900 }
3901 
3902 static void intel_iommu_free_dmars(void)
3903 {
3904 	struct dmar_rmrr_unit *rmrru, *rmrr_n;
3905 	struct dmar_atsr_unit *atsru, *atsr_n;
3906 
3907 	list_for_each_entry_safe(rmrru, rmrr_n, &dmar_rmrr_units, list) {
3908 		list_del(&rmrru->list);
3909 		dmar_free_dev_scope(&rmrru->devices, &rmrru->devices_cnt);
3910 		kfree(rmrru);
3911 	}
3912 
3913 	list_for_each_entry_safe(atsru, atsr_n, &dmar_atsr_units, list) {
3914 		list_del(&atsru->list);
3915 		intel_iommu_free_atsr(atsru);
3916 	}
3917 }
3918 
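/*
 * Decide whether ATS may be used for @dev: walk up to the PCIe root port and
 * check whether it is covered by an ATSR unit for this segment. Integrated
 * (root-bus) devices are always allowed; devices behind non-PCIe buses or
 * PCIe-to-PCI bridges are not.
 */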
3919 int dmar_find_matched_atsr_unit(struct pci_dev *dev)
3920 {
3921 	int i, ret = 1;
3922 	struct pci_bus *bus;
3923 	struct pci_dev *bridge = NULL;
3924 	struct device *tmp;
3925 	struct acpi_dmar_atsr *atsr;
3926 	struct dmar_atsr_unit *atsru;
3927 
3928 	dev = pci_physfn(dev);
3929 	for (bus = dev->bus; bus; bus = bus->parent) {
3930 		bridge = bus->self;
3931 		/* If it's an integrated device, allow ATS */
3932 		if (!bridge)
3933 			return 1;
3934 		/* Connected via non-PCIe: no ATS */
3935 		if (!pci_is_pcie(bridge) ||
3936 		    pci_pcie_type(bridge) == PCI_EXP_TYPE_PCI_BRIDGE)
3937 			return 0;
3938 		/* If we found the root port, look it up in the ATSR */
3939 		if (pci_pcie_type(bridge) == PCI_EXP_TYPE_ROOT_PORT)
3940 			break;
3941 	}
3942 
3943 	rcu_read_lock();
3944 	list_for_each_entry_rcu(atsru, &dmar_atsr_units, list) {
3945 		atsr = container_of(atsru->hdr, struct acpi_dmar_atsr, header);
3946 		if (atsr->segment != pci_domain_nr(dev->bus))
3947 			continue;
3948 
3949 		for_each_dev_scope(atsru->devices, atsru->devices_cnt, i, tmp)
3950 			if (tmp == &bridge->dev)
3951 				goto out;
3952 
3953 		if (atsru->include_all)
3954 			goto out;
3955 	}
3956 	ret = 0;
3957 out:
3958 	rcu_read_unlock();
3959 
3960 	return ret;
3961 }
3962 
3963 int dmar_iommu_notify_scope_dev(struct dmar_pci_notify_info *info)
3964 {
3965 	int ret = 0;
3966 	struct dmar_rmrr_unit *rmrru;
3967 	struct dmar_atsr_unit *atsru;
3968 	struct acpi_dmar_atsr *atsr;
3969 	struct acpi_dmar_reserved_memory *rmrr;
3970 
3971 	if (!intel_iommu_enabled && system_state != SYSTEM_BOOTING)
3972 		return 0;
3973 
3974 	list_for_each_entry(rmrru, &dmar_rmrr_units, list) {
3975 		rmrr = container_of(rmrru->hdr,
3976 				    struct acpi_dmar_reserved_memory, header);
3977 		if (info->event == BUS_NOTIFY_ADD_DEVICE) {
3978 			ret = dmar_insert_dev_scope(info, (void *)(rmrr + 1),
3979 				((void *)rmrr) + rmrr->header.length,
3980 				rmrr->segment, rmrru->devices,
3981 				rmrru->devices_cnt);
3982 			if (ret < 0)
3983 				return ret;
3984 		} else if (info->event == BUS_NOTIFY_DEL_DEVICE) {
3985 			dmar_remove_dev_scope(info, rmrr->segment,
3986 				rmrru->devices, rmrru->devices_cnt);
3987 		}
3988 	}
3989 
3990 	list_for_each_entry(atsru, &dmar_atsr_units, list) {
3991 		if (atsru->include_all)
3992 			continue;
3993 
3994 		atsr = container_of(atsru->hdr, struct acpi_dmar_atsr, header);
3995 		if (info->event == BUS_NOTIFY_ADD_DEVICE) {
3996 			ret = dmar_insert_dev_scope(info, (void *)(atsr + 1),
3997 					(void *)atsr + atsr->header.length,
3998 					atsr->segment, atsru->devices,
3999 					atsru->devices_cnt);
4000 			if (ret > 0)
4001 				break;
4002 			else if (ret < 0)
4003 				return ret;
4004 		} else if (info->event == BUS_NOTIFY_DEL_DEVICE) {
4005 			if (dmar_remove_dev_scope(info, atsr->segment,
4006 					atsru->devices, atsru->devices_cnt))
4007 				break;
4008 		}
4009 	}
4010 
4011 	return 0;
4012 }
4013 
4014 /*
4015  * Here we only respond to a device being unbound from its driver.
4016  *
4017  * A newly added device is not attached to its DMAR domain here yet; that
4018  * happens later, when the device is first mapped to an iova.
4019  */
4020 static int device_notifier(struct notifier_block *nb,
4021 				  unsigned long action, void *data)
4022 {
4023 	struct device *dev = data;
4024 	struct dmar_domain *domain;
4025 
4026 	if (iommu_dummy(dev))
4027 		return 0;
4028 
4029 	if (action != BUS_NOTIFY_REMOVED_DEVICE)
4030 		return 0;
4031 
4032 	domain = find_domain(dev);
4033 	if (!domain)
4034 		return 0;
4035 
4036 	down_read(&dmar_global_lock);
4037 	domain_remove_one_dev_info(domain, dev);
4038 	if (!domain_type_is_vm_or_si(domain) && list_empty(&domain->devices))
4039 		domain_exit(domain);
4040 	up_read(&dmar_global_lock);
4041 
4042 	return 0;
4043 }
4044 
4045 static struct notifier_block device_nb = {
4046 	.notifier_call = device_notifier,
4047 };
4048 
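/*
 * Memory hotplug notifier: keep the static identity (si) domain in sync by
 * adding an identity map for memory that is coming online and tearing down
 * the IOVAs and page tables for memory that goes away again.
 */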
4049 static int intel_iommu_memory_notifier(struct notifier_block *nb,
4050 				       unsigned long val, void *v)
4051 {
4052 	struct memory_notify *mhp = v;
4053 	unsigned long long start, end;
4054 	unsigned long start_vpfn, last_vpfn;
4055 
4056 	switch (val) {
4057 	case MEM_GOING_ONLINE:
4058 		start = mhp->start_pfn << PAGE_SHIFT;
4059 		end = ((mhp->start_pfn + mhp->nr_pages) << PAGE_SHIFT) - 1;
4060 		if (iommu_domain_identity_map(si_domain, start, end)) {
4061 			pr_warn("dmar: failed to build identity map for [%llx-%llx]\n",
4062 				start, end);
4063 			return NOTIFY_BAD;
4064 		}
4065 		break;
4066 
4067 	case MEM_OFFLINE:
4068 	case MEM_CANCEL_ONLINE:
4069 		start_vpfn = mm_to_dma_pfn(mhp->start_pfn);
4070 		last_vpfn = mm_to_dma_pfn(mhp->start_pfn + mhp->nr_pages - 1);
4071 		while (start_vpfn <= last_vpfn) {
4072 			struct iova *iova;
4073 			struct dmar_drhd_unit *drhd;
4074 			struct intel_iommu *iommu;
4075 			struct page *freelist;
4076 
4077 			iova = find_iova(&si_domain->iovad, start_vpfn);
4078 			if (iova == NULL) {
4079 				pr_debug("dmar: failed to get IOVA for PFN %lx\n",
4080 					 start_vpfn);
4081 				break;
4082 			}
4083 
4084 			iova = split_and_remove_iova(&si_domain->iovad, iova,
4085 						     start_vpfn, last_vpfn);
4086 			if (iova == NULL) {
4087 				pr_warn("dmar: failed to split IOVA PFN [%lx-%lx]\n",
4088 					start_vpfn, last_vpfn);
4089 				return NOTIFY_BAD;
4090 			}
4091 
4092 			freelist = domain_unmap(si_domain, iova->pfn_lo,
4093 					       iova->pfn_hi);
4094 
4095 			rcu_read_lock();
4096 			for_each_active_iommu(iommu, drhd)
4097 				iommu_flush_iotlb_psi(iommu, si_domain->id,
4098 					iova->pfn_lo, iova_size(iova),
4099 					!freelist, 0);
4100 			rcu_read_unlock();
4101 			dma_free_pagelist(freelist);
4102 
4103 			start_vpfn = iova->pfn_hi + 1;
4104 			free_iova_mem(iova);
4105 		}
4106 		break;
4107 	}
4108 
4109 	return NOTIFY_OK;
4110 }
4111 
4112 static struct notifier_block intel_iommu_memory_nb = {
4113 	.notifier_call = intel_iommu_memory_notifier,
4114 	.priority = 0
4115 };
4116 
4117 
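/*
 * sysfs attributes exported for each IOMMU via intel_iommu_groups below:
 * the register version, the register base address, and the raw capability
 * and extended capability registers. They typically appear under the iommu
 * class device created in intel_iommu_init(), e.g.
 * /sys/class/iommu/dmar0/intel-iommu/cap.
 */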
4118 static ssize_t intel_iommu_show_version(struct device *dev,
4119 					struct device_attribute *attr,
4120 					char *buf)
4121 {
4122 	struct intel_iommu *iommu = dev_get_drvdata(dev);
4123 	u32 ver = readl(iommu->reg + DMAR_VER_REG);
4124 	return sprintf(buf, "%d:%d\n",
4125 		       DMAR_VER_MAJOR(ver), DMAR_VER_MINOR(ver));
4126 }
4127 static DEVICE_ATTR(version, S_IRUGO, intel_iommu_show_version, NULL);
4128 
4129 static ssize_t intel_iommu_show_address(struct device *dev,
4130 					struct device_attribute *attr,
4131 					char *buf)
4132 {
4133 	struct intel_iommu *iommu = dev_get_drvdata(dev);
4134 	return sprintf(buf, "%llx\n", iommu->reg_phys);
4135 }
4136 static DEVICE_ATTR(address, S_IRUGO, intel_iommu_show_address, NULL);
4137 
4138 static ssize_t intel_iommu_show_cap(struct device *dev,
4139 				    struct device_attribute *attr,
4140 				    char *buf)
4141 {
4142 	struct intel_iommu *iommu = dev_get_drvdata(dev);
4143 	return sprintf(buf, "%llx\n", iommu->cap);
4144 }
4145 static DEVICE_ATTR(cap, S_IRUGO, intel_iommu_show_cap, NULL);
4146 
4147 static ssize_t intel_iommu_show_ecap(struct device *dev,
4148 				    struct device_attribute *attr,
4149 				    char *buf)
4150 {
4151 	struct intel_iommu *iommu = dev_get_drvdata(dev);
4152 	return sprintf(buf, "%llx\n", iommu->ecap);
4153 }
4154 static DEVICE_ATTR(ecap, S_IRUGO, intel_iommu_show_ecap, NULL);
4155 
4156 static struct attribute *intel_iommu_attrs[] = {
4157 	&dev_attr_version.attr,
4158 	&dev_attr_address.attr,
4159 	&dev_attr_cap.attr,
4160 	&dev_attr_ecap.attr,
4161 	NULL,
4162 };
4163 
4164 static struct attribute_group intel_iommu_group = {
4165 	.name = "intel-iommu",
4166 	.attrs = intel_iommu_attrs,
4167 };
4168 
4169 const struct attribute_group *intel_iommu_groups[] = {
4170 	&intel_iommu_group,
4171 	NULL,
4172 };
4173 
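/*
 * Main initialization entry point: parse the DMAR table, initialize the
 * DMAR units, install intel_dma_ops as the global DMA ops, and register the
 * bus/memory notifiers and per-IOMMU sysfs devices. Under a TXT/tboot
 * launch (force_on) any failure here is fatal.
 */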
4174 int __init intel_iommu_init(void)
4175 {
4176 	int ret = -ENODEV;
4177 	struct dmar_drhd_unit *drhd;
4178 	struct intel_iommu *iommu;
4179 
4180 	/* VT-d is required for a TXT/tboot launch, so enforce that */
4181 	force_on = tboot_force_iommu();
4182 
4183 	if (iommu_init_mempool()) {
4184 		if (force_on)
4185 			panic("tboot: Failed to initialize iommu memory\n");
4186 		return -ENOMEM;
4187 	}
4188 
4189 	down_write(&dmar_global_lock);
4190 	if (dmar_table_init()) {
4191 		if (force_on)
4192 			panic("tboot: Failed to initialize DMAR table\n");
4193 		goto out_free_dmar;
4194 	}
4195 
4196 	/*
4197 	 * Disable translation if already enabled prior to OS handover.
4198 	 */
4199 	for_each_active_iommu(iommu, drhd)
4200 		if (iommu->gcmd & DMA_GCMD_TE)
4201 			iommu_disable_translation(iommu);
4202 
4203 	if (dmar_dev_scope_init() < 0) {
4204 		if (force_on)
4205 			panic("tboot: Failed to initialize DMAR device scope\n");
4206 		goto out_free_dmar;
4207 	}
4208 
4209 	if (no_iommu || dmar_disabled)
4210 		goto out_free_dmar;
4211 
4212 	if (list_empty(&dmar_rmrr_units))
4213 		printk(KERN_INFO "DMAR: No RMRR found\n");
4214 
4215 	if (list_empty(&dmar_atsr_units))
4216 		printk(KERN_INFO "DMAR: No ATSR found\n");
4217 
4218 	if (dmar_init_reserved_ranges()) {
4219 		if (force_on)
4220 			panic("tboot: Failed to reserve iommu ranges\n");
4221 		goto out_free_reserved_range;
4222 	}
4223 
4224 	init_no_remapping_devices();
4225 
4226 	ret = init_dmars();
4227 	if (ret) {
4228 		if (force_on)
4229 			panic("tboot: Failed to initialize DMARs\n");
4230 		printk(KERN_ERR "IOMMU: dmar init failed\n");
4231 		goto out_free_reserved_range;
4232 	}
4233 	up_write(&dmar_global_lock);
4234 	printk(KERN_INFO
4235 	"PCI-DMA: Intel(R) Virtualization Technology for Directed I/O\n");
4236 
4237 	init_timer(&unmap_timer);
4238 #ifdef CONFIG_SWIOTLB
4239 	swiotlb = 0;
4240 #endif
4241 	dma_ops = &intel_dma_ops;
4242 
4243 	init_iommu_pm_ops();
4244 
4245 	for_each_active_iommu(iommu, drhd)
4246 		iommu->iommu_dev = iommu_device_create(NULL, iommu,
4247 						       intel_iommu_groups,
4248 						       iommu->name);
4249 
4250 	bus_set_iommu(&pci_bus_type, &intel_iommu_ops);
4251 	bus_register_notifier(&pci_bus_type, &device_nb);
4252 	if (si_domain && !hw_pass_through)
4253 		register_memory_notifier(&intel_iommu_memory_nb);
4254 
4255 	intel_iommu_enabled = 1;
4256 
4257 	return 0;
4258 
4259 out_free_reserved_range:
4260 	put_iova_domain(&reserved_iova_list);
4261 out_free_dmar:
4262 	intel_iommu_free_dmars();
4263 	up_write(&dmar_global_lock);
4264 	iommu_exit_mempool();
4265 	return ret;
4266 }
4267 
4268 static int iommu_detach_dev_cb(struct pci_dev *pdev, u16 alias, void *opaque)
4269 {
4270 	struct intel_iommu *iommu = opaque;
4271 
4272 	iommu_detach_dev(iommu, PCI_BUS_NUM(alias), alias & 0xff);
4273 	return 0;
4274 }
4275 
4276 /*
4277  * NB - intel-iommu lacks any sort of reference counting for the users of
4278  * dependent devices.  If multiple endpoints have intersecting dependent
4279  * devices, unbinding the driver from any one of them will possibly leave
4280  * the others unable to operate.
4281  */
4282 static void iommu_detach_dependent_devices(struct intel_iommu *iommu,
4283 					   struct device *dev)
4284 {
4285 	if (!iommu || !dev || !dev_is_pci(dev))
4286 		return;
4287 
4288 	pci_for_each_dma_alias(to_pci_dev(dev), &iommu_detach_dev_cb, iommu);
4289 }
4290 
4291 static void domain_remove_one_dev_info(struct dmar_domain *domain,
4292 				       struct device *dev)
4293 {
4294 	struct device_domain_info *info, *tmp;
4295 	struct intel_iommu *iommu;
4296 	unsigned long flags;
4297 	bool found = false;
4298 	u8 bus, devfn;
4299 
4300 	iommu = device_to_iommu(dev, &bus, &devfn);
4301 	if (!iommu)
4302 		return;
4303 
4304 	spin_lock_irqsave(&device_domain_lock, flags);
4305 	list_for_each_entry_safe(info, tmp, &domain->devices, link) {
4306 		if (info->iommu == iommu && info->bus == bus &&
4307 		    info->devfn == devfn) {
4308 			unlink_domain_info(info);
4309 			spin_unlock_irqrestore(&device_domain_lock, flags);
4310 
4311 			iommu_disable_dev_iotlb(info);
4312 			iommu_detach_dev(iommu, info->bus, info->devfn);
4313 			iommu_detach_dependent_devices(iommu, dev);
4314 			free_devinfo_mem(info);
4315 
4316 			spin_lock_irqsave(&device_domain_lock, flags);
4317 
4318 			if (found)
4319 				break;
4320 			else
4321 				continue;
4322 		}
4323 
4324 		/* If there are no other devices under the same iommu
4325 		 * owned by this domain, clear this iommu from iommu_bmp
4326 		 * and update the iommu count and coherency.
4327 		 */
4328 		if (info->iommu == iommu)
4329 			found = true;
4330 	}
4331 
4332 	spin_unlock_irqrestore(&device_domain_lock, flags);
4333 
4334 	if (!found) {
4335 		domain_detach_iommu(domain, iommu);
4336 		if (!domain_type_is_vm_or_si(domain))
4337 			iommu_detach_domain(domain, iommu);
4338 	}
4339 }
4340 
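/*
 * Initialize a domain created through the IOMMU API (as opposed to the DMA
 * API path): set up its IOVA allocator, compute the adjusted guest address
 * width and allocate the top-level page directory.
 */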
4341 static int md_domain_init(struct dmar_domain *domain, int guest_width)
4342 {
4343 	int adjust_width;
4344 
4345 	init_iova_domain(&domain->iovad, VTD_PAGE_SIZE, IOVA_START_PFN,
4346 			DMA_32BIT_PFN);
4347 	domain_reserve_special_ranges(domain);
4348 
4349 	/* calculate AGAW */
4350 	domain->gaw = guest_width;
4351 	adjust_width = guestwidth_to_adjustwidth(guest_width);
4352 	domain->agaw = width_to_agaw(adjust_width);
4353 
4354 	domain->iommu_coherency = 0;
4355 	domain->iommu_snooping = 0;
4356 	domain->iommu_superpage = 0;
4357 	domain->max_addr = 0;
4358 
4359 	/* always allocate the top pgd */
4360 	domain->pgd = (struct dma_pte *)alloc_pgtable_page(domain->nid);
4361 	if (!domain->pgd)
4362 		return -ENOMEM;
4363 	domain_flush_cache(domain, domain->pgd, PAGE_SIZE);
4364 	return 0;
4365 }
4366 
4367 static struct iommu_domain *intel_iommu_domain_alloc(unsigned type)
4368 {
4369 	struct dmar_domain *dmar_domain;
4370 	struct iommu_domain *domain;
4371 
4372 	if (type != IOMMU_DOMAIN_UNMANAGED)
4373 		return NULL;
4374 
4375 	dmar_domain = alloc_domain(DOMAIN_FLAG_VIRTUAL_MACHINE);
4376 	if (!dmar_domain) {
4377 		printk(KERN_ERR
4378 			"%s: dmar_domain == NULL\n", __func__);
4379 		return NULL;
4380 	}
4381 	if (md_domain_init(dmar_domain, DEFAULT_DOMAIN_ADDRESS_WIDTH)) {
4382 		printk(KERN_ERR
4383 			"%s: md_domain_init() failed\n", __func__);
4384 		domain_exit(dmar_domain);
4385 		return NULL;
4386 	}
4387 	domain_update_iommu_cap(dmar_domain);
4388 
4389 	domain = &dmar_domain->domain;
4390 	domain->geometry.aperture_start = 0;
4391 	domain->geometry.aperture_end   = __DOMAIN_MAX_ADDR(dmar_domain->gaw);
4392 	domain->geometry.force_aperture = true;
4393 
4394 	return domain;
4395 }
4396 
4397 static void intel_iommu_domain_free(struct iommu_domain *domain)
4398 {
4399 	domain_exit(to_dmar_domain(domain));
4400 }
4401 
4402 static int intel_iommu_attach_device(struct iommu_domain *domain,
4403 				     struct device *dev)
4404 {
4405 	struct dmar_domain *dmar_domain = to_dmar_domain(domain);
4406 	struct intel_iommu *iommu;
4407 	int addr_width;
4408 	u8 bus, devfn;
4409 
4410 	if (device_is_rmrr_locked(dev)) {
4411 		dev_warn(dev, "Device is ineligible for IOMMU domain attach due to platform RMRR requirement.  Contact your platform vendor.\n");
4412 		return -EPERM;
4413 	}
4414 
4415 	/* normally dev is not mapped */
4416 	if (unlikely(domain_context_mapped(dev))) {
4417 		struct dmar_domain *old_domain;
4418 
4419 		old_domain = find_domain(dev);
4420 		if (old_domain) {
4421 			if (domain_type_is_vm_or_si(dmar_domain))
4422 				domain_remove_one_dev_info(old_domain, dev);
4423 			else
4424 				domain_remove_dev_info(old_domain);
4425 
4426 			if (!domain_type_is_vm_or_si(old_domain) &&
4427 			     list_empty(&old_domain->devices))
4428 				domain_exit(old_domain);
4429 		}
4430 	}
4431 
4432 	iommu = device_to_iommu(dev, &bus, &devfn);
4433 	if (!iommu)
4434 		return -ENODEV;
4435 
4436 	/* check if this iommu agaw is sufficient for max mapped address */
4437 	addr_width = agaw_to_width(iommu->agaw);
4438 	if (addr_width > cap_mgaw(iommu->cap))
4439 		addr_width = cap_mgaw(iommu->cap);
4440 
4441 	if (dmar_domain->max_addr > (1LL << addr_width)) {
4442 		printk(KERN_ERR "%s: iommu width (%d) is not "
4443 		       "sufficient for the mapped address (%llx)\n",
4444 		       __func__, addr_width, dmar_domain->max_addr);
4445 		return -EFAULT;
4446 	}
4447 	dmar_domain->gaw = addr_width;
4448 
4449 	/*
4450 	 * Knock out extra levels of page tables if necessary
4451 	 */
4452 	while (iommu->agaw < dmar_domain->agaw) {
4453 		struct dma_pte *pte;
4454 
4455 		pte = dmar_domain->pgd;
4456 		if (dma_pte_present(pte)) {
4457 			dmar_domain->pgd = (struct dma_pte *)
4458 				phys_to_virt(dma_pte_addr(pte));
4459 			free_pgtable_page(pte);
4460 		}
4461 		dmar_domain->agaw--;
4462 	}
4463 
4464 	return domain_add_dev_info(dmar_domain, dev, CONTEXT_TT_MULTI_LEVEL);
4465 }
4466 
4467 static void intel_iommu_detach_device(struct iommu_domain *domain,
4468 				      struct device *dev)
4469 {
4470 	domain_remove_one_dev_info(to_dmar_domain(domain), dev);
4471 }
4472 
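/*
 * iommu_ops->map callback: translate the IOMMU_* protection flags into DMA
 * PTE bits, grow the domain's max_addr bookkeeping (bounded by its guest
 * address width), and install the page table entries.
 */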
4473 static int intel_iommu_map(struct iommu_domain *domain,
4474 			   unsigned long iova, phys_addr_t hpa,
4475 			   size_t size, int iommu_prot)
4476 {
4477 	struct dmar_domain *dmar_domain = to_dmar_domain(domain);
4478 	u64 max_addr;
4479 	int prot = 0;
4480 	int ret;
4481 
4482 	if (iommu_prot & IOMMU_READ)
4483 		prot |= DMA_PTE_READ;
4484 	if (iommu_prot & IOMMU_WRITE)
4485 		prot |= DMA_PTE_WRITE;
4486 	if ((iommu_prot & IOMMU_CACHE) && dmar_domain->iommu_snooping)
4487 		prot |= DMA_PTE_SNP;
4488 
4489 	max_addr = iova + size;
4490 	if (dmar_domain->max_addr < max_addr) {
4491 		u64 end;
4492 
4493 		/* check if minimum agaw is sufficient for mapped address */
4494 		end = __DOMAIN_MAX_ADDR(dmar_domain->gaw) + 1;
4495 		if (end < max_addr) {
4496 			printk(KERN_ERR "%s: iommu width (%d) is not "
4497 			       "sufficient for the mapped address (%llx)\n",
4498 			       __func__, dmar_domain->gaw, max_addr);
4499 			return -EFAULT;
4500 		}
4501 		dmar_domain->max_addr = max_addr;
4502 	}
4503 	/* Round up size to next multiple of PAGE_SIZE, if it and
4504 	   the low bits of hpa would take us onto the next page */
4505 	size = aligned_nrpages(hpa, size);
4506 	ret = domain_pfn_mapping(dmar_domain, iova >> VTD_PAGE_SHIFT,
4507 				 hpa >> VTD_PAGE_SHIFT, size, prot);
4508 	return ret;
4509 }
4510 
4511 static size_t intel_iommu_unmap(struct iommu_domain *domain,
4512 				unsigned long iova, size_t size)
4513 {
4514 	struct dmar_domain *dmar_domain = to_dmar_domain(domain);
4515 	struct page *freelist = NULL;
4516 	struct intel_iommu *iommu;
4517 	unsigned long start_pfn, last_pfn;
4518 	unsigned int npages;
4519 	int iommu_id, num, ndomains, level = 0;
4520 
4521 	/* Cope with horrid API which requires us to unmap more than the
4522 	   size argument if it happens to be a large-page mapping. */
4523 	if (!pfn_to_dma_pte(dmar_domain, iova >> VTD_PAGE_SHIFT, &level))
4524 		BUG();
4525 
4526 	if (size < VTD_PAGE_SIZE << level_to_offset_bits(level))
4527 		size = VTD_PAGE_SIZE << level_to_offset_bits(level);
4528 
4529 	start_pfn = iova >> VTD_PAGE_SHIFT;
4530 	last_pfn = (iova + size - 1) >> VTD_PAGE_SHIFT;
4531 
4532 	freelist = domain_unmap(dmar_domain, start_pfn, last_pfn);
4533 
4534 	npages = last_pfn - start_pfn + 1;
4535 
4536 	for_each_set_bit(iommu_id, dmar_domain->iommu_bmp, g_num_of_iommus) {
4537 		iommu = g_iommus[iommu_id];
4538 
4539 		/*
4540 		 * find bit position of dmar_domain
4541 		 */
4542 		ndomains = cap_ndoms(iommu->cap);
4543 		for_each_set_bit(num, iommu->domain_ids, ndomains) {
4544 			if (iommu->domains[num] == dmar_domain)
4545 				iommu_flush_iotlb_psi(iommu, num, start_pfn,
4546 						      npages, !freelist, 0);
4547 		}
4548 
4549 	}
4550 
4551 	dma_free_pagelist(freelist);
4552 
4553 	if (dmar_domain->max_addr == iova + size)
4554 		dmar_domain->max_addr = iova;
4555 
4556 	return size;
4557 }
4558 
4559 static phys_addr_t intel_iommu_iova_to_phys(struct iommu_domain *domain,
4560 					    dma_addr_t iova)
4561 {
4562 	struct dmar_domain *dmar_domain = to_dmar_domain(domain);
4563 	struct dma_pte *pte;
4564 	int level = 0;
4565 	u64 phys = 0;
4566 
4567 	pte = pfn_to_dma_pte(dmar_domain, iova >> VTD_PAGE_SHIFT, &level);
4568 	if (pte)
4569 		phys = dma_pte_addr(pte);
4570 
4571 	return phys;
4572 }
4573 
4574 static bool intel_iommu_capable(enum iommu_cap cap)
4575 {
4576 	if (cap == IOMMU_CAP_CACHE_COHERENCY)
4577 		return domain_update_iommu_snooping(NULL) == 1;
4578 	if (cap == IOMMU_CAP_INTR_REMAP)
4579 		return irq_remapping_enabled == 1;
4580 
4581 	return false;
4582 }
4583 
4584 static int intel_iommu_add_device(struct device *dev)
4585 {
4586 	struct intel_iommu *iommu;
4587 	struct iommu_group *group;
4588 	u8 bus, devfn;
4589 
4590 	iommu = device_to_iommu(dev, &bus, &devfn);
4591 	if (!iommu)
4592 		return -ENODEV;
4593 
4594 	iommu_device_link(iommu->iommu_dev, dev);
4595 
4596 	group = iommu_group_get_for_dev(dev);
4597 
4598 	if (IS_ERR(group))
4599 		return PTR_ERR(group);
4600 
4601 	iommu_group_put(group);
4602 	return 0;
4603 }
4604 
4605 static void intel_iommu_remove_device(struct device *dev)
4606 {
4607 	struct intel_iommu *iommu;
4608 	u8 bus, devfn;
4609 
4610 	iommu = device_to_iommu(dev, &bus, &devfn);
4611 	if (!iommu)
4612 		return;
4613 
4614 	iommu_group_remove_device(dev);
4615 
4616 	iommu_device_unlink(iommu->iommu_dev, dev);
4617 }
4618 
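/*
 * Callbacks wired into the generic IOMMU API. For illustration only (pdev
 * being whatever PCI device the caller owns), a typical consumer such as
 * VFIO or KVM device assignment would reach them roughly like this:
 *
 *	struct iommu_domain *dom = iommu_domain_alloc(&pci_bus_type);
 *	if (dom && !iommu_attach_device(dom, &pdev->dev))
 *		iommu_map(dom, iova, phys, size, IOMMU_READ | IOMMU_WRITE);
 *
 * which ends up in intel_iommu_domain_alloc(), intel_iommu_attach_device()
 * and intel_iommu_map() above.
 */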
4619 static const struct iommu_ops intel_iommu_ops = {
4620 	.capable	= intel_iommu_capable,
4621 	.domain_alloc	= intel_iommu_domain_alloc,
4622 	.domain_free	= intel_iommu_domain_free,
4623 	.attach_dev	= intel_iommu_attach_device,
4624 	.detach_dev	= intel_iommu_detach_device,
4625 	.map		= intel_iommu_map,
4626 	.unmap		= intel_iommu_unmap,
4627 	.map_sg		= default_iommu_map_sg,
4628 	.iova_to_phys	= intel_iommu_iova_to_phys,
4629 	.add_device	= intel_iommu_add_device,
4630 	.remove_device	= intel_iommu_remove_device,
4631 	.pgsize_bitmap	= INTEL_IOMMU_PGSIZES,
4632 };
4633 
4634 static void quirk_iommu_g4x_gfx(struct pci_dev *dev)
4635 {
4636 	/* G4x/GM45 integrated gfx dmar support is totally busted. */
4637 	printk(KERN_INFO "DMAR: Disabling IOMMU for graphics on this chipset\n");
4638 	dmar_map_gfx = 0;
4639 }
4640 
4641 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2a40, quirk_iommu_g4x_gfx);
4642 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e00, quirk_iommu_g4x_gfx);
4643 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e10, quirk_iommu_g4x_gfx);
4644 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e20, quirk_iommu_g4x_gfx);
4645 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e30, quirk_iommu_g4x_gfx);
4646 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e40, quirk_iommu_g4x_gfx);
4647 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e90, quirk_iommu_g4x_gfx);
4648 
4649 static void quirk_iommu_rwbf(struct pci_dev *dev)
4650 {
4651 	/*
4652 	 * Mobile 4 Series Chipset neglects to set RWBF capability,
4653 	 * but needs it. Same seems to hold for the desktop versions.
4654 	 */
4655 	printk(KERN_INFO "DMAR: Forcing write-buffer flush capability\n");
4656 	rwbf_quirk = 1;
4657 }
4658 
4659 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2a40, quirk_iommu_rwbf);
4660 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e00, quirk_iommu_rwbf);
4661 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e10, quirk_iommu_rwbf);
4662 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e20, quirk_iommu_rwbf);
4663 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e30, quirk_iommu_rwbf);
4664 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e40, quirk_iommu_rwbf);
4665 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e90, quirk_iommu_rwbf);
4666 
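/*
 * Graphics control register (GGC) fields checked by
 * quirk_calpella_no_shadow_gtt() below; GGC_MEMORY_VT_ENABLED indicates
 * that the BIOS set aside stolen memory for the graphics shadow GTT.
 */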
4667 #define GGC 0x52
4668 #define GGC_MEMORY_SIZE_MASK	(0xf << 8)
4669 #define GGC_MEMORY_SIZE_NONE	(0x0 << 8)
4670 #define GGC_MEMORY_SIZE_1M	(0x1 << 8)
4671 #define GGC_MEMORY_SIZE_2M	(0x3 << 8)
4672 #define GGC_MEMORY_VT_ENABLED	(0x8 << 8)
4673 #define GGC_MEMORY_SIZE_2M_VT	(0x9 << 8)
4674 #define GGC_MEMORY_SIZE_3M_VT	(0xa << 8)
4675 #define GGC_MEMORY_SIZE_4M_VT	(0xb << 8)
4676 
4677 static void quirk_calpella_no_shadow_gtt(struct pci_dev *dev)
4678 {
4679 	unsigned short ggc;
4680 
4681 	if (pci_read_config_word(dev, GGC, &ggc))
4682 		return;
4683 
4684 	if (!(ggc & GGC_MEMORY_VT_ENABLED)) {
4685 		printk(KERN_INFO "DMAR: BIOS has allocated no shadow GTT; disabling IOMMU for graphics\n");
4686 		dmar_map_gfx = 0;
4687 	} else if (dmar_map_gfx) {
4688 		/* we have to ensure the gfx device is idle before we flush */
4689 		printk(KERN_INFO "DMAR: Disabling batched IOTLB flush on Ironlake\n");
4690 		intel_iommu_strict = 1;
4691 	}
4692 }
4693 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x0040, quirk_calpella_no_shadow_gtt);
4694 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x0044, quirk_calpella_no_shadow_gtt);
4695 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x0062, quirk_calpella_no_shadow_gtt);
4696 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x006a, quirk_calpella_no_shadow_gtt);
4697 
4698 /* On Tylersburg chipsets, some BIOSes have been known to enable the
4699    ISOCH DMAR unit for the Azalia sound device, but not give it any
4700    TLB entries, which causes it to deadlock. Check for that.  We do
4701    this in a function called from init_dmars(), instead of in a PCI
4702    quirk, because we don't want to print the obnoxious "BIOS broken"
4703    message if VT-d is actually disabled.
4704 */
4705 static void __init check_tylersburg_isoch(void)
4706 {
4707 	struct pci_dev *pdev;
4708 	uint32_t vtisochctrl;
4709 
4710 	/* If there's no Azalia in the system anyway, forget it. */
4711 	pdev = pci_get_device(PCI_VENDOR_ID_INTEL, 0x3a3e, NULL);
4712 	if (!pdev)
4713 		return;
4714 	pci_dev_put(pdev);
4715 
4716 	/* System Management Registers. Might be hidden, in which case
4717 	   we can't do the sanity check. But that's OK, because the
4718 	   known-broken BIOSes _don't_ actually hide it, so far. */
4719 	pdev = pci_get_device(PCI_VENDOR_ID_INTEL, 0x342e, NULL);
4720 	if (!pdev)
4721 		return;
4722 
4723 	if (pci_read_config_dword(pdev, 0x188, &vtisochctrl)) {
4724 		pci_dev_put(pdev);
4725 		return;
4726 	}
4727 
4728 	pci_dev_put(pdev);
4729 
4730 	/* If Azalia DMA is routed to the non-isoch DMAR unit, fine. */
4731 	if (vtisochctrl & 1)
4732 		return;
4733 
4734 	/* Drop all bits other than the number of TLB entries */
4735 	vtisochctrl &= 0x1c;
4736 
4737 	/* If we have the recommended number of TLB entries (16), fine. */
4738 	if (vtisochctrl == 0x10)
4739 		return;
4740 
4741 	/* Zero TLB entries? You get to ride the short bus to school. */
4742 	if (!vtisochctrl) {
4743 		WARN(1, "Your BIOS is broken; DMA routed to ISOCH DMAR unit but no TLB space.\n"
4744 		     "BIOS vendor: %s; Ver: %s; Product Version: %s\n",
4745 		     dmi_get_system_info(DMI_BIOS_VENDOR),
4746 		     dmi_get_system_info(DMI_BIOS_VERSION),
4747 		     dmi_get_system_info(DMI_PRODUCT_VERSION));
4748 		iommu_identity_mapping |= IDENTMAP_AZALIA;
4749 		return;
4750 	}
4751 
4752 	printk(KERN_WARNING "DMAR: Recommended TLB entries for ISOCH unit is 16; your BIOS set %d\n",
4753 	       vtisochctrl);
4754 }
4755