/*
 * Copyright (c) 2006, Intel Corporation.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
 * more details.
 *
 * You should have received a copy of the GNU General Public License along with
 * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
 * Place - Suite 330, Boston, MA 02111-1307 USA.
 *
 * Copyright (C) 2006-2008 Intel Corporation
 * Author: Ashok Raj <ashok.raj@intel.com>
 * Author: Shaohua Li <shaohua.li@intel.com>
 * Author: Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com>
 *
 * This file implements early detection/parsing of Remapping Devices
 * reported to OS through BIOS via DMA remapping reporting (DMAR) ACPI
 * tables.
 *
 * These routines are used by both DMA-remapping and Interrupt-remapping.
 */

#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt /* has to precede printk.h */

#include <linux/pci.h>
#include <linux/dmar.h>
#include <linux/iova.h>
#include <linux/intel-iommu.h>
#include <linux/timer.h>
#include <linux/irq.h>
#include <linux/interrupt.h>
#include <linux/tboot.h>
#include <linux/dmi.h>
#include <linux/slab.h>
#include <linux/iommu.h>
#include <asm/irq_remapping.h>
#include <asm/iommu_table.h>

#include "irq_remapping.h"

typedef int (*dmar_res_handler_t)(struct acpi_dmar_header *, void *);
struct dmar_res_callback {
	dmar_res_handler_t	cb[ACPI_DMAR_TYPE_RESERVED];
	void			*arg[ACPI_DMAR_TYPE_RESERVED];
	bool			ignore_unhandled;
	bool			print_entry;
};
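
/*
 * Usage sketch (illustrative only; parse_dmar_table() below is a real
 * caller): fill in one handler per structure type of interest and hand
 * the table body to dmar_walk_remapping_entries():
 *
 *	int drhd_count = 0;
 *	struct dmar_res_callback cb = {
 *		.print_entry = true,
 *		.ignore_unhandled = true,
 *		.cb[ACPI_DMAR_TYPE_HARDWARE_UNIT] = &dmar_parse_one_drhd,
 *		.arg[ACPI_DMAR_TYPE_HARDWARE_UNIT] = &drhd_count,
 *	};
 *	ret = dmar_walk_remapping_entries(start, len, &cb);
 */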

/*
 * Assumptions:
 * 1) The hotplug framework guarantees that a DMAR unit will be hot-added
 *    before IO devices managed by that unit.
 * 2) The hotplug framework guarantees that a DMAR unit will be hot-removed
 *    after IO devices managed by that unit.
 * 3) Hotplug events are rare.
 *
 * Locking rules for DMA and interrupt remapping related global data structures:
 * 1) Use dmar_global_lock in process context
 * 2) Use RCU in interrupt context
 */
DECLARE_RWSEM(dmar_global_lock);
LIST_HEAD(dmar_drhd_units);
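
/*
 * A minimal sketch of the two access patterns described above. Process
 * context takes dmar_global_lock (and may sleep):
 *
 *	down_read(&dmar_global_lock);
 *	for_each_drhd_unit(dmaru)
 *		...
 *	up_read(&dmar_global_lock);
 *
 * Interrupt (or other non-sleeping) context walks the RCU-protected
 * list instead, as dmar_find_matched_drhd_unit() does:
 *
 *	rcu_read_lock();
 *	for_each_drhd_unit(dmaru)
 *		...
 *	rcu_read_unlock();
 */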

struct acpi_table_header * __initdata dmar_tbl;
static acpi_size dmar_tbl_size;
static int dmar_dev_scope_status = 1;
static unsigned long dmar_seq_ids[BITS_TO_LONGS(DMAR_UNITS_SUPPORTED)];

static int alloc_iommu(struct dmar_drhd_unit *drhd);
static void free_iommu(struct intel_iommu *iommu);

static void dmar_register_drhd_unit(struct dmar_drhd_unit *drhd)
{
	/*
	 * Add units with INCLUDE_ALL at the tail, so a scan of the list
	 * will find them at the very end.
	 */
	if (drhd->include_all)
		list_add_tail_rcu(&drhd->list, &dmar_drhd_units);
	else
		list_add_rcu(&drhd->list, &dmar_drhd_units);
}

void *dmar_alloc_dev_scope(void *start, void *end, int *cnt)
{
	struct acpi_dmar_device_scope *scope;

	*cnt = 0;
	while (start < end) {
		scope = start;
		if (scope->entry_type == ACPI_DMAR_SCOPE_TYPE_NAMESPACE ||
		    scope->entry_type == ACPI_DMAR_SCOPE_TYPE_ENDPOINT ||
		    scope->entry_type == ACPI_DMAR_SCOPE_TYPE_BRIDGE)
			(*cnt)++;
		else if (scope->entry_type != ACPI_DMAR_SCOPE_TYPE_IOAPIC &&
			scope->entry_type != ACPI_DMAR_SCOPE_TYPE_HPET) {
			pr_warn("Unsupported device scope\n");
		}
		start += scope->length;
	}
	if (*cnt == 0)
		return NULL;

	return kcalloc(*cnt, sizeof(struct dmar_dev_scope), GFP_KERNEL);
}

void dmar_free_dev_scope(struct dmar_dev_scope **devices, int *cnt)
{
	int i;
	struct device *tmp_dev;

	if (*devices && *cnt) {
		for_each_active_dev_scope(*devices, *cnt, i, tmp_dev)
			put_device(tmp_dev);
		kfree(*devices);
	}

	*devices = NULL;
	*cnt = 0;
}

/* Optimize out kzalloc()/kfree() for normal cases */
static char dmar_pci_notify_info_buf[64];

static struct dmar_pci_notify_info *
dmar_alloc_pci_notify_info(struct pci_dev *dev, unsigned long event)
{
	int level = 0;
	size_t size;
	struct pci_dev *tmp;
	struct dmar_pci_notify_info *info;

	BUG_ON(dev->is_virtfn);

	/* Only generate path[] for device addition events */
	if (event == BUS_NOTIFY_ADD_DEVICE)
		for (tmp = dev; tmp; tmp = tmp->bus->self)
			level++;

	size = sizeof(*info) + level * sizeof(struct acpi_dmar_pci_path);
	if (size <= sizeof(dmar_pci_notify_info_buf)) {
		info = (struct dmar_pci_notify_info *)dmar_pci_notify_info_buf;
	} else {
		info = kzalloc(size, GFP_KERNEL);
		if (!info) {
			pr_warn("Out of memory when allocating notify_info for %s.\n",
				pci_name(dev));
			if (dmar_dev_scope_status == 0)
				dmar_dev_scope_status = -ENOMEM;
			return NULL;
		}
	}

	info->event = event;
	info->dev = dev;
	info->seg = pci_domain_nr(dev->bus);
	info->level = level;
	if (event == BUS_NOTIFY_ADD_DEVICE) {
		for (tmp = dev; tmp; tmp = tmp->bus->self) {
			level--;
			info->path[level].bus = tmp->bus->number;
			info->path[level].device = PCI_SLOT(tmp->devfn);
			info->path[level].function = PCI_FUNC(tmp->devfn);
			if (pci_is_root_bus(tmp->bus))
				info->bus = tmp->bus->number;
		}
	}

	return info;
}

static inline void dmar_free_pci_notify_info(struct dmar_pci_notify_info *info)
{
	if ((void *)info != dmar_pci_notify_info_buf)
		kfree(info);
}

static bool dmar_match_pci_path(struct dmar_pci_notify_info *info, int bus,
				struct acpi_dmar_pci_path *path, int count)
{
	int i;

	if (info->bus != bus)
		goto fallback;
	if (info->level != count)
		goto fallback;

	for (i = 0; i < count; i++) {
		if (path[i].device != info->path[i].device ||
		    path[i].function != info->path[i].function)
			goto fallback;
	}

	return true;

fallback:

	if (count != 1)
		return false;

	i = info->level - 1;
	if (bus              == info->path[i].bus &&
	    path[0].device   == info->path[i].device &&
	    path[0].function == info->path[i].function) {
		pr_info(FW_BUG "RMRR entry for device %02x:%02x.%x is broken - applying workaround\n",
			bus, path[0].device, path[0].function);
		return true;
	}

	return false;
}
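
/*
 * Example of the matching above (illustrative topology only): for an
 * endpoint 0000:02:03.1 behind a bridge on the root bus, the notify
 * info records info->bus = 0 and info->path[] as the hop list
 * { bridge, endpoint }, which must equal the (device, function) pairs
 * in the ACPI scope's path[] starting at scope->bus. The fallback
 * accepts firmware that (incorrectly) describes such a device with a
 * single path entry relative to its secondary bus, e.g. an RMRR scope
 * of bus 2, device 3, function 1 for the endpoint above.
 */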

/* Return: > 0 if match found, 0 if no match found, < 0 if error happens */
int dmar_insert_dev_scope(struct dmar_pci_notify_info *info,
			  void *start, void *end, u16 segment,
			  struct dmar_dev_scope *devices,
			  int devices_cnt)
{
	int i, level;
	struct device *tmp, *dev = &info->dev->dev;
	struct acpi_dmar_device_scope *scope;
	struct acpi_dmar_pci_path *path;

	if (segment != info->seg)
		return 0;

	for (; start < end; start += scope->length) {
		scope = start;
		if (scope->entry_type != ACPI_DMAR_SCOPE_TYPE_ENDPOINT &&
		    scope->entry_type != ACPI_DMAR_SCOPE_TYPE_BRIDGE)
			continue;

		path = (struct acpi_dmar_pci_path *)(scope + 1);
		level = (scope->length - sizeof(*scope)) / sizeof(*path);
		if (!dmar_match_pci_path(info, scope->bus, path, level))
			continue;

		if ((scope->entry_type == ACPI_DMAR_SCOPE_TYPE_ENDPOINT) ^
		    (info->dev->hdr_type == PCI_HEADER_TYPE_NORMAL)) {
			pr_warn("Device scope type does not match for %s\n",
				pci_name(info->dev));
			return -EINVAL;
		}

		for_each_dev_scope(devices, devices_cnt, i, tmp)
			if (tmp == NULL) {
				devices[i].bus = info->dev->bus->number;
				devices[i].devfn = info->dev->devfn;
				rcu_assign_pointer(devices[i].dev,
						   get_device(dev));
				return 1;
			}
		BUG_ON(i >= devices_cnt);
	}

	return 0;
}

int dmar_remove_dev_scope(struct dmar_pci_notify_info *info, u16 segment,
			  struct dmar_dev_scope *devices, int count)
{
	int index;
	struct device *tmp;

	if (info->seg != segment)
		return 0;

	for_each_active_dev_scope(devices, count, index, tmp)
		if (tmp == &info->dev->dev) {
			RCU_INIT_POINTER(devices[index].dev, NULL);
			synchronize_rcu();
			put_device(tmp);
			return 1;
		}

	return 0;
}

static int dmar_pci_bus_add_dev(struct dmar_pci_notify_info *info)
{
	int ret = 0;
	struct dmar_drhd_unit *dmaru;
	struct acpi_dmar_hardware_unit *drhd;

	for_each_drhd_unit(dmaru) {
		if (dmaru->include_all)
			continue;

		drhd = container_of(dmaru->hdr,
				    struct acpi_dmar_hardware_unit, header);
		ret = dmar_insert_dev_scope(info, (void *)(drhd + 1),
				((void *)drhd) + drhd->header.length,
				dmaru->segment,
				dmaru->devices, dmaru->devices_cnt);
		if (ret != 0)
			break;
	}
	if (ret >= 0)
		ret = dmar_iommu_notify_scope_dev(info);
	if (ret < 0 && dmar_dev_scope_status == 0)
		dmar_dev_scope_status = ret;

	return ret;
}

static void dmar_pci_bus_del_dev(struct dmar_pci_notify_info *info)
{
	struct dmar_drhd_unit *dmaru;

	for_each_drhd_unit(dmaru)
		if (dmar_remove_dev_scope(info, dmaru->segment,
			dmaru->devices, dmaru->devices_cnt))
			break;
	dmar_iommu_notify_scope_dev(info);
}

static int dmar_pci_bus_notifier(struct notifier_block *nb,
				 unsigned long action, void *data)
{
	struct pci_dev *pdev = to_pci_dev(data);
	struct dmar_pci_notify_info *info;

	/* Only care about add/remove events for physical functions */
	if (pdev->is_virtfn)
		return NOTIFY_DONE;
	if (action != BUS_NOTIFY_ADD_DEVICE && action != BUS_NOTIFY_DEL_DEVICE)
		return NOTIFY_DONE;

	info = dmar_alloc_pci_notify_info(pdev, action);
	if (!info)
		return NOTIFY_DONE;

	down_write(&dmar_global_lock);
	if (action == BUS_NOTIFY_ADD_DEVICE)
		dmar_pci_bus_add_dev(info);
	else if (action == BUS_NOTIFY_DEL_DEVICE)
		dmar_pci_bus_del_dev(info);
	up_write(&dmar_global_lock);

	dmar_free_pci_notify_info(info);

	return NOTIFY_OK;
}

static struct notifier_block dmar_pci_bus_nb = {
	.notifier_call = dmar_pci_bus_notifier,
	.priority = INT_MIN,
};

static struct dmar_drhd_unit *
dmar_find_dmaru(struct acpi_dmar_hardware_unit *drhd)
{
	struct dmar_drhd_unit *dmaru;

	list_for_each_entry_rcu(dmaru, &dmar_drhd_units, list)
		if (dmaru->segment == drhd->segment &&
		    dmaru->reg_base_addr == drhd->address)
			return dmaru;

	return NULL;
}

/**
 * dmar_parse_one_drhd - parses exactly one DMA remapping hardware definition
 * structure, which uniquely represents one DMA remapping hardware unit
 * present in the platform
 */
static int dmar_parse_one_drhd(struct acpi_dmar_header *header, void *arg)
{
	struct acpi_dmar_hardware_unit *drhd;
	struct dmar_drhd_unit *dmaru;
	int ret = 0;

	drhd = (struct acpi_dmar_hardware_unit *)header;
	dmaru = dmar_find_dmaru(drhd);
	if (dmaru)
		goto out;

	dmaru = kzalloc(sizeof(*dmaru) + header->length, GFP_KERNEL);
	if (!dmaru)
		return -ENOMEM;

	/*
	 * If header is allocated from slab by ACPI _DSM method, we need to
	 * copy the content because the memory buffer will be freed on return.
	 */
	dmaru->hdr = (void *)(dmaru + 1);
	memcpy(dmaru->hdr, header, header->length);
	dmaru->reg_base_addr = drhd->address;
	dmaru->segment = drhd->segment;
	dmaru->include_all = drhd->flags & 0x1; /* BIT0: INCLUDE_ALL */
	dmaru->devices = dmar_alloc_dev_scope((void *)(drhd + 1),
					      ((void *)drhd) + drhd->header.length,
					      &dmaru->devices_cnt);
	if (dmaru->devices_cnt && dmaru->devices == NULL) {
		kfree(dmaru);
		return -ENOMEM;
	}

	ret = alloc_iommu(dmaru);
	if (ret) {
		dmar_free_dev_scope(&dmaru->devices,
				    &dmaru->devices_cnt);
		kfree(dmaru);
		return ret;
	}
	dmar_register_drhd_unit(dmaru);

out:
	if (arg)
		(*(int *)arg)++;

	return 0;
}

static void dmar_free_drhd(struct dmar_drhd_unit *dmaru)
{
	if (dmaru->devices && dmaru->devices_cnt)
		dmar_free_dev_scope(&dmaru->devices, &dmaru->devices_cnt);
	if (dmaru->iommu)
		free_iommu(dmaru->iommu);
	kfree(dmaru);
}

static int __init dmar_parse_one_andd(struct acpi_dmar_header *header,
				      void *arg)
{
	struct acpi_dmar_andd *andd = (void *)header;

	/* Check for NUL termination within the designated length */
	if (strnlen(andd->device_name, header->length - 8) == header->length - 8) {
		WARN_TAINT(1, TAINT_FIRMWARE_WORKAROUND,
			   "Your BIOS is broken; ANDD object name is not NUL-terminated\n"
			   "BIOS vendor: %s; Ver: %s; Product Version: %s\n",
			   dmi_get_system_info(DMI_BIOS_VENDOR),
			   dmi_get_system_info(DMI_BIOS_VERSION),
			   dmi_get_system_info(DMI_PRODUCT_VERSION));
		return -EINVAL;
	}
	pr_info("ANDD device: %x name: %s\n", andd->device_number,
		andd->device_name);

	return 0;
}

#ifdef CONFIG_ACPI_NUMA
static int dmar_parse_one_rhsa(struct acpi_dmar_header *header, void *arg)
{
	struct acpi_dmar_rhsa *rhsa;
	struct dmar_drhd_unit *drhd;

	rhsa = (struct acpi_dmar_rhsa *)header;
	for_each_drhd_unit(drhd) {
		if (drhd->reg_base_addr == rhsa->base_address) {
			int node = acpi_map_pxm_to_node(rhsa->proximity_domain);

			if (!node_online(node))
				node = -1;
			drhd->iommu->node = node;
			return 0;
		}
	}
	WARN_TAINT(
		1, TAINT_FIRMWARE_WORKAROUND,
		"Your BIOS is broken; RHSA refers to non-existent DMAR unit at %llx\n"
		"BIOS vendor: %s; Ver: %s; Product Version: %s\n",
		rhsa->base_address,
		dmi_get_system_info(DMI_BIOS_VENDOR),
		dmi_get_system_info(DMI_BIOS_VERSION),
		dmi_get_system_info(DMI_PRODUCT_VERSION));

	return 0;
}
#else
#define	dmar_parse_one_rhsa		dmar_res_noop
#endif

static void __init
dmar_table_print_dmar_entry(struct acpi_dmar_header *header)
{
	struct acpi_dmar_hardware_unit *drhd;
	struct acpi_dmar_reserved_memory *rmrr;
	struct acpi_dmar_atsr *atsr;
	struct acpi_dmar_rhsa *rhsa;

	switch (header->type) {
	case ACPI_DMAR_TYPE_HARDWARE_UNIT:
		drhd = container_of(header, struct acpi_dmar_hardware_unit,
				    header);
		pr_info("DRHD base: %#016Lx flags: %#x\n",
			(unsigned long long)drhd->address, drhd->flags);
		break;
	case ACPI_DMAR_TYPE_RESERVED_MEMORY:
		rmrr = container_of(header, struct acpi_dmar_reserved_memory,
				    header);
		pr_info("RMRR base: %#016Lx end: %#016Lx\n",
			(unsigned long long)rmrr->base_address,
			(unsigned long long)rmrr->end_address);
		break;
	case ACPI_DMAR_TYPE_ROOT_ATS:
		atsr = container_of(header, struct acpi_dmar_atsr, header);
		pr_info("ATSR flags: %#x\n", atsr->flags);
		break;
	case ACPI_DMAR_TYPE_HARDWARE_AFFINITY:
		rhsa = container_of(header, struct acpi_dmar_rhsa, header);
		pr_info("RHSA base: %#016Lx proximity domain: %#x\n",
		       (unsigned long long)rhsa->base_address,
		       rhsa->proximity_domain);
		break;
	case ACPI_DMAR_TYPE_NAMESPACE:
		/*
		 * We don't print this here because we need to sanity-check
		 * it first. So print it in dmar_parse_one_andd() instead.
		 */
		break;
	}
}

/**
 * dmar_table_detect - checks to see if the platform supports DMAR devices
 */
static int __init dmar_table_detect(void)
{
	acpi_status status = AE_OK;

	/* If we can find the DMAR table, then there are DMAR devices */
	status = acpi_get_table_with_size(ACPI_SIG_DMAR, 0,
				(struct acpi_table_header **)&dmar_tbl,
				&dmar_tbl_size);

	if (ACPI_SUCCESS(status) && !dmar_tbl) {
		pr_warn("Unable to map DMAR\n");
		status = AE_NOT_FOUND;
	}

	return (ACPI_SUCCESS(status) ? 1 : 0);
}

static int dmar_walk_remapping_entries(struct acpi_dmar_header *start,
				       size_t len, struct dmar_res_callback *cb)
{
	int ret = 0;
	struct acpi_dmar_header *iter, *next;
	struct acpi_dmar_header *end = ((void *)start) + len;

	for (iter = start; iter < end && ret == 0; iter = next) {
		next = (void *)iter + iter->length;
		if (iter->length == 0) {
			/* Avoid looping forever on bad ACPI tables */
			pr_debug(FW_BUG "Invalid 0-length structure\n");
			break;
		} else if (next > end) {
			/* Avoid running past the end of the table */
			pr_warn(FW_BUG "Record passes table end\n");
			ret = -EINVAL;
			break;
		}

		if (cb->print_entry)
			dmar_table_print_dmar_entry(iter);

		if (iter->type >= ACPI_DMAR_TYPE_RESERVED) {
			/* continue for forward compatibility */
			pr_debug("Unknown DMAR structure type %d\n",
				 iter->type);
		} else if (cb->cb[iter->type]) {
			ret = cb->cb[iter->type](iter, cb->arg[iter->type]);
		} else if (!cb->ignore_unhandled) {
			pr_warn("No handler for DMAR structure type %d\n",
				iter->type);
			ret = -EINVAL;
		}
	}

	return ret;
}

static inline int dmar_walk_dmar_table(struct acpi_table_dmar *dmar,
				       struct dmar_res_callback *cb)
{
	return dmar_walk_remapping_entries((void *)(dmar + 1),
			dmar->header.length - sizeof(*dmar), cb);
}

/**
 * parse_dmar_table - parses the DMA reporting table
 */
static int __init
parse_dmar_table(void)
{
	struct acpi_table_dmar *dmar;
	int ret = 0;
	int drhd_count = 0;
	struct dmar_res_callback cb = {
		.print_entry = true,
		.ignore_unhandled = true,
		.arg[ACPI_DMAR_TYPE_HARDWARE_UNIT] = &drhd_count,
		.cb[ACPI_DMAR_TYPE_HARDWARE_UNIT] = &dmar_parse_one_drhd,
		.cb[ACPI_DMAR_TYPE_RESERVED_MEMORY] = &dmar_parse_one_rmrr,
		.cb[ACPI_DMAR_TYPE_ROOT_ATS] = &dmar_parse_one_atsr,
		.cb[ACPI_DMAR_TYPE_HARDWARE_AFFINITY] = &dmar_parse_one_rhsa,
		.cb[ACPI_DMAR_TYPE_NAMESPACE] = &dmar_parse_one_andd,
	};

	/*
	 * Do the detection again; the earlier dmar_tbl mapping may have
	 * used a fixed map.
	 */
	dmar_table_detect();

	/*
	 * ACPI tables may not be DMA protected by tboot, so use the DMAR copy
	 * that SINIT saved in SinitMleData in the TXT heap (which is DMA
	 * protected).
	 */
	dmar_tbl = tboot_get_dmar_table(dmar_tbl);

	dmar = (struct acpi_table_dmar *)dmar_tbl;
	if (!dmar)
		return -ENODEV;

	if (dmar->width < PAGE_SHIFT - 1) {
		pr_warn("Invalid DMAR haw\n");
		return -EINVAL;
	}

	pr_info("Host address width %d\n", dmar->width + 1);
	ret = dmar_walk_dmar_table(dmar, &cb);
	if (ret == 0 && drhd_count == 0)
		pr_warn(FW_BUG "No DRHD structure found in DMAR table\n");

	return ret;
}

static int dmar_pci_device_match(struct dmar_dev_scope devices[],
				 int cnt, struct pci_dev *dev)
{
	int index;
	struct device *tmp;

	while (dev) {
		for_each_active_dev_scope(devices, cnt, index, tmp)
			if (dev_is_pci(tmp) && dev == to_pci_dev(tmp))
				return 1;

		/* Check our parent */
		dev = dev->bus->self;
	}

	return 0;
}

struct dmar_drhd_unit *
dmar_find_matched_drhd_unit(struct pci_dev *dev)
{
	struct dmar_drhd_unit *dmaru;
	struct acpi_dmar_hardware_unit *drhd;

	dev = pci_physfn(dev);

	rcu_read_lock();
	for_each_drhd_unit(dmaru) {
		drhd = container_of(dmaru->hdr,
				    struct acpi_dmar_hardware_unit,
				    header);

		if (dmaru->include_all &&
		    drhd->segment == pci_domain_nr(dev->bus))
			goto out;

		if (dmar_pci_device_match(dmaru->devices,
					  dmaru->devices_cnt, dev))
			goto out;
	}
	dmaru = NULL;
out:
	rcu_read_unlock();

	return dmaru;
}

static void __init dmar_acpi_insert_dev_scope(u8 device_number,
					      struct acpi_device *adev)
{
	struct dmar_drhd_unit *dmaru;
	struct acpi_dmar_hardware_unit *drhd;
	struct acpi_dmar_device_scope *scope;
	struct device *tmp;
	int i;
	struct acpi_dmar_pci_path *path;

	for_each_drhd_unit(dmaru) {
		drhd = container_of(dmaru->hdr,
				    struct acpi_dmar_hardware_unit,
				    header);

		for (scope = (void *)(drhd + 1);
		     (unsigned long)scope < ((unsigned long)drhd) + drhd->header.length;
		     scope = ((void *)scope) + scope->length) {
			if (scope->entry_type != ACPI_DMAR_SCOPE_TYPE_NAMESPACE)
				continue;
			if (scope->enumeration_id != device_number)
				continue;

			path = (void *)(scope + 1);
			pr_info("ACPI device \"%s\" under DMAR at %llx as %02x:%02x.%d\n",
				dev_name(&adev->dev), dmaru->reg_base_addr,
				scope->bus, path->device, path->function);
			for_each_dev_scope(dmaru->devices, dmaru->devices_cnt, i, tmp)
				if (tmp == NULL) {
					dmaru->devices[i].bus = scope->bus;
					dmaru->devices[i].devfn = PCI_DEVFN(path->device,
									    path->function);
					rcu_assign_pointer(dmaru->devices[i].dev,
							   get_device(&adev->dev));
					return;
				}
			BUG_ON(i >= dmaru->devices_cnt);
		}
	}
	pr_warn("No IOMMU scope found for ANDD enumeration ID %d (%s)\n",
		device_number, dev_name(&adev->dev));
}

static int __init dmar_acpi_dev_scope_init(void)
{
	struct acpi_dmar_andd *andd;

	if (dmar_tbl == NULL)
		return -ENODEV;

	for (andd = (void *)dmar_tbl + sizeof(struct acpi_table_dmar);
	     ((unsigned long)andd) < ((unsigned long)dmar_tbl) + dmar_tbl->length;
	     andd = ((void *)andd) + andd->header.length) {
		if (andd->header.type == ACPI_DMAR_TYPE_NAMESPACE) {
			acpi_handle h;
			struct acpi_device *adev;

			if (!ACPI_SUCCESS(acpi_get_handle(ACPI_ROOT_OBJECT,
							  andd->device_name,
							  &h))) {
				pr_err("Failed to find handle for ACPI object %s\n",
				       andd->device_name);
				continue;
			}
			if (acpi_bus_get_device(h, &adev)) {
				pr_err("Failed to get device for ACPI object %s\n",
				       andd->device_name);
				continue;
			}
			dmar_acpi_insert_dev_scope(andd->device_number, adev);
		}
	}
	return 0;
}

int __init dmar_dev_scope_init(void)
{
	struct pci_dev *dev = NULL;
	struct dmar_pci_notify_info *info;

	if (dmar_dev_scope_status != 1)
		return dmar_dev_scope_status;

	if (list_empty(&dmar_drhd_units)) {
		dmar_dev_scope_status = -ENODEV;
	} else {
		dmar_dev_scope_status = 0;

		dmar_acpi_dev_scope_init();

		for_each_pci_dev(dev) {
			if (dev->is_virtfn)
				continue;

			info = dmar_alloc_pci_notify_info(dev,
					BUS_NOTIFY_ADD_DEVICE);
			if (!info)
				return dmar_dev_scope_status;

			dmar_pci_bus_add_dev(info);
			dmar_free_pci_notify_info(info);
		}

		bus_register_notifier(&pci_bus_type, &dmar_pci_bus_nb);
	}

	return dmar_dev_scope_status;
}

int __init dmar_table_init(void)
{
	static int dmar_table_initialized;
	int ret;

	if (dmar_table_initialized == 0) {
		ret = parse_dmar_table();
		if (ret < 0) {
			if (ret != -ENODEV)
				pr_info("Failed to parse DMAR table\n");
		} else if (list_empty(&dmar_drhd_units)) {
			pr_info("No DMAR devices found\n");
			ret = -ENODEV;
		}

		if (ret < 0)
			dmar_table_initialized = ret;
		else
			dmar_table_initialized = 1;
	}

	return dmar_table_initialized < 0 ? dmar_table_initialized : 0;
}

static void warn_invalid_dmar(u64 addr, const char *message)
{
	WARN_TAINT_ONCE(
		1, TAINT_FIRMWARE_WORKAROUND,
		"Your BIOS is broken; DMAR reported at address %llx%s!\n"
		"BIOS vendor: %s; Ver: %s; Product Version: %s\n",
		addr, message,
		dmi_get_system_info(DMI_BIOS_VENDOR),
		dmi_get_system_info(DMI_BIOS_VERSION),
		dmi_get_system_info(DMI_PRODUCT_VERSION));
}

static int __ref
dmar_validate_one_drhd(struct acpi_dmar_header *entry, void *arg)
{
	struct acpi_dmar_hardware_unit *drhd;
	void __iomem *addr;
	u64 cap, ecap;

	drhd = (void *)entry;
	if (!drhd->address) {
		warn_invalid_dmar(0, "");
		return -EINVAL;
	}

	if (arg)
		addr = ioremap(drhd->address, VTD_PAGE_SIZE);
	else
		addr = early_ioremap(drhd->address, VTD_PAGE_SIZE);
	if (!addr) {
		pr_warn("IOMMU: can't validate: %llx\n", drhd->address);
		return -EINVAL;
	}

	cap = dmar_readq(addr + DMAR_CAP_REG);
	ecap = dmar_readq(addr + DMAR_ECAP_REG);

	if (arg)
		iounmap(addr);
	else
		early_iounmap(addr, VTD_PAGE_SIZE);

	if (cap == (uint64_t)-1 && ecap == (uint64_t)-1) {
		warn_invalid_dmar(drhd->address, " returns all ones");
		return -EINVAL;
	}

	return 0;
}

int __init detect_intel_iommu(void)
{
	int ret;
	struct dmar_res_callback validate_drhd_cb = {
		.cb[ACPI_DMAR_TYPE_HARDWARE_UNIT] = &dmar_validate_one_drhd,
		.ignore_unhandled = true,
	};

	down_write(&dmar_global_lock);
	ret = dmar_table_detect();
	if (ret)
		ret = !dmar_walk_dmar_table((struct acpi_table_dmar *)dmar_tbl,
					    &validate_drhd_cb);
	if (ret && !no_iommu && !iommu_detected && !dmar_disabled) {
		iommu_detected = 1;
		/* Make sure ACS will be enabled */
		pci_request_acs();
	}

#ifdef CONFIG_X86
	if (ret)
		x86_init.iommu.iommu_init = intel_iommu_init;
#endif

	early_acpi_os_unmap_memory((void __iomem *)dmar_tbl, dmar_tbl_size);
	dmar_tbl = NULL;
	up_write(&dmar_global_lock);

	return ret ? 1 : -ENODEV;
}

static void unmap_iommu(struct intel_iommu *iommu)
{
	iounmap(iommu->reg);
	release_mem_region(iommu->reg_phys, iommu->reg_size);
}

/**
 * map_iommu: map the iommu's registers
 * @iommu: the iommu to map
 * @phys_addr: the physical address of the base register
 *
 * Memory map the iommu's registers.  Start with a single page, and
 * possibly expand if that turns out to be insufficient.
 */
static int map_iommu(struct intel_iommu *iommu, u64 phys_addr)
{
	int map_size, err = 0;

	iommu->reg_phys = phys_addr;
	iommu->reg_size = VTD_PAGE_SIZE;

	if (!request_mem_region(iommu->reg_phys, iommu->reg_size, iommu->name)) {
		pr_err("IOMMU: can't reserve memory\n");
		err = -EBUSY;
		goto out;
	}

	iommu->reg = ioremap(iommu->reg_phys, iommu->reg_size);
	if (!iommu->reg) {
		pr_err("IOMMU: can't map the region\n");
		err = -ENOMEM;
		goto release;
	}

	iommu->cap = dmar_readq(iommu->reg + DMAR_CAP_REG);
	iommu->ecap = dmar_readq(iommu->reg + DMAR_ECAP_REG);

	if (iommu->cap == (uint64_t)-1 && iommu->ecap == (uint64_t)-1) {
		err = -EINVAL;
		warn_invalid_dmar(phys_addr, " returns all ones");
		goto unmap;
	}

	/* the registers might span more than one page */
	map_size = max_t(int, ecap_max_iotlb_offset(iommu->ecap),
			 cap_max_fault_reg_offset(iommu->cap));
	map_size = VTD_PAGE_ALIGN(map_size);
	if (map_size > iommu->reg_size) {
		iounmap(iommu->reg);
		release_mem_region(iommu->reg_phys, iommu->reg_size);
		iommu->reg_size = map_size;
		if (!request_mem_region(iommu->reg_phys, iommu->reg_size,
					iommu->name)) {
			pr_err("IOMMU: can't reserve memory\n");
			err = -EBUSY;
			goto out;
		}
		iommu->reg = ioremap(iommu->reg_phys, iommu->reg_size);
		if (!iommu->reg) {
			pr_err("IOMMU: can't map the region\n");
			err = -ENOMEM;
			goto release;
		}
	}
	err = 0;
	goto out;

unmap:
	iounmap(iommu->reg);
release:
	release_mem_region(iommu->reg_phys, iommu->reg_size);
out:
	return err;
}

static int dmar_alloc_seq_id(struct intel_iommu *iommu)
{
	iommu->seq_id = find_first_zero_bit(dmar_seq_ids,
					    DMAR_UNITS_SUPPORTED);
	if (iommu->seq_id >= DMAR_UNITS_SUPPORTED) {
		iommu->seq_id = -1;
	} else {
		set_bit(iommu->seq_id, dmar_seq_ids);
		sprintf(iommu->name, "dmar%d", iommu->seq_id);
	}

	return iommu->seq_id;
}

static void dmar_free_seq_id(struct intel_iommu *iommu)
{
	if (iommu->seq_id >= 0) {
		clear_bit(iommu->seq_id, dmar_seq_ids);
		iommu->seq_id = -1;
	}
}

static int alloc_iommu(struct dmar_drhd_unit *drhd)
{
	struct intel_iommu *iommu;
	u32 ver, sts;
	int agaw = 0;
	int msagaw = 0;
	int err;

	if (!drhd->reg_base_addr) {
		warn_invalid_dmar(0, "");
		return -EINVAL;
	}

	iommu = kzalloc(sizeof(*iommu), GFP_KERNEL);
	if (!iommu)
		return -ENOMEM;

	if (dmar_alloc_seq_id(iommu) < 0) {
		pr_err("IOMMU: failed to allocate seq_id\n");
		err = -ENOSPC;
		goto error;
	}

	err = map_iommu(iommu, drhd->reg_base_addr);
	if (err) {
		pr_err("IOMMU: failed to map %s\n", iommu->name);
		goto error_free_seq_id;
	}

	err = -EINVAL;
	agaw = iommu_calculate_agaw(iommu);
	if (agaw < 0) {
		pr_err("Cannot get a valid agaw for iommu (seq_id = %d)\n",
			iommu->seq_id);
		goto err_unmap;
	}
	msagaw = iommu_calculate_max_sagaw(iommu);
	if (msagaw < 0) {
		pr_err("Cannot get a valid max agaw for iommu (seq_id = %d)\n",
			iommu->seq_id);
		goto err_unmap;
	}
	iommu->agaw = agaw;
	iommu->msagaw = msagaw;
	iommu->segment = drhd->segment;

	iommu->node = -1;

	ver = readl(iommu->reg + DMAR_VER_REG);
	pr_info("IOMMU %d: reg_base_addr %llx ver %d:%d cap %llx ecap %llx\n",
		iommu->seq_id,
		(unsigned long long)drhd->reg_base_addr,
		DMAR_VER_MAJOR(ver), DMAR_VER_MINOR(ver),
		(unsigned long long)iommu->cap,
		(unsigned long long)iommu->ecap);

	/* Reflect status in gcmd */
	sts = readl(iommu->reg + DMAR_GSTS_REG);
	if (sts & DMA_GSTS_IRES)
		iommu->gcmd |= DMA_GCMD_IRE;
	if (sts & DMA_GSTS_TES)
		iommu->gcmd |= DMA_GCMD_TE;
	if (sts & DMA_GSTS_QIES)
		iommu->gcmd |= DMA_GCMD_QIE;

	raw_spin_lock_init(&iommu->register_lock);

	drhd->iommu = iommu;

	if (intel_iommu_enabled)
		iommu->iommu_dev = iommu_device_create(NULL, iommu,
						       intel_iommu_groups,
						       iommu->name);

	return 0;

err_unmap:
	unmap_iommu(iommu);
error_free_seq_id:
	dmar_free_seq_id(iommu);
error:
	kfree(iommu);
	return err;
}

static void free_iommu(struct intel_iommu *iommu)
{
	iommu_device_destroy(iommu->iommu_dev);

	if (iommu->irq) {
		free_irq(iommu->irq, iommu);
		irq_set_handler_data(iommu->irq, NULL);
		dmar_free_hwirq(iommu->irq);
	}

	if (iommu->qi) {
		free_page((unsigned long)iommu->qi->desc);
		kfree(iommu->qi->desc_status);
		kfree(iommu->qi);
	}

	if (iommu->reg)
		unmap_iommu(iommu);

	dmar_free_seq_id(iommu);
	kfree(iommu);
}

/*
 * Reclaim all the submitted descriptors which have completed their work.
 */
static inline void reclaim_free_desc(struct q_inval *qi)
{
	while (qi->desc_status[qi->free_tail] == QI_DONE ||
	       qi->desc_status[qi->free_tail] == QI_ABORT) {
		qi->desc_status[qi->free_tail] = QI_FREE;
		qi->free_tail = (qi->free_tail + 1) % QI_LENGTH;
		qi->free_cnt++;
	}
}
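
/*
 * Example (illustrative values): if desc_status[] around free_tail is
 *
 *	{ ..., QI_DONE, QI_DONE, QI_IN_USE, ... }
 *	       ^ free_tail
 *
 * reclaim_free_desc() marks both QI_DONE slots QI_FREE, advances
 * free_tail past them and grows free_cnt by two, stopping at the
 * first slot that is still QI_IN_USE.
 */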

static int qi_check_fault(struct intel_iommu *iommu, int index)
{
	u32 fault;
	int head, tail;
	struct q_inval *qi = iommu->qi;
	int wait_index = (index + 1) % QI_LENGTH;

	if (qi->desc_status[wait_index] == QI_ABORT)
		return -EAGAIN;

	fault = readl(iommu->reg + DMAR_FSTS_REG);

	/*
	 * If IQE happens, the head points to the descriptor associated
	 * with the error. No new descriptors are fetched until the IQE
	 * is cleared.
	 */
	if (fault & DMA_FSTS_IQE) {
		head = readl(iommu->reg + DMAR_IQH_REG);
		if ((head >> DMAR_IQ_SHIFT) == index) {
			pr_err("VT-d detected invalid descriptor: low=%llx, high=%llx\n",
				(unsigned long long)qi->desc[index].low,
				(unsigned long long)qi->desc[index].high);
			memcpy(&qi->desc[index], &qi->desc[wait_index],
					sizeof(struct qi_desc));
			__iommu_flush_cache(iommu, &qi->desc[index],
					sizeof(struct qi_desc));
			writel(DMA_FSTS_IQE, iommu->reg + DMAR_FSTS_REG);
			return -EINVAL;
		}
	}

	/*
	 * If ITE happens, all pending wait_desc commands are aborted.
	 * No new descriptors are fetched until the ITE is cleared.
	 */
	if (fault & DMA_FSTS_ITE) {
		head = readl(iommu->reg + DMAR_IQH_REG);
		head = ((head >> DMAR_IQ_SHIFT) - 1 + QI_LENGTH) % QI_LENGTH;
		head |= 1;
		tail = readl(iommu->reg + DMAR_IQT_REG);
		tail = ((tail >> DMAR_IQ_SHIFT) - 1 + QI_LENGTH) % QI_LENGTH;

		writel(DMA_FSTS_ITE, iommu->reg + DMAR_FSTS_REG);

		do {
			if (qi->desc_status[head] == QI_IN_USE)
				qi->desc_status[head] = QI_ABORT;
			head = (head - 2 + QI_LENGTH) % QI_LENGTH;
		} while (head != tail);

		if (qi->desc_status[wait_index] == QI_ABORT)
			return -EAGAIN;
	}

	if (fault & DMA_FSTS_ICE)
		writel(DMA_FSTS_ICE, iommu->reg + DMAR_FSTS_REG);

	return 0;
}

/*
 * Submit the queued invalidation descriptor to the remapping
 * hardware unit and wait for its completion.
 */
int qi_submit_sync(struct qi_desc *desc, struct intel_iommu *iommu)
{
	int rc;
	struct q_inval *qi = iommu->qi;
	struct qi_desc *hw, wait_desc;
	int wait_index, index;
	unsigned long flags;

	if (!qi)
		return 0;

	hw = qi->desc;

restart:
	rc = 0;

	raw_spin_lock_irqsave(&qi->q_lock, flags);
	while (qi->free_cnt < 3) {
		raw_spin_unlock_irqrestore(&qi->q_lock, flags);
		cpu_relax();
		raw_spin_lock_irqsave(&qi->q_lock, flags);
	}

	index = qi->free_head;
	wait_index = (index + 1) % QI_LENGTH;

	qi->desc_status[index] = qi->desc_status[wait_index] = QI_IN_USE;

	hw[index] = *desc;

	wait_desc.low = QI_IWD_STATUS_DATA(QI_DONE) |
			QI_IWD_STATUS_WRITE | QI_IWD_TYPE;
	wait_desc.high = virt_to_phys(&qi->desc_status[wait_index]);

	hw[wait_index] = wait_desc;

	__iommu_flush_cache(iommu, &hw[index], sizeof(struct qi_desc));
	__iommu_flush_cache(iommu, &hw[wait_index], sizeof(struct qi_desc));

	qi->free_head = (qi->free_head + 2) % QI_LENGTH;
	qi->free_cnt -= 2;

	/*
	 * Update the HW tail register indicating the presence of
	 * new descriptors.
	 */
	writel(qi->free_head << DMAR_IQ_SHIFT, iommu->reg + DMAR_IQT_REG);

	while (qi->desc_status[wait_index] != QI_DONE) {
		/*
		 * We will leave the interrupts disabled, to prevent interrupt
		 * context from queueing another cmd while a cmd is already
		 * submitted and waiting for completion on this cpu. This is
		 * to avoid a deadlock where the interrupt context can wait
		 * indefinitely for free slots in the queue.
		 */
		rc = qi_check_fault(iommu, index);
		if (rc)
			break;

		raw_spin_unlock(&qi->q_lock);
		cpu_relax();
		raw_spin_lock(&qi->q_lock);
	}

	qi->desc_status[index] = QI_DONE;

	reclaim_free_desc(qi);
	raw_spin_unlock_irqrestore(&qi->q_lock, flags);

	if (rc == -EAGAIN)
		goto restart;

	return rc;
}

/*
 * Flush the global interrupt entry cache.
 */
void qi_global_iec(struct intel_iommu *iommu)
{
	struct qi_desc desc;

	desc.low = QI_IEC_TYPE;
	desc.high = 0;

	/* should never fail */
	qi_submit_sync(&desc, iommu);
}

void qi_flush_context(struct intel_iommu *iommu, u16 did, u16 sid, u8 fm,
		      u64 type)
{
	struct qi_desc desc;

	desc.low = QI_CC_FM(fm) | QI_CC_SID(sid) | QI_CC_DID(did)
			| QI_CC_GRAN(type) | QI_CC_TYPE;
	desc.high = 0;

	qi_submit_sync(&desc, iommu);
}

void qi_flush_iotlb(struct intel_iommu *iommu, u16 did, u64 addr,
		    unsigned int size_order, u64 type)
{
	u8 dw = 0, dr = 0;

	struct qi_desc desc;
	int ih = 0;

	if (cap_write_drain(iommu->cap))
		dw = 1;

	if (cap_read_drain(iommu->cap))
		dr = 1;

	desc.low = QI_IOTLB_DID(did) | QI_IOTLB_DR(dr) | QI_IOTLB_DW(dw)
		| QI_IOTLB_GRAN(type) | QI_IOTLB_TYPE;
	desc.high = QI_IOTLB_ADDR(addr) | QI_IOTLB_IH(ih)
		| QI_IOTLB_AM(size_order);

	qi_submit_sync(&desc, iommu);
}

void qi_flush_dev_iotlb(struct intel_iommu *iommu, u16 sid, u16 qdep,
			u64 addr, unsigned mask)
{
	struct qi_desc desc;

	if (mask) {
		BUG_ON(addr & ((1 << (VTD_PAGE_SHIFT + mask)) - 1));
		addr |= (1 << (VTD_PAGE_SHIFT + mask - 1)) - 1;
		desc.high = QI_DEV_IOTLB_ADDR(addr) | QI_DEV_IOTLB_SIZE;
	} else {
		desc.high = QI_DEV_IOTLB_ADDR(addr);
	}

	if (qdep >= QI_DEV_IOTLB_MAX_INVS)
		qdep = 0;

	desc.low = QI_DEV_IOTLB_SID(sid) | QI_DEV_IOTLB_QDEP(qdep) |
		   QI_DIOTLB_TYPE;

	qi_submit_sync(&desc, iommu);
}
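
/*
 * Example for the mask handling above (illustrative values, assuming
 * VTD_PAGE_SHIFT == 12): mask == 2 describes a 4-page, 16KB-aligned
 * range, so an addr of 0x10000 becomes 0x10000 | 0x1fff = 0x11fff.
 * Setting the low-order bits below the invalidation size to ones lets
 * the hardware infer the range size from the address itself, while
 * QI_DEV_IOTLB_SIZE flags the descriptor as a range invalidation.
 */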

/*
 * Disable Queued Invalidation interface.
 */
void dmar_disable_qi(struct intel_iommu *iommu)
{
	unsigned long flags;
	u32 sts;
	cycles_t start_time = get_cycles();

	if (!ecap_qis(iommu->ecap))
		return;

	raw_spin_lock_irqsave(&iommu->register_lock, flags);

	sts = readl(iommu->reg + DMAR_GSTS_REG);
	if (!(sts & DMA_GSTS_QIES))
		goto end;

	/*
	 * Give the HW a chance to complete the pending invalidation requests.
	 */
	while ((readl(iommu->reg + DMAR_IQT_REG) !=
		readl(iommu->reg + DMAR_IQH_REG)) &&
		(DMAR_OPERATION_TIMEOUT > (get_cycles() - start_time)))
		cpu_relax();

	iommu->gcmd &= ~DMA_GCMD_QIE;
	writel(iommu->gcmd, iommu->reg + DMAR_GCMD_REG);

	IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG, readl,
		      !(sts & DMA_GSTS_QIES), sts);
end:
	raw_spin_unlock_irqrestore(&iommu->register_lock, flags);
}

/*
 * Enable queued invalidation.
 */
static void __dmar_enable_qi(struct intel_iommu *iommu)
{
	u32 sts;
	unsigned long flags;
	struct q_inval *qi = iommu->qi;

	qi->free_head = qi->free_tail = 0;
	qi->free_cnt = QI_LENGTH;

	raw_spin_lock_irqsave(&iommu->register_lock, flags);

	/* write zero to the tail reg */
	writel(0, iommu->reg + DMAR_IQT_REG);

	dmar_writeq(iommu->reg + DMAR_IQA_REG, virt_to_phys(qi->desc));

	iommu->gcmd |= DMA_GCMD_QIE;
	writel(iommu->gcmd, iommu->reg + DMAR_GCMD_REG);

	/* Make sure hardware completes it */
	IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG, readl, (sts & DMA_GSTS_QIES), sts);

	raw_spin_unlock_irqrestore(&iommu->register_lock, flags);
}

/*
 * Enable Queued Invalidation interface. This is required to support
 * interrupt-remapping. Also used by DMA-remapping, which replaces
 * register-based IOTLB invalidation.
 */
int dmar_enable_qi(struct intel_iommu *iommu)
{
	struct q_inval *qi;
	struct page *desc_page;

	if (!ecap_qis(iommu->ecap))
		return -ENOENT;

	/*
	 * Queued invalidation is already set up and enabled.
	 */
	if (iommu->qi)
		return 0;

	iommu->qi = kmalloc(sizeof(*qi), GFP_ATOMIC);
	if (!iommu->qi)
		return -ENOMEM;

	qi = iommu->qi;

	desc_page = alloc_pages_node(iommu->node, GFP_ATOMIC | __GFP_ZERO, 0);
	if (!desc_page) {
		kfree(qi);
		iommu->qi = NULL;
		return -ENOMEM;
	}

	qi->desc = page_address(desc_page);

	qi->desc_status = kzalloc(QI_LENGTH * sizeof(int), GFP_ATOMIC);
	if (!qi->desc_status) {
		free_page((unsigned long) qi->desc);
		kfree(qi);
		iommu->qi = NULL;
		return -ENOMEM;
	}

	raw_spin_lock_init(&qi->q_lock);

	__dmar_enable_qi(iommu);

	return 0;
}

/* iommu interrupt handling. Most of it is MSI-like. */

enum faulttype {
	DMA_REMAP,
	INTR_REMAP,
	UNKNOWN,
};

static const char *dma_remap_fault_reasons[] =
{
	"Software",
	"Present bit in root entry is clear",
	"Present bit in context entry is clear",
	"Invalid context entry",
	"Access beyond MGAW",
	"PTE Write access is not set",
	"PTE Read access is not set",
	"Next page table ptr is invalid",
	"Root table address invalid",
	"Context table ptr is invalid",
	"non-zero reserved fields in RTP",
	"non-zero reserved fields in CTP",
	"non-zero reserved fields in PTE",
	"PCE for translation request specifies blocking",
};

static const char *irq_remap_fault_reasons[] =
{
	"Detected reserved fields in the decoded interrupt-remapped request",
	"Interrupt index exceeded the interrupt-remapping table size",
	"Present field in the IRTE entry is clear",
	"Error accessing interrupt-remapping table pointed by IRTA_REG",
	"Detected reserved fields in the IRTE entry",
	"Blocked a compatibility format interrupt request",
	"Blocked an interrupt request due to source-id verification failure",
};
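
/*
 * Fault reason encoding, as decoded by dmar_get_fault_reason() below:
 * reasons smaller than ARRAY_SIZE(dma_remap_fault_reasons) index the
 * DMA-remapping table above, interrupt-remapping reasons start at 0x20
 * and index irq_remap_fault_reasons[], and anything else is reported
 * as "Unknown".
 */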

static const char *dmar_get_fault_reason(u8 fault_reason, int *fault_type)
{
	if (fault_reason >= 0x20 && (fault_reason - 0x20 <
					ARRAY_SIZE(irq_remap_fault_reasons))) {
		*fault_type = INTR_REMAP;
		return irq_remap_fault_reasons[fault_reason - 0x20];
	} else if (fault_reason < ARRAY_SIZE(dma_remap_fault_reasons)) {
		*fault_type = DMA_REMAP;
		return dma_remap_fault_reasons[fault_reason];
	} else {
		*fault_type = UNKNOWN;
		return "Unknown";
	}
}

void dmar_msi_unmask(struct irq_data *data)
{
	struct intel_iommu *iommu = irq_data_get_irq_handler_data(data);
	unsigned long flag;

	/* unmask it */
	raw_spin_lock_irqsave(&iommu->register_lock, flag);
	writel(0, iommu->reg + DMAR_FECTL_REG);
	/* Read a reg to force flush the post write */
	readl(iommu->reg + DMAR_FECTL_REG);
	raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
}

void dmar_msi_mask(struct irq_data *data)
{
	unsigned long flag;
	struct intel_iommu *iommu = irq_data_get_irq_handler_data(data);

	/* mask it */
	raw_spin_lock_irqsave(&iommu->register_lock, flag);
	writel(DMA_FECTL_IM, iommu->reg + DMAR_FECTL_REG);
	/* Read a reg to force flush the post write */
	readl(iommu->reg + DMAR_FECTL_REG);
	raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
}

void dmar_msi_write(int irq, struct msi_msg *msg)
{
	struct intel_iommu *iommu = irq_get_handler_data(irq);
	unsigned long flag;

	raw_spin_lock_irqsave(&iommu->register_lock, flag);
	writel(msg->data, iommu->reg + DMAR_FEDATA_REG);
	writel(msg->address_lo, iommu->reg + DMAR_FEADDR_REG);
	writel(msg->address_hi, iommu->reg + DMAR_FEUADDR_REG);
	raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
}

void dmar_msi_read(int irq, struct msi_msg *msg)
{
	struct intel_iommu *iommu = irq_get_handler_data(irq);
	unsigned long flag;

	raw_spin_lock_irqsave(&iommu->register_lock, flag);
	msg->data = readl(iommu->reg + DMAR_FEDATA_REG);
	msg->address_lo = readl(iommu->reg + DMAR_FEADDR_REG);
	msg->address_hi = readl(iommu->reg + DMAR_FEUADDR_REG);
	raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
}

static int dmar_fault_do_one(struct intel_iommu *iommu, int type,
		u8 fault_reason, u16 source_id, unsigned long long addr)
{
	const char *reason;
	int fault_type;

	reason = dmar_get_fault_reason(fault_reason, &fault_type);

	if (fault_type == INTR_REMAP)
		pr_err("INTR-REMAP: Request device [%02x:%02x.%d] fault index %llx\n"
			"INTR-REMAP:[fault reason %02d] %s\n",
			(source_id >> 8), PCI_SLOT(source_id & 0xFF),
			PCI_FUNC(source_id & 0xFF), addr >> 48,
			fault_reason, reason);
	else
		pr_err("DMAR:[%s] Request device [%02x:%02x.%d] fault addr %llx\n"
		       "DMAR:[fault reason %02d] %s\n",
		       (type ? "DMA Read" : "DMA Write"),
		       (source_id >> 8), PCI_SLOT(source_id & 0xFF),
		       PCI_FUNC(source_id & 0xFF), addr, fault_reason, reason);
	return 0;
}

#define PRIMARY_FAULT_REG_LEN (16)
irqreturn_t dmar_fault(int irq, void *dev_id)
{
	struct intel_iommu *iommu = dev_id;
	int reg, fault_index;
	u32 fault_status;
	unsigned long flag;

	raw_spin_lock_irqsave(&iommu->register_lock, flag);
	fault_status = readl(iommu->reg + DMAR_FSTS_REG);
	if (fault_status)
		pr_err("DRHD: handling fault status reg %x\n", fault_status);

	/* TBD: ignore advanced fault log currently */
	if (!(fault_status & DMA_FSTS_PPF))
		goto unlock_exit;

	fault_index = dma_fsts_fault_record_index(fault_status);
	reg = cap_fault_reg_offset(iommu->cap);
	while (1) {
		u8 fault_reason;
		u16 source_id;
		u64 guest_addr;
		int type;
		u32 data;

		/* highest 32 bits */
		data = readl(iommu->reg + reg +
				fault_index * PRIMARY_FAULT_REG_LEN + 12);
		if (!(data & DMA_FRCD_F))
			break;

		fault_reason = dma_frcd_fault_reason(data);
		type = dma_frcd_type(data);

		data = readl(iommu->reg + reg +
				fault_index * PRIMARY_FAULT_REG_LEN + 8);
		source_id = dma_frcd_source_id(data);

		guest_addr = dmar_readq(iommu->reg + reg +
				fault_index * PRIMARY_FAULT_REG_LEN);
		guest_addr = dma_frcd_page_addr(guest_addr);
		/* clear the fault */
		writel(DMA_FRCD_F, iommu->reg + reg +
			fault_index * PRIMARY_FAULT_REG_LEN + 12);

		raw_spin_unlock_irqrestore(&iommu->register_lock, flag);

		dmar_fault_do_one(iommu, type, fault_reason,
				source_id, guest_addr);

		fault_index++;
		if (fault_index >= cap_num_fault_regs(iommu->cap))
			fault_index = 0;
		raw_spin_lock_irqsave(&iommu->register_lock, flag);
	}

	writel(DMA_FSTS_PFO | DMA_FSTS_PPF, iommu->reg + DMAR_FSTS_REG);

unlock_exit:
	raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
	return IRQ_HANDLED;
}

int dmar_set_interrupt(struct intel_iommu *iommu)
{
	int irq, ret;

	/*
	 * Check if the fault interrupt is already initialized.
	 */
	if (iommu->irq)
		return 0;

	irq = dmar_alloc_hwirq();
	if (irq <= 0) {
		pr_err("IOMMU: no free vectors\n");
		return -EINVAL;
	}

	irq_set_handler_data(irq, iommu);
	iommu->irq = irq;

	ret = arch_setup_dmar_msi(irq);
	if (ret) {
		irq_set_handler_data(irq, NULL);
		iommu->irq = 0;
		dmar_free_hwirq(irq);
		return ret;
	}

	ret = request_irq(irq, dmar_fault, IRQF_NO_THREAD, iommu->name, iommu);
	if (ret)
		pr_err("IOMMU: can't request irq\n");
	return ret;
}

int __init enable_drhd_fault_handling(void)
{
	struct dmar_drhd_unit *drhd;
	struct intel_iommu *iommu;

	/*
	 * Enable fault control interrupt.
	 */
	for_each_iommu(iommu, drhd) {
		u32 fault_status;
		int ret = dmar_set_interrupt(iommu);

		if (ret) {
			pr_err("DRHD %Lx: failed to enable fault interrupt, ret %d\n",
			       (unsigned long long)drhd->reg_base_addr, ret);
			return -1;
		}

		/*
		 * Clear any previous faults.
		 */
		dmar_fault(iommu->irq, iommu);
		fault_status = readl(iommu->reg + DMAR_FSTS_REG);
		writel(fault_status, iommu->reg + DMAR_FSTS_REG);
	}

	return 0;
}

/*
 * Re-enable Queued Invalidation interface.
 */
int dmar_reenable_qi(struct intel_iommu *iommu)
{
	if (!ecap_qis(iommu->ecap))
		return -ENOENT;

	if (!iommu->qi)
		return -ENOENT;

	/*
	 * First disable queued invalidation.
	 */
	dmar_disable_qi(iommu);
	/*
	 * Then enable queued invalidation again. Since there are no pending
	 * invalidation requests now, it's safe to re-enable queued
	 * invalidation.
	 */
	__dmar_enable_qi(iommu);

	return 0;
}

/*
 * Check interrupt remapping support in the DMAR table description.
 */
int __init dmar_ir_support(void)
{
	struct acpi_table_dmar *dmar;

	dmar = (struct acpi_table_dmar *)dmar_tbl;
	if (!dmar)
		return 0;
	return dmar->flags & 0x1;
}

/* Check whether DMAR units are in use */
static inline bool dmar_in_use(void)
{
	return irq_remapping_enabled || intel_iommu_enabled;
}

static int __init dmar_free_unused_resources(void)
{
	struct dmar_drhd_unit *dmaru, *dmaru_n;

	if (dmar_in_use())
		return 0;

	if (dmar_dev_scope_status != 1 && !list_empty(&dmar_drhd_units))
		bus_unregister_notifier(&pci_bus_type, &dmar_pci_bus_nb);

	down_write(&dmar_global_lock);
	list_for_each_entry_safe(dmaru, dmaru_n, &dmar_drhd_units, list) {
		list_del(&dmaru->list);
		dmar_free_drhd(dmaru);
	}
	up_write(&dmar_global_lock);

	return 0;
}

late_initcall(dmar_free_unused_resources);
IOMMU_INIT_POST(detect_intel_iommu);

/*
 * DMAR Hotplug Support
 * For more details, please refer to Intel(R) Virtualization Technology
 * for Directed-IO Architecture Specification, Rev 2.2, Section 8.8
 * "Remapping Hardware Unit Hot Plug".
 */
static u8 dmar_hp_uuid[] = {
	/* 0000 */    0xA6, 0xA3, 0xC1, 0xD8, 0x9B, 0xBE, 0x9B, 0x4C,
	/* 0008 */    0x91, 0xBF, 0xC3, 0xCB, 0x81, 0xFC, 0x5D, 0xAF
};
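
/*
 * Raw byte encoding of the DMAR hotplug _DSM UUID in the little-endian
 * field layout used for ACPI UUID buffers; it decodes to
 * D8C1A3A6-BE9B-4C9B-91BF-C3CB81FC5DAF.
 */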

/*
 * Currently there's only one revision and BIOS will not check the revision id,
 * so use 0 for safety.
 */
#define	DMAR_DSM_REV_ID			0
#define	DMAR_DSM_FUNC_DRHD		1
#define	DMAR_DSM_FUNC_ATSR		2
#define	DMAR_DSM_FUNC_RHSA		3

static inline bool dmar_detect_dsm(acpi_handle handle, int func)
{
	return acpi_check_dsm(handle, dmar_hp_uuid, DMAR_DSM_REV_ID, 1 << func);
}

static int dmar_walk_dsm_resource(acpi_handle handle, int func,
				  dmar_res_handler_t handler, void *arg)
{
	int ret = -ENODEV;
	union acpi_object *obj;
	struct acpi_dmar_header *start;
	struct dmar_res_callback callback;
	static int res_type[] = {
		[DMAR_DSM_FUNC_DRHD] = ACPI_DMAR_TYPE_HARDWARE_UNIT,
		[DMAR_DSM_FUNC_ATSR] = ACPI_DMAR_TYPE_ROOT_ATS,
		[DMAR_DSM_FUNC_RHSA] = ACPI_DMAR_TYPE_HARDWARE_AFFINITY,
	};

	if (!dmar_detect_dsm(handle, func))
		return 0;

	obj = acpi_evaluate_dsm_typed(handle, dmar_hp_uuid, DMAR_DSM_REV_ID,
				      func, NULL, ACPI_TYPE_BUFFER);
	if (!obj)
		return -ENODEV;

	memset(&callback, 0, sizeof(callback));
	callback.cb[res_type[func]] = handler;
	callback.arg[res_type[func]] = arg;
	start = (struct acpi_dmar_header *)obj->buffer.pointer;
	ret = dmar_walk_remapping_entries(start, obj->buffer.length, &callback);

	ACPI_FREE(obj);

	return ret;
}

static int dmar_hp_add_drhd(struct acpi_dmar_header *header, void *arg)
{
	int ret;
	struct dmar_drhd_unit *dmaru;

	dmaru = dmar_find_dmaru((struct acpi_dmar_hardware_unit *)header);
	if (!dmaru)
		return -ENODEV;

	ret = dmar_ir_hotplug(dmaru, true);
	if (ret == 0)
		ret = dmar_iommu_hotplug(dmaru, true);

	return ret;
}

static int dmar_hp_remove_drhd(struct acpi_dmar_header *header, void *arg)
{
	int i, ret;
	struct device *dev;
	struct dmar_drhd_unit *dmaru;

	dmaru = dmar_find_dmaru((struct acpi_dmar_hardware_unit *)header);
	if (!dmaru)
		return 0;

	/*
	 * All PCI devices managed by this unit should have been destroyed.
	 */
	if (!dmaru->include_all && dmaru->devices && dmaru->devices_cnt)
		for_each_active_dev_scope(dmaru->devices,
					  dmaru->devices_cnt, i, dev)
			return -EBUSY;

	ret = dmar_ir_hotplug(dmaru, false);
	if (ret == 0)
		ret = dmar_iommu_hotplug(dmaru, false);

	return ret;
}

static int dmar_hp_release_drhd(struct acpi_dmar_header *header, void *arg)
{
	struct dmar_drhd_unit *dmaru;

	dmaru = dmar_find_dmaru((struct acpi_dmar_hardware_unit *)header);
	if (dmaru) {
		list_del_rcu(&dmaru->list);
		synchronize_rcu();
		dmar_free_drhd(dmaru);
	}

	return 0;
}

static int dmar_hotplug_insert(acpi_handle handle)
{
	int ret;
	int drhd_count = 0;

	ret = dmar_walk_dsm_resource(handle, DMAR_DSM_FUNC_DRHD,
				     &dmar_validate_one_drhd, (void *)1);
	if (ret)
		goto out;

	ret = dmar_walk_dsm_resource(handle, DMAR_DSM_FUNC_DRHD,
				     &dmar_parse_one_drhd, (void *)&drhd_count);
	if (ret == 0 && drhd_count == 0) {
		pr_warn(FW_BUG "No DRHD structures in buffer returned by _DSM method\n");
		goto out;
	} else if (ret) {
		goto release_drhd;
	}

	ret = dmar_walk_dsm_resource(handle, DMAR_DSM_FUNC_RHSA,
				     &dmar_parse_one_rhsa, NULL);
	if (ret)
		goto release_drhd;

	ret = dmar_walk_dsm_resource(handle, DMAR_DSM_FUNC_ATSR,
				     &dmar_parse_one_atsr, NULL);
	if (ret)
		goto release_atsr;

	ret = dmar_walk_dsm_resource(handle, DMAR_DSM_FUNC_DRHD,
				     &dmar_hp_add_drhd, NULL);
	if (!ret)
		return 0;

	dmar_walk_dsm_resource(handle, DMAR_DSM_FUNC_DRHD,
			       &dmar_hp_remove_drhd, NULL);
release_atsr:
	dmar_walk_dsm_resource(handle, DMAR_DSM_FUNC_ATSR,
			       &dmar_release_one_atsr, NULL);
release_drhd:
	dmar_walk_dsm_resource(handle, DMAR_DSM_FUNC_DRHD,
			       &dmar_hp_release_drhd, NULL);
out:
	return ret;
}

static int dmar_hotplug_remove(acpi_handle handle)
{
	int ret;

	ret = dmar_walk_dsm_resource(handle, DMAR_DSM_FUNC_ATSR,
				     &dmar_check_one_atsr, NULL);
	if (ret)
		return ret;

	ret = dmar_walk_dsm_resource(handle, DMAR_DSM_FUNC_DRHD,
				     &dmar_hp_remove_drhd, NULL);
	if (ret == 0) {
		WARN_ON(dmar_walk_dsm_resource(handle, DMAR_DSM_FUNC_ATSR,
					       &dmar_release_one_atsr, NULL));
		WARN_ON(dmar_walk_dsm_resource(handle, DMAR_DSM_FUNC_DRHD,
					       &dmar_hp_release_drhd, NULL));
	} else {
		dmar_walk_dsm_resource(handle, DMAR_DSM_FUNC_DRHD,
				       &dmar_hp_add_drhd, NULL);
	}

	return ret;
}

static acpi_status dmar_get_dsm_handle(acpi_handle handle, u32 lvl,
				       void *context, void **retval)
{
	acpi_handle *phdl = retval;

	if (dmar_detect_dsm(handle, DMAR_DSM_FUNC_DRHD)) {
		*phdl = handle;
		return AE_CTRL_TERMINATE;
	}

	return AE_OK;
}

static int dmar_device_hotplug(acpi_handle handle, bool insert)
{
	int ret;
	acpi_handle tmp = NULL;
	acpi_status status;

	if (!dmar_in_use())
		return 0;

	if (dmar_detect_dsm(handle, DMAR_DSM_FUNC_DRHD)) {
		tmp = handle;
	} else {
		status = acpi_walk_namespace(ACPI_TYPE_DEVICE, handle,
					     ACPI_UINT32_MAX,
					     dmar_get_dsm_handle,
					     NULL, NULL, &tmp);
		if (ACPI_FAILURE(status)) {
			pr_warn("Failed to locate _DSM method.\n");
			return -ENXIO;
		}
	}
	if (tmp == NULL)
		return 0;

	down_write(&dmar_global_lock);
	if (insert)
		ret = dmar_hotplug_insert(tmp);
	else
		ret = dmar_hotplug_remove(tmp);
	up_write(&dmar_global_lock);

	return ret;
}

int dmar_device_add(acpi_handle handle)
{
	return dmar_device_hotplug(handle, true);
}

int dmar_device_remove(acpi_handle handle)
{
	return dmar_device_hotplug(handle, false);
}