1 /*
2  * IOMMU API for ARM architected SMMUv3 implementations.
3  *
4  * This program is free software; you can redistribute it and/or modify
5  * it under the terms of the GNU General Public License version 2 as
6  * published by the Free Software Foundation.
7  *
8  * This program is distributed in the hope that it will be useful,
9  * but WITHOUT ANY WARRANTY; without even the implied warranty of
10  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
11  * GNU General Public License for more details.
12  *
13  * You should have received a copy of the GNU General Public License
14  * along with this program.  If not, see <http://www.gnu.org/licenses/>.
15  *
16  * Copyright (C) 2015 ARM Limited
17  *
18  * Author: Will Deacon <will.deacon@arm.com>
19  *
20  * This driver is powered by bad coffee and bombay mix.
21  */
22 
23 #include <linux/delay.h>
24 #include <linux/err.h>
25 #include <linux/interrupt.h>
26 #include <linux/iommu.h>
27 #include <linux/iopoll.h>
28 #include <linux/module.h>
29 #include <linux/msi.h>
30 #include <linux/of.h>
31 #include <linux/of_address.h>
32 #include <linux/of_platform.h>
33 #include <linux/pci.h>
34 #include <linux/platform_device.h>
35 
36 #include "io-pgtable.h"
37 
38 /* MMIO registers */
39 #define ARM_SMMU_IDR0			0x0
40 #define IDR0_ST_LVL_SHIFT		27
41 #define IDR0_ST_LVL_MASK		0x3
42 #define IDR0_ST_LVL_2LVL		(1 << IDR0_ST_LVL_SHIFT)
43 #define IDR0_STALL_MODEL		(3 << 24)
44 #define IDR0_TTENDIAN_SHIFT		21
45 #define IDR0_TTENDIAN_MASK		0x3
46 #define IDR0_TTENDIAN_LE		(2 << IDR0_TTENDIAN_SHIFT)
47 #define IDR0_TTENDIAN_BE		(3 << IDR0_TTENDIAN_SHIFT)
48 #define IDR0_TTENDIAN_MIXED		(0 << IDR0_TTENDIAN_SHIFT)
49 #define IDR0_CD2L			(1 << 19)
50 #define IDR0_VMID16			(1 << 18)
51 #define IDR0_PRI			(1 << 16)
52 #define IDR0_SEV			(1 << 14)
53 #define IDR0_MSI			(1 << 13)
54 #define IDR0_ASID16			(1 << 12)
55 #define IDR0_ATS			(1 << 10)
56 #define IDR0_HYP			(1 << 9)
57 #define IDR0_COHACC			(1 << 4)
58 #define IDR0_TTF_SHIFT			2
59 #define IDR0_TTF_MASK			0x3
60 #define IDR0_TTF_AARCH64		(2 << IDR0_TTF_SHIFT)
61 #define IDR0_TTF_AARCH32_64		(3 << IDR0_TTF_SHIFT)
62 #define IDR0_S1P			(1 << 1)
63 #define IDR0_S2P			(1 << 0)
64 
65 #define ARM_SMMU_IDR1			0x4
66 #define IDR1_TABLES_PRESET		(1 << 30)
67 #define IDR1_QUEUES_PRESET		(1 << 29)
68 #define IDR1_REL			(1 << 28)
69 #define IDR1_CMDQ_SHIFT			21
70 #define IDR1_CMDQ_MASK			0x1f
71 #define IDR1_EVTQ_SHIFT			16
72 #define IDR1_EVTQ_MASK			0x1f
73 #define IDR1_PRIQ_SHIFT			11
74 #define IDR1_PRIQ_MASK			0x1f
75 #define IDR1_SSID_SHIFT			6
76 #define IDR1_SSID_MASK			0x1f
77 #define IDR1_SID_SHIFT			0
78 #define IDR1_SID_MASK			0x3f
79 
80 #define ARM_SMMU_IDR5			0x14
81 #define IDR5_STALL_MAX_SHIFT		16
82 #define IDR5_STALL_MAX_MASK		0xffff
83 #define IDR5_GRAN64K			(1 << 6)
84 #define IDR5_GRAN16K			(1 << 5)
85 #define IDR5_GRAN4K			(1 << 4)
86 #define IDR5_OAS_SHIFT			0
87 #define IDR5_OAS_MASK			0x7
88 #define IDR5_OAS_32_BIT			(0 << IDR5_OAS_SHIFT)
89 #define IDR5_OAS_36_BIT			(1 << IDR5_OAS_SHIFT)
90 #define IDR5_OAS_40_BIT			(2 << IDR5_OAS_SHIFT)
91 #define IDR5_OAS_42_BIT			(3 << IDR5_OAS_SHIFT)
92 #define IDR5_OAS_44_BIT			(4 << IDR5_OAS_SHIFT)
93 #define IDR5_OAS_48_BIT			(5 << IDR5_OAS_SHIFT)
94 
95 #define ARM_SMMU_CR0			0x20
96 #define CR0_CMDQEN			(1 << 3)
97 #define CR0_EVTQEN			(1 << 2)
98 #define CR0_PRIQEN			(1 << 1)
99 #define CR0_SMMUEN			(1 << 0)
100 
101 #define ARM_SMMU_CR0ACK			0x24
102 
103 #define ARM_SMMU_CR1			0x28
104 #define CR1_SH_NSH			0
105 #define CR1_SH_OSH			2
106 #define CR1_SH_ISH			3
107 #define CR1_CACHE_NC			0
108 #define CR1_CACHE_WB			1
109 #define CR1_CACHE_WT			2
110 #define CR1_TABLE_SH_SHIFT		10
111 #define CR1_TABLE_OC_SHIFT		8
112 #define CR1_TABLE_IC_SHIFT		6
113 #define CR1_QUEUE_SH_SHIFT		4
114 #define CR1_QUEUE_OC_SHIFT		2
115 #define CR1_QUEUE_IC_SHIFT		0
116 
117 #define ARM_SMMU_CR2			0x2c
118 #define CR2_PTM				(1 << 2)
119 #define CR2_RECINVSID			(1 << 1)
120 #define CR2_E2H				(1 << 0)
121 
122 #define ARM_SMMU_IRQ_CTRL		0x50
123 #define IRQ_CTRL_EVTQ_IRQEN		(1 << 2)
124 #define IRQ_CTRL_PRIQ_IRQEN		(1 << 1)
125 #define IRQ_CTRL_GERROR_IRQEN		(1 << 0)
126 
127 #define ARM_SMMU_IRQ_CTRLACK		0x54
128 
129 #define ARM_SMMU_GERROR			0x60
130 #define GERROR_SFM_ERR			(1 << 8)
131 #define GERROR_MSI_GERROR_ABT_ERR	(1 << 7)
132 #define GERROR_MSI_PRIQ_ABT_ERR		(1 << 6)
133 #define GERROR_MSI_EVTQ_ABT_ERR		(1 << 5)
134 #define GERROR_MSI_CMDQ_ABT_ERR		(1 << 4)
135 #define GERROR_PRIQ_ABT_ERR		(1 << 3)
136 #define GERROR_EVTQ_ABT_ERR		(1 << 2)
137 #define GERROR_CMDQ_ERR			(1 << 0)
138 #define GERROR_ERR_MASK			0xfd
139 
140 #define ARM_SMMU_GERRORN		0x64
141 
142 #define ARM_SMMU_GERROR_IRQ_CFG0	0x68
143 #define ARM_SMMU_GERROR_IRQ_CFG1	0x70
144 #define ARM_SMMU_GERROR_IRQ_CFG2	0x74
145 
146 #define ARM_SMMU_STRTAB_BASE		0x80
147 #define STRTAB_BASE_RA			(1UL << 62)
148 #define STRTAB_BASE_ADDR_SHIFT		6
149 #define STRTAB_BASE_ADDR_MASK		0x3ffffffffffUL
150 
151 #define ARM_SMMU_STRTAB_BASE_CFG	0x88
152 #define STRTAB_BASE_CFG_LOG2SIZE_SHIFT	0
153 #define STRTAB_BASE_CFG_LOG2SIZE_MASK	0x3f
154 #define STRTAB_BASE_CFG_SPLIT_SHIFT	6
155 #define STRTAB_BASE_CFG_SPLIT_MASK	0x1f
156 #define STRTAB_BASE_CFG_FMT_SHIFT	16
157 #define STRTAB_BASE_CFG_FMT_MASK	0x3
158 #define STRTAB_BASE_CFG_FMT_LINEAR	(0 << STRTAB_BASE_CFG_FMT_SHIFT)
159 #define STRTAB_BASE_CFG_FMT_2LVL	(1 << STRTAB_BASE_CFG_FMT_SHIFT)
160 
161 #define ARM_SMMU_CMDQ_BASE		0x90
162 #define ARM_SMMU_CMDQ_PROD		0x98
163 #define ARM_SMMU_CMDQ_CONS		0x9c
164 
165 #define ARM_SMMU_EVTQ_BASE		0xa0
166 #define ARM_SMMU_EVTQ_PROD		0x100a8
167 #define ARM_SMMU_EVTQ_CONS		0x100ac
168 #define ARM_SMMU_EVTQ_IRQ_CFG0		0xb0
169 #define ARM_SMMU_EVTQ_IRQ_CFG1		0xb8
170 #define ARM_SMMU_EVTQ_IRQ_CFG2		0xbc
171 
172 #define ARM_SMMU_PRIQ_BASE		0xc0
173 #define ARM_SMMU_PRIQ_PROD		0x100c8
174 #define ARM_SMMU_PRIQ_CONS		0x100cc
175 #define ARM_SMMU_PRIQ_IRQ_CFG0		0xd0
176 #define ARM_SMMU_PRIQ_IRQ_CFG1		0xd8
177 #define ARM_SMMU_PRIQ_IRQ_CFG2		0xdc
178 
179 /* Common MSI config fields */
180 #define MSI_CFG0_ADDR_SHIFT		2
181 #define MSI_CFG0_ADDR_MASK		0x3fffffffffffUL
182 #define MSI_CFG2_SH_SHIFT		4
183 #define MSI_CFG2_SH_NSH			(0UL << MSI_CFG2_SH_SHIFT)
184 #define MSI_CFG2_SH_OSH			(2UL << MSI_CFG2_SH_SHIFT)
185 #define MSI_CFG2_SH_ISH			(3UL << MSI_CFG2_SH_SHIFT)
186 #define MSI_CFG2_MEMATTR_SHIFT		0
187 #define MSI_CFG2_MEMATTR_DEVICE_nGnRE	(0x1 << MSI_CFG2_MEMATTR_SHIFT)
188 
189 #define Q_IDX(q, p)			((p) & ((1 << (q)->max_n_shift) - 1))
190 #define Q_WRP(q, p)			((p) & (1 << (q)->max_n_shift))
191 #define Q_OVERFLOW_FLAG			(1 << 31)
192 #define Q_OVF(q, p)			((p) & Q_OVERFLOW_FLAG)
193 #define Q_ENT(q, p)			((q)->base +			\
194 					 Q_IDX(q, p) * (q)->ent_dwords)
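
/*
 * Worked example of the prod/cons encoding (illustrative): with
 * max_n_shift == 8, a pointer value of 0x105 decodes as index 0x05
 * with the wrap bit (0x100) set. Equal indices with differing wrap
 * bits mean the queue is full; equal indices and equal wrap bits mean
 * it is empty (see queue_full()/queue_empty() below).
 */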
195 
196 #define Q_BASE_RWA			(1UL << 62)
197 #define Q_BASE_ADDR_SHIFT		5
198 #define Q_BASE_ADDR_MASK		0xfffffffffffUL
199 #define Q_BASE_LOG2SIZE_SHIFT		0
200 #define Q_BASE_LOG2SIZE_MASK		0x1fUL
201 
202 /*
203  * Stream table.
204  *
205  * Linear: Enough to cover 1 << IDR1.SIDSIZE entries
206  * 2lvl: 128k L1 entries,
207  *       256 lazy entries per table (each table covers a PCI bus)
208  */
209 #define STRTAB_L1_SZ_SHIFT		20
210 #define STRTAB_SPLIT			8
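/*
 * Illustrative SID split for the two-level format: with STRTAB_SPLIT
 * of 8, (sid >> 8) indexes the L1 descriptor array and (sid & 0xff)
 * indexes the 256-entry L2 table it points to, so each lazily
 * allocated L2 table covers one PCI bus when SID == RID.
 */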
211 
212 #define STRTAB_L1_DESC_DWORDS		1
213 #define STRTAB_L1_DESC_SPAN_SHIFT	0
214 #define STRTAB_L1_DESC_SPAN_MASK	0x1fUL
215 #define STRTAB_L1_DESC_L2PTR_SHIFT	6
216 #define STRTAB_L1_DESC_L2PTR_MASK	0x3ffffffffffUL
217 
218 #define STRTAB_STE_DWORDS		8
219 #define STRTAB_STE_0_V			(1UL << 0)
220 #define STRTAB_STE_0_CFG_SHIFT		1
221 #define STRTAB_STE_0_CFG_MASK		0x7UL
222 #define STRTAB_STE_0_CFG_ABORT		(0UL << STRTAB_STE_0_CFG_SHIFT)
223 #define STRTAB_STE_0_CFG_BYPASS		(4UL << STRTAB_STE_0_CFG_SHIFT)
224 #define STRTAB_STE_0_CFG_S1_TRANS	(5UL << STRTAB_STE_0_CFG_SHIFT)
225 #define STRTAB_STE_0_CFG_S2_TRANS	(6UL << STRTAB_STE_0_CFG_SHIFT)
226 
227 #define STRTAB_STE_0_S1FMT_SHIFT	4
228 #define STRTAB_STE_0_S1FMT_LINEAR	(0UL << STRTAB_STE_0_S1FMT_SHIFT)
229 #define STRTAB_STE_0_S1CTXPTR_SHIFT	6
230 #define STRTAB_STE_0_S1CTXPTR_MASK	0x3ffffffffffUL
231 #define STRTAB_STE_0_S1CDMAX_SHIFT	59
232 #define STRTAB_STE_0_S1CDMAX_MASK	0x1fUL
233 
234 #define STRTAB_STE_1_S1C_CACHE_NC	0UL
235 #define STRTAB_STE_1_S1C_CACHE_WBRA	1UL
236 #define STRTAB_STE_1_S1C_CACHE_WT	2UL
237 #define STRTAB_STE_1_S1C_CACHE_WB	3UL
238 #define STRTAB_STE_1_S1C_SH_NSH		0UL
239 #define STRTAB_STE_1_S1C_SH_OSH		2UL
240 #define STRTAB_STE_1_S1C_SH_ISH		3UL
241 #define STRTAB_STE_1_S1CIR_SHIFT	2
242 #define STRTAB_STE_1_S1COR_SHIFT	4
243 #define STRTAB_STE_1_S1CSH_SHIFT	6
244 
245 #define STRTAB_STE_1_S1STALLD		(1UL << 27)
246 
247 #define STRTAB_STE_1_EATS_ABT		0UL
248 #define STRTAB_STE_1_EATS_TRANS		1UL
249 #define STRTAB_STE_1_EATS_S1CHK		2UL
250 #define STRTAB_STE_1_EATS_SHIFT		28
251 
252 #define STRTAB_STE_1_STRW_NSEL1		0UL
253 #define STRTAB_STE_1_STRW_EL2		2UL
254 #define STRTAB_STE_1_STRW_SHIFT		30
255 
256 #define STRTAB_STE_2_S2VMID_SHIFT	0
257 #define STRTAB_STE_2_S2VMID_MASK	0xffffUL
258 #define STRTAB_STE_2_VTCR_SHIFT		32
259 #define STRTAB_STE_2_VTCR_MASK		0x7ffffUL
260 #define STRTAB_STE_2_S2AA64		(1UL << 51)
261 #define STRTAB_STE_2_S2ENDI		(1UL << 52)
262 #define STRTAB_STE_2_S2PTW		(1UL << 54)
263 #define STRTAB_STE_2_S2R		(1UL << 58)
264 
265 #define STRTAB_STE_3_S2TTB_SHIFT	4
266 #define STRTAB_STE_3_S2TTB_MASK		0xfffffffffffUL
267 
268 /* Context descriptor (stage-1 only) */
269 #define CTXDESC_CD_DWORDS		8
270 #define CTXDESC_CD_0_TCR_T0SZ_SHIFT	0
271 #define ARM64_TCR_T0SZ_SHIFT		0
272 #define ARM64_TCR_T0SZ_MASK		0x1fUL
273 #define CTXDESC_CD_0_TCR_TG0_SHIFT	6
274 #define ARM64_TCR_TG0_SHIFT		14
275 #define ARM64_TCR_TG0_MASK		0x3UL
276 #define CTXDESC_CD_0_TCR_IRGN0_SHIFT	8
277 #define ARM64_TCR_IRGN0_SHIFT		8
278 #define ARM64_TCR_IRGN0_MASK		0x3UL
279 #define CTXDESC_CD_0_TCR_ORGN0_SHIFT	10
280 #define ARM64_TCR_ORGN0_SHIFT		10
281 #define ARM64_TCR_ORGN0_MASK		0x3UL
282 #define CTXDESC_CD_0_TCR_SH0_SHIFT	12
283 #define ARM64_TCR_SH0_SHIFT		12
284 #define ARM64_TCR_SH0_MASK		0x3UL
285 #define CTXDESC_CD_0_TCR_EPD0_SHIFT	14
286 #define ARM64_TCR_EPD0_SHIFT		7
287 #define ARM64_TCR_EPD0_MASK		0x1UL
288 #define CTXDESC_CD_0_TCR_EPD1_SHIFT	30
289 #define ARM64_TCR_EPD1_SHIFT		23
290 #define ARM64_TCR_EPD1_MASK		0x1UL
291 
292 #define CTXDESC_CD_0_ENDI		(1UL << 15)
293 #define CTXDESC_CD_0_V			(1UL << 31)
294 
295 #define CTXDESC_CD_0_TCR_IPS_SHIFT	32
296 #define ARM64_TCR_IPS_SHIFT		32
297 #define ARM64_TCR_IPS_MASK		0x7UL
298 #define CTXDESC_CD_0_TCR_TBI0_SHIFT	38
299 #define ARM64_TCR_TBI0_SHIFT		37
300 #define ARM64_TCR_TBI0_MASK		0x1UL
301 
302 #define CTXDESC_CD_0_AA64		(1UL << 41)
303 #define CTXDESC_CD_0_R			(1UL << 45)
304 #define CTXDESC_CD_0_A			(1UL << 46)
305 #define CTXDESC_CD_0_ASET_SHIFT		47
306 #define CTXDESC_CD_0_ASET_SHARED	(0UL << CTXDESC_CD_0_ASET_SHIFT)
307 #define CTXDESC_CD_0_ASET_PRIVATE	(1UL << CTXDESC_CD_0_ASET_SHIFT)
308 #define CTXDESC_CD_0_ASID_SHIFT		48
309 #define CTXDESC_CD_0_ASID_MASK		0xffffUL
310 
311 #define CTXDESC_CD_1_TTB0_SHIFT		4
312 #define CTXDESC_CD_1_TTB0_MASK		0xfffffffffffUL
313 
314 #define CTXDESC_CD_3_MAIR_SHIFT		0
315 
316 /* Convert between AArch64 (CPU) TCR format and SMMU CD format */
317 #define ARM_SMMU_TCR2CD(tcr, fld)					\
318 	(((tcr) >> ARM64_TCR_##fld##_SHIFT & ARM64_TCR_##fld##_MASK)	\
319 	 << CTXDESC_CD_0_TCR_##fld##_SHIFT)
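
/*
 * Example expansion (illustrative): ARM_SMMU_TCR2CD(tcr, TG0) becomes
 * ((tcr >> 14 & 0x3UL) << 6), i.e. TG0 is extracted from its AArch64
 * TCR position (bits [15:14]) and re-inserted at its CD position
 * (bits [7:6]).
 */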
320 
321 /* Command queue */
322 #define CMDQ_ENT_DWORDS			2
323 #define CMDQ_MAX_SZ_SHIFT		8
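/* i.e. at most 1 << 8 entries of 16 bytes each: a 4KB command queue */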
324 
325 #define CMDQ_ERR_SHIFT			24
326 #define CMDQ_ERR_MASK			0x7f
327 #define CMDQ_ERR_CERROR_NONE_IDX	0
328 #define CMDQ_ERR_CERROR_ILL_IDX		1
329 #define CMDQ_ERR_CERROR_ABT_IDX		2
330 
331 #define CMDQ_0_OP_SHIFT			0
332 #define CMDQ_0_OP_MASK			0xffUL
333 #define CMDQ_0_SSV			(1UL << 11)
334 
335 #define CMDQ_PREFETCH_0_SID_SHIFT	32
336 #define CMDQ_PREFETCH_1_SIZE_SHIFT	0
337 #define CMDQ_PREFETCH_1_ADDR_MASK	~0xfffUL
338 
339 #define CMDQ_CFGI_0_SID_SHIFT		32
340 #define CMDQ_CFGI_0_SID_MASK		0xffffffffUL
341 #define CMDQ_CFGI_1_LEAF		(1UL << 0)
342 #define CMDQ_CFGI_1_RANGE_SHIFT		0
343 #define CMDQ_CFGI_1_RANGE_MASK		0x1fUL
344 
345 #define CMDQ_TLBI_0_VMID_SHIFT		32
346 #define CMDQ_TLBI_0_ASID_SHIFT		48
347 #define CMDQ_TLBI_1_LEAF		(1UL << 0)
348 #define CMDQ_TLBI_1_VA_MASK		~0xfffUL
349 #define CMDQ_TLBI_1_IPA_MASK		0xfffffffff000UL
350 
351 #define CMDQ_PRI_0_SSID_SHIFT		12
352 #define CMDQ_PRI_0_SSID_MASK		0xfffffUL
353 #define CMDQ_PRI_0_SID_SHIFT		32
354 #define CMDQ_PRI_0_SID_MASK		0xffffffffUL
355 #define CMDQ_PRI_1_GRPID_SHIFT		0
356 #define CMDQ_PRI_1_GRPID_MASK		0x1ffUL
357 #define CMDQ_PRI_1_RESP_SHIFT		12
358 #define CMDQ_PRI_1_RESP_DENY		(0UL << CMDQ_PRI_1_RESP_SHIFT)
359 #define CMDQ_PRI_1_RESP_FAIL		(1UL << CMDQ_PRI_1_RESP_SHIFT)
360 #define CMDQ_PRI_1_RESP_SUCC		(2UL << CMDQ_PRI_1_RESP_SHIFT)
361 
362 #define CMDQ_SYNC_0_CS_SHIFT		12
363 #define CMDQ_SYNC_0_CS_NONE		(0UL << CMDQ_SYNC_0_CS_SHIFT)
364 #define CMDQ_SYNC_0_CS_SEV		(2UL << CMDQ_SYNC_0_CS_SHIFT)
365 
366 /* Event queue */
367 #define EVTQ_ENT_DWORDS			4
368 #define EVTQ_MAX_SZ_SHIFT		7
369 
370 #define EVTQ_0_ID_SHIFT			0
371 #define EVTQ_0_ID_MASK			0xffUL
372 
373 /* PRI queue */
374 #define PRIQ_ENT_DWORDS			2
375 #define PRIQ_MAX_SZ_SHIFT		8
376 
377 #define PRIQ_0_SID_SHIFT		0
378 #define PRIQ_0_SID_MASK			0xffffffffUL
379 #define PRIQ_0_SSID_SHIFT		32
380 #define PRIQ_0_SSID_MASK		0xfffffUL
381 #define PRIQ_0_OF			(1UL << 57)
382 #define PRIQ_0_PERM_PRIV		(1UL << 58)
383 #define PRIQ_0_PERM_EXEC		(1UL << 59)
384 #define PRIQ_0_PERM_READ		(1UL << 60)
385 #define PRIQ_0_PERM_WRITE		(1UL << 61)
386 #define PRIQ_0_PRG_LAST			(1UL << 62)
387 #define PRIQ_0_SSID_V			(1UL << 63)
388 
389 #define PRIQ_1_PRG_IDX_SHIFT		0
390 #define PRIQ_1_PRG_IDX_MASK		0x1ffUL
391 #define PRIQ_1_ADDR_SHIFT		12
392 #define PRIQ_1_ADDR_MASK		0xfffffffffffffUL
393 
394 /* High-level queue structures */
395 #define ARM_SMMU_POLL_TIMEOUT_US	100
396 
397 static bool disable_bypass;
398 module_param_named(disable_bypass, disable_bypass, bool, S_IRUGO);
399 MODULE_PARM_DESC(disable_bypass,
400 	"Disable bypass streams such that incoming transactions from devices that are not attached to an iommu domain will report an abort back to the device and will not be allowed to pass through the SMMU.");
401 
402 enum pri_resp {
403 	PRI_RESP_DENY,
404 	PRI_RESP_FAIL,
405 	PRI_RESP_SUCC,
406 };
407 
408 enum arm_smmu_msi_index {
409 	EVTQ_MSI_INDEX,
410 	GERROR_MSI_INDEX,
411 	PRIQ_MSI_INDEX,
412 	ARM_SMMU_MAX_MSIS,
413 };
414 
415 static phys_addr_t arm_smmu_msi_cfg[ARM_SMMU_MAX_MSIS][3] = {
416 	[EVTQ_MSI_INDEX] = {
417 		ARM_SMMU_EVTQ_IRQ_CFG0,
418 		ARM_SMMU_EVTQ_IRQ_CFG1,
419 		ARM_SMMU_EVTQ_IRQ_CFG2,
420 	},
421 	[GERROR_MSI_INDEX] = {
422 		ARM_SMMU_GERROR_IRQ_CFG0,
423 		ARM_SMMU_GERROR_IRQ_CFG1,
424 		ARM_SMMU_GERROR_IRQ_CFG2,
425 	},
426 	[PRIQ_MSI_INDEX] = {
427 		ARM_SMMU_PRIQ_IRQ_CFG0,
428 		ARM_SMMU_PRIQ_IRQ_CFG1,
429 		ARM_SMMU_PRIQ_IRQ_CFG2,
430 	},
431 };
432 
433 struct arm_smmu_cmdq_ent {
434 	/* Common fields */
435 	u8				opcode;
436 	bool				substream_valid;
437 
438 	/* Command-specific fields */
439 	union {
440 		#define CMDQ_OP_PREFETCH_CFG	0x1
441 		struct {
442 			u32			sid;
443 			u8			size;
444 			u64			addr;
445 		} prefetch;
446 
447 		#define CMDQ_OP_CFGI_STE	0x3
448 		#define CMDQ_OP_CFGI_ALL	0x4
449 		struct {
450 			u32			sid;
451 			union {
452 				bool		leaf;
453 				u8		span;
454 			};
455 		} cfgi;
456 
457 		#define CMDQ_OP_TLBI_NH_ASID	0x11
458 		#define CMDQ_OP_TLBI_NH_VA	0x12
459 		#define CMDQ_OP_TLBI_EL2_ALL	0x20
460 		#define CMDQ_OP_TLBI_S12_VMALL	0x28
461 		#define CMDQ_OP_TLBI_S2_IPA	0x2a
462 		#define CMDQ_OP_TLBI_NSNH_ALL	0x30
463 		struct {
464 			u16			asid;
465 			u16			vmid;
466 			bool			leaf;
467 			u64			addr;
468 		} tlbi;
469 
470 		#define CMDQ_OP_PRI_RESP	0x41
471 		struct {
472 			u32			sid;
473 			u32			ssid;
474 			u16			grpid;
475 			enum pri_resp		resp;
476 		} pri;
477 
478 		#define CMDQ_OP_CMD_SYNC	0x46
479 	};
480 };
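
/*
 * Typical usage (mirroring arm_smmu_sync_ste_for_sid() below): build
 * the command on the stack with designated initialisers and hand it
 * to arm_smmu_cmdq_issue_cmd() for encoding and insertion, e.g.
 *
 *	struct arm_smmu_cmdq_ent cmd = {
 *		.opcode	= CMDQ_OP_CFGI_STE,
 *		.cfgi	= { .sid = sid, .leaf = true },
 *	};
 */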
481 
482 struct arm_smmu_queue {
483 	int				irq; /* Wired interrupt */
484 
485 	__le64				*base;
486 	dma_addr_t			base_dma;
487 	u64				q_base;
488 
489 	size_t				ent_dwords;
490 	u32				max_n_shift;
491 	u32				prod;
492 	u32				cons;
493 
494 	u32 __iomem			*prod_reg;
495 	u32 __iomem			*cons_reg;
496 };
497 
498 struct arm_smmu_cmdq {
499 	struct arm_smmu_queue		q;
500 	spinlock_t			lock;
501 };
502 
503 struct arm_smmu_evtq {
504 	struct arm_smmu_queue		q;
505 	u32				max_stalls;
506 };
507 
508 struct arm_smmu_priq {
509 	struct arm_smmu_queue		q;
510 };
511 
512 /* High-level stream table and context descriptor structures */
513 struct arm_smmu_strtab_l1_desc {
514 	u8				span;
515 
516 	__le64				*l2ptr;
517 	dma_addr_t			l2ptr_dma;
518 };
519 
520 struct arm_smmu_s1_cfg {
521 	__le64				*cdptr;
522 	dma_addr_t			cdptr_dma;
523 
524 	struct arm_smmu_ctx_desc {
525 		u16	asid;
526 		u64	ttbr;
527 		u64	tcr;
528 		u64	mair;
529 	}				cd;
530 };
531 
532 struct arm_smmu_s2_cfg {
533 	u16				vmid;
534 	u64				vttbr;
535 	u64				vtcr;
536 };
537 
538 struct arm_smmu_strtab_ent {
539 	bool				valid;
540 
541 	bool				bypass;	/* Overrides s1/s2 config */
542 	struct arm_smmu_s1_cfg		*s1_cfg;
543 	struct arm_smmu_s2_cfg		*s2_cfg;
544 };
545 
546 struct arm_smmu_strtab_cfg {
547 	__le64				*strtab;
548 	dma_addr_t			strtab_dma;
549 	struct arm_smmu_strtab_l1_desc	*l1_desc;
550 	unsigned int			num_l1_ents;
551 
552 	u64				strtab_base;
553 	u32				strtab_base_cfg;
554 };
555 
556 /* An SMMUv3 instance */
557 struct arm_smmu_device {
558 	struct device			*dev;
559 	void __iomem			*base;
560 
561 #define ARM_SMMU_FEAT_2_LVL_STRTAB	(1 << 0)
562 #define ARM_SMMU_FEAT_2_LVL_CDTAB	(1 << 1)
563 #define ARM_SMMU_FEAT_TT_LE		(1 << 2)
564 #define ARM_SMMU_FEAT_TT_BE		(1 << 3)
565 #define ARM_SMMU_FEAT_PRI		(1 << 4)
566 #define ARM_SMMU_FEAT_ATS		(1 << 5)
567 #define ARM_SMMU_FEAT_SEV		(1 << 6)
568 #define ARM_SMMU_FEAT_MSI		(1 << 7)
569 #define ARM_SMMU_FEAT_COHERENCY		(1 << 8)
570 #define ARM_SMMU_FEAT_TRANS_S1		(1 << 9)
571 #define ARM_SMMU_FEAT_TRANS_S2		(1 << 10)
572 #define ARM_SMMU_FEAT_STALLS		(1 << 11)
573 #define ARM_SMMU_FEAT_HYP		(1 << 12)
574 	u32				features;
575 
576 #define ARM_SMMU_OPT_SKIP_PREFETCH	(1 << 0)
577 	u32				options;
578 
579 	struct arm_smmu_cmdq		cmdq;
580 	struct arm_smmu_evtq		evtq;
581 	struct arm_smmu_priq		priq;
582 
583 	int				gerr_irq;
584 
585 	unsigned long			ias; /* IPA */
586 	unsigned long			oas; /* PA */
587 
588 #define ARM_SMMU_MAX_ASIDS		(1 << 16)
589 	unsigned int			asid_bits;
590 	DECLARE_BITMAP(asid_map, ARM_SMMU_MAX_ASIDS);
591 
592 #define ARM_SMMU_MAX_VMIDS		(1 << 16)
593 	unsigned int			vmid_bits;
594 	DECLARE_BITMAP(vmid_map, ARM_SMMU_MAX_VMIDS);
595 
596 	unsigned int			ssid_bits;
597 	unsigned int			sid_bits;
598 
599 	struct arm_smmu_strtab_cfg	strtab_cfg;
600 };
601 
602 /* SMMU private data for an IOMMU group */
603 struct arm_smmu_group {
604 	struct arm_smmu_device		*smmu;
605 	struct arm_smmu_domain		*domain;
606 	int				num_sids;
607 	u32				*sids;
608 	struct arm_smmu_strtab_ent	ste;
609 };
610 
611 /* SMMU private data for an IOMMU domain */
612 enum arm_smmu_domain_stage {
613 	ARM_SMMU_DOMAIN_S1 = 0,
614 	ARM_SMMU_DOMAIN_S2,
615 	ARM_SMMU_DOMAIN_NESTED,
616 };
617 
618 struct arm_smmu_domain {
619 	struct arm_smmu_device		*smmu;
620 	struct mutex			init_mutex; /* Protects smmu pointer */
621 
622 	struct io_pgtable_ops		*pgtbl_ops;
623 	spinlock_t			pgtbl_lock;
624 
625 	enum arm_smmu_domain_stage	stage;
626 	union {
627 		struct arm_smmu_s1_cfg	s1_cfg;
628 		struct arm_smmu_s2_cfg	s2_cfg;
629 	};
630 
631 	struct iommu_domain		domain;
632 };
633 
634 struct arm_smmu_option_prop {
635 	u32 opt;
636 	const char *prop;
637 };
638 
639 static struct arm_smmu_option_prop arm_smmu_options[] = {
640 	{ ARM_SMMU_OPT_SKIP_PREFETCH, "hisilicon,broken-prefetch-cmd" },
641 	{ 0, NULL},
642 };
643 
644 static struct arm_smmu_domain *to_smmu_domain(struct iommu_domain *dom)
645 {
646 	return container_of(dom, struct arm_smmu_domain, domain);
647 }
648 
649 static void parse_driver_options(struct arm_smmu_device *smmu)
650 {
651 	int i = 0;
652 
653 	do {
654 		if (of_property_read_bool(smmu->dev->of_node,
655 						arm_smmu_options[i].prop)) {
656 			smmu->options |= arm_smmu_options[i].opt;
657 			dev_notice(smmu->dev, "option %s\n",
658 				arm_smmu_options[i].prop);
659 		}
660 	} while (arm_smmu_options[++i].opt);
661 }
662 
663 /* Low-level queue manipulation functions */
664 static bool queue_full(struct arm_smmu_queue *q)
665 {
666 	return Q_IDX(q, q->prod) == Q_IDX(q, q->cons) &&
667 	       Q_WRP(q, q->prod) != Q_WRP(q, q->cons);
668 }
669 
670 static bool queue_empty(struct arm_smmu_queue *q)
671 {
672 	return Q_IDX(q, q->prod) == Q_IDX(q, q->cons) &&
673 	       Q_WRP(q, q->prod) == Q_WRP(q, q->cons);
674 }
675 
676 static void queue_sync_cons(struct arm_smmu_queue *q)
677 {
678 	q->cons = readl_relaxed(q->cons_reg);
679 }
680 
681 static void queue_inc_cons(struct arm_smmu_queue *q)
682 {
683 	u32 cons = (Q_WRP(q, q->cons) | Q_IDX(q, q->cons)) + 1;
684 
685 	q->cons = Q_OVF(q, q->cons) | Q_WRP(q, cons) | Q_IDX(q, cons);
686 	writel(q->cons, q->cons_reg);
687 }
688 
689 static int queue_sync_prod(struct arm_smmu_queue *q)
690 {
691 	int ret = 0;
692 	u32 prod = readl_relaxed(q->prod_reg);
693 
694 	if (Q_OVF(q, prod) != Q_OVF(q, q->prod))
695 		ret = -EOVERFLOW;
696 
697 	q->prod = prod;
698 	return ret;
699 }
700 
701 static void queue_inc_prod(struct arm_smmu_queue *q)
702 {
703 	u32 prod = (Q_WRP(q, q->prod) | Q_IDX(q, q->prod)) + 1;
704 
705 	q->prod = Q_OVF(q, q->prod) | Q_WRP(q, prod) | Q_IDX(q, prod);
706 	writel(q->prod, q->prod_reg);
707 }
708 
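/*
 * Has the consumer not yet caught up with 'until'? When the wrap bits
 * match, that simply means cons' index is still lower; when they
 * differ the producer has wrapped past us, so we remain "before" until
 * our own index wraps back below until's.
 */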
709 static bool __queue_cons_before(struct arm_smmu_queue *q, u32 until)
710 {
711 	if (Q_WRP(q, q->cons) == Q_WRP(q, until))
712 		return Q_IDX(q, q->cons) < Q_IDX(q, until);
713 
714 	return Q_IDX(q, q->cons) >= Q_IDX(q, until);
715 }
716 
717 static int queue_poll_cons(struct arm_smmu_queue *q, u32 until, bool wfe)
718 {
719 	ktime_t timeout = ktime_add_us(ktime_get(), ARM_SMMU_POLL_TIMEOUT_US);
720 
721 	while (queue_sync_cons(q), __queue_cons_before(q, until)) {
722 		if (ktime_compare(ktime_get(), timeout) > 0)
723 			return -ETIMEDOUT;
724 
725 		if (wfe) {
726 			wfe();
727 		} else {
728 			cpu_relax();
729 			udelay(1);
730 		}
731 	}
732 
733 	return 0;
734 }
735 
736 static void queue_write(__le64 *dst, u64 *src, size_t n_dwords)
737 {
738 	int i;
739 
740 	for (i = 0; i < n_dwords; ++i)
741 		*dst++ = cpu_to_le64(*src++);
742 }
743 
744 static int queue_insert_raw(struct arm_smmu_queue *q, u64 *ent)
745 {
746 	if (queue_full(q))
747 		return -ENOSPC;
748 
749 	queue_write(Q_ENT(q, q->prod), ent, q->ent_dwords);
750 	queue_inc_prod(q);
751 	return 0;
752 }
753 
754 static void queue_read(__le64 *dst, u64 *src, size_t n_dwords)
755 {
756 	int i;
757 
758 	for (i = 0; i < n_dwords; ++i)
759 		*dst++ = le64_to_cpu(*src++);
760 }
761 
762 static int queue_remove_raw(struct arm_smmu_queue *q, u64 *ent)
763 {
764 	if (queue_empty(q))
765 		return -EAGAIN;
766 
767 	queue_read(ent, Q_ENT(q, q->cons), q->ent_dwords);
768 	queue_inc_cons(q);
769 	return 0;
770 }
771 
772 /* High-level queue accessors */
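/*
 * arm_smmu_cmdq_build_cmd() packs a command into two 64-bit words.
 * Example encoding (illustrative): CMD_SYNC ends up as cmd[0] ==
 * 0x2046 (opcode 0x46 in bits [7:0], CS == SEV at bits [13:12]) and
 * cmd[1] == 0.
 */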
773 static int arm_smmu_cmdq_build_cmd(u64 *cmd, struct arm_smmu_cmdq_ent *ent)
774 {
775 	memset(cmd, 0, CMDQ_ENT_DWORDS << 3);
776 	cmd[0] |= (ent->opcode & CMDQ_0_OP_MASK) << CMDQ_0_OP_SHIFT;
777 
778 	switch (ent->opcode) {
779 	case CMDQ_OP_TLBI_EL2_ALL:
780 	case CMDQ_OP_TLBI_NSNH_ALL:
781 		break;
782 	case CMDQ_OP_PREFETCH_CFG:
783 		cmd[0] |= (u64)ent->prefetch.sid << CMDQ_PREFETCH_0_SID_SHIFT;
784 		cmd[1] |= ent->prefetch.size << CMDQ_PREFETCH_1_SIZE_SHIFT;
785 		cmd[1] |= ent->prefetch.addr & CMDQ_PREFETCH_1_ADDR_MASK;
786 		break;
787 	case CMDQ_OP_CFGI_STE:
788 		cmd[0] |= (u64)ent->cfgi.sid << CMDQ_CFGI_0_SID_SHIFT;
789 		cmd[1] |= ent->cfgi.leaf ? CMDQ_CFGI_1_LEAF : 0;
790 		break;
791 	case CMDQ_OP_CFGI_ALL:
792 		/* Cover the entire SID range */
793 		cmd[1] |= CMDQ_CFGI_1_RANGE_MASK << CMDQ_CFGI_1_RANGE_SHIFT;
794 		break;
795 	case CMDQ_OP_TLBI_NH_VA:
796 		cmd[0] |= (u64)ent->tlbi.asid << CMDQ_TLBI_0_ASID_SHIFT;
797 		cmd[1] |= ent->tlbi.leaf ? CMDQ_TLBI_1_LEAF : 0;
798 		cmd[1] |= ent->tlbi.addr & CMDQ_TLBI_1_VA_MASK;
799 		break;
800 	case CMDQ_OP_TLBI_S2_IPA:
801 		cmd[0] |= (u64)ent->tlbi.vmid << CMDQ_TLBI_0_VMID_SHIFT;
802 		cmd[1] |= ent->tlbi.leaf ? CMDQ_TLBI_1_LEAF : 0;
803 		cmd[1] |= ent->tlbi.addr & CMDQ_TLBI_1_IPA_MASK;
804 		break;
805 	case CMDQ_OP_TLBI_NH_ASID:
806 		cmd[0] |= (u64)ent->tlbi.asid << CMDQ_TLBI_0_ASID_SHIFT;
807 		/* Fallthrough */
808 	case CMDQ_OP_TLBI_S12_VMALL:
809 		cmd[0] |= (u64)ent->tlbi.vmid << CMDQ_TLBI_0_VMID_SHIFT;
810 		break;
811 	case CMDQ_OP_PRI_RESP:
812 		cmd[0] |= ent->substream_valid ? CMDQ_0_SSV : 0;
813 		cmd[0] |= ent->pri.ssid << CMDQ_PRI_0_SSID_SHIFT;
814 		cmd[0] |= (u64)ent->pri.sid << CMDQ_PRI_0_SID_SHIFT;
815 		cmd[1] |= ent->pri.grpid << CMDQ_PRI_1_GRPID_SHIFT;
816 		switch (ent->pri.resp) {
817 		case PRI_RESP_DENY:
818 			cmd[1] |= CMDQ_PRI_1_RESP_DENY;
819 			break;
820 		case PRI_RESP_FAIL:
821 			cmd[1] |= CMDQ_PRI_1_RESP_FAIL;
822 			break;
823 		case PRI_RESP_SUCC:
824 			cmd[1] |= CMDQ_PRI_1_RESP_SUCC;
825 			break;
826 		default:
827 			return -EINVAL;
828 		}
829 		break;
830 	case CMDQ_OP_CMD_SYNC:
831 		cmd[0] |= CMDQ_SYNC_0_CS_SEV;
832 		break;
833 	default:
834 		return -ENOENT;
835 	}
836 
837 	return 0;
838 }
839 
840 static void arm_smmu_cmdq_skip_err(struct arm_smmu_device *smmu)
841 {
842 	static const char *cerror_str[] = {
843 		[CMDQ_ERR_CERROR_NONE_IDX]	= "No error",
844 		[CMDQ_ERR_CERROR_ILL_IDX]	= "Illegal command",
845 		[CMDQ_ERR_CERROR_ABT_IDX]	= "Abort on command fetch",
846 	};
847 
848 	int i;
849 	u64 cmd[CMDQ_ENT_DWORDS];
850 	struct arm_smmu_queue *q = &smmu->cmdq.q;
851 	u32 cons = readl_relaxed(q->cons_reg);
852 	u32 idx = cons >> CMDQ_ERR_SHIFT & CMDQ_ERR_MASK;
853 	struct arm_smmu_cmdq_ent cmd_sync = {
854 		.opcode = CMDQ_OP_CMD_SYNC,
855 	};
856 
857 	dev_err(smmu->dev, "CMDQ error (cons 0x%08x): %s\n", cons,
858 		cerror_str[idx]);
859 
860 	switch (idx) {
861 	case CMDQ_ERR_CERROR_ILL_IDX:
862 		break;
863 	case CMDQ_ERR_CERROR_ABT_IDX:
864 		dev_err(smmu->dev, "retrying command fetch\n");
865 	case CMDQ_ERR_CERROR_NONE_IDX:
866 		return;
867 	}
868 
869 	/*
870 	 * We may have concurrent producers, so we need to be careful
871 	 * not to touch any of the shadow cmdq state.
872 	 */
873 	queue_read(cmd, Q_ENT(q, idx), q->ent_dwords);
874 	dev_err(smmu->dev, "skipping command in error state:\n");
875 	for (i = 0; i < ARRAY_SIZE(cmd); ++i)
876 		dev_err(smmu->dev, "\t0x%016llx\n", (unsigned long long)cmd[i]);
877 
878 	/* Convert the erroneous command into a CMD_SYNC */
879 	if (arm_smmu_cmdq_build_cmd(cmd, &cmd_sync)) {
880 		dev_err(smmu->dev, "failed to convert to CMD_SYNC\n");
881 		return;
882 	}
883 
884 	queue_write(cmd, Q_ENT(q, idx), q->ent_dwords);
885 }
886 
887 static void arm_smmu_cmdq_issue_cmd(struct arm_smmu_device *smmu,
888 				    struct arm_smmu_cmdq_ent *ent)
889 {
890 	u32 until;
891 	u64 cmd[CMDQ_ENT_DWORDS];
892 	bool wfe = !!(smmu->features & ARM_SMMU_FEAT_SEV);
893 	struct arm_smmu_queue *q = &smmu->cmdq.q;
894 
895 	if (arm_smmu_cmdq_build_cmd(cmd, ent)) {
896 		dev_warn(smmu->dev, "ignoring unknown CMDQ opcode 0x%x\n",
897 			 ent->opcode);
898 		return;
899 	}
900 
901 	spin_lock(&smmu->cmdq.lock);
902 	while (until = q->prod + 1, queue_insert_raw(q, cmd) == -ENOSPC) {
903 		/*
904 		 * Keep the queue locked, otherwise the producer could wrap
905 		 * twice and we could see a future consumer pointer that looks
906 		 * like it's behind us.
907 		 */
908 		if (queue_poll_cons(q, until, wfe))
909 			dev_err_ratelimited(smmu->dev, "CMDQ timeout\n");
910 	}
911 
912 	if (ent->opcode == CMDQ_OP_CMD_SYNC && queue_poll_cons(q, until, wfe))
913 		dev_err_ratelimited(smmu->dev, "CMD_SYNC timeout\n");
914 	spin_unlock(&smmu->cmdq.lock);
915 }
916 
917 /* Context descriptor manipulation functions */
918 static u64 arm_smmu_cpu_tcr_to_cd(u64 tcr)
919 {
920 	u64 val = 0;
921 
922 	/* Repack the TCR. Just care about TTBR0 for now */
923 	val |= ARM_SMMU_TCR2CD(tcr, T0SZ);
924 	val |= ARM_SMMU_TCR2CD(tcr, TG0);
925 	val |= ARM_SMMU_TCR2CD(tcr, IRGN0);
926 	val |= ARM_SMMU_TCR2CD(tcr, ORGN0);
927 	val |= ARM_SMMU_TCR2CD(tcr, SH0);
928 	val |= ARM_SMMU_TCR2CD(tcr, EPD0);
929 	val |= ARM_SMMU_TCR2CD(tcr, EPD1);
930 	val |= ARM_SMMU_TCR2CD(tcr, IPS);
931 	val |= ARM_SMMU_TCR2CD(tcr, TBI0);
932 
933 	return val;
934 }
935 
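/*
 * Lay out a single (linear) context descriptor: dword 0 carries the
 * repacked TCR, ASID and validity flags, dword 1 the TTB0 pointer and
 * dword 3 the MAIR, matching the writes below.
 */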
936 static void arm_smmu_write_ctx_desc(struct arm_smmu_device *smmu,
937 				    struct arm_smmu_s1_cfg *cfg)
938 {
939 	u64 val;
940 
941 	/*
942 	 * We don't need to issue any invalidation here, as we'll invalidate
943 	 * the STE when installing the new entry anyway.
944 	 */
945 	val = arm_smmu_cpu_tcr_to_cd(cfg->cd.tcr) |
946 #ifdef __BIG_ENDIAN
947 	      CTXDESC_CD_0_ENDI |
948 #endif
949 	      CTXDESC_CD_0_R | CTXDESC_CD_0_A | CTXDESC_CD_0_ASET_PRIVATE |
950 	      CTXDESC_CD_0_AA64 | (u64)cfg->cd.asid << CTXDESC_CD_0_ASID_SHIFT |
951 	      CTXDESC_CD_0_V;
952 	cfg->cdptr[0] = cpu_to_le64(val);
953 
954 	val = cfg->cd.ttbr & CTXDESC_CD_1_TTB0_MASK << CTXDESC_CD_1_TTB0_SHIFT;
955 	cfg->cdptr[1] = cpu_to_le64(val);
956 
957 	cfg->cdptr[3] = cpu_to_le64(cfg->cd.mair << CTXDESC_CD_3_MAIR_SHIFT);
958 }
959 
960 /* Stream table manipulation functions */
961 static void
962 arm_smmu_write_strtab_l1_desc(__le64 *dst, struct arm_smmu_strtab_l1_desc *desc)
963 {
964 	u64 val = 0;
965 
966 	val |= (desc->span & STRTAB_L1_DESC_SPAN_MASK)
967 		<< STRTAB_L1_DESC_SPAN_SHIFT;
968 	val |= desc->l2ptr_dma &
969 	       STRTAB_L1_DESC_L2PTR_MASK << STRTAB_L1_DESC_L2PTR_SHIFT;
970 
971 	*dst = cpu_to_le64(val);
972 }
973 
974 static void arm_smmu_sync_ste_for_sid(struct arm_smmu_device *smmu, u32 sid)
975 {
976 	struct arm_smmu_cmdq_ent cmd = {
977 		.opcode	= CMDQ_OP_CFGI_STE,
978 		.cfgi	= {
979 			.sid	= sid,
980 			.leaf	= true,
981 		},
982 	};
983 
984 	arm_smmu_cmdq_issue_cmd(smmu, &cmd);
985 	cmd.opcode = CMDQ_OP_CMD_SYNC;
986 	arm_smmu_cmdq_issue_cmd(smmu, &cmd);
987 }
988 
989 static void arm_smmu_write_strtab_ent(struct arm_smmu_device *smmu, u32 sid,
990 				      __le64 *dst, struct arm_smmu_strtab_ent *ste)
991 {
992 	/*
993 	 * This is hideously complicated, but we only really care about
994 	 * three cases at the moment:
995 	 *
996 	 * 1. Invalid (all zero) -> bypass  (init)
997 	 * 2. Bypass -> translation (attach)
998 	 * 3. Translation -> bypass (detach)
999 	 *
1000 	 * Given that we can't update the STE atomically and the SMMU
1001 	 * doesn't read the thing in a defined order, that leaves us
1002 	 * with the following maintenance requirements:
1003 	 *
1004 	 * 1. Update Config, return (init time STEs aren't live)
1005 	 * 2. Write everything apart from dword 0, sync, write dword 0, sync
1006 	 * 3. Update Config, sync
1007 	 */
1008 	u64 val = le64_to_cpu(dst[0]);
1009 	bool ste_live = false;
1010 	struct arm_smmu_cmdq_ent prefetch_cmd = {
1011 		.opcode		= CMDQ_OP_PREFETCH_CFG,
1012 		.prefetch	= {
1013 			.sid	= sid,
1014 		},
1015 	};
1016 
1017 	if (val & STRTAB_STE_0_V) {
1018 		u64 cfg;
1019 
1020 		cfg = val & STRTAB_STE_0_CFG_MASK << STRTAB_STE_0_CFG_SHIFT;
1021 		switch (cfg) {
1022 		case STRTAB_STE_0_CFG_BYPASS:
1023 			break;
1024 		case STRTAB_STE_0_CFG_S1_TRANS:
1025 		case STRTAB_STE_0_CFG_S2_TRANS:
1026 			ste_live = true;
1027 			break;
1028 		default:
1029 			BUG(); /* STE corruption */
1030 		}
1031 	}
1032 
1033 	/* Nuke the existing Config, as we're going to rewrite it */
1034 	val &= ~(STRTAB_STE_0_CFG_MASK << STRTAB_STE_0_CFG_SHIFT);
1035 
1036 	if (ste->valid)
1037 		val |= STRTAB_STE_0_V;
1038 	else
1039 		val &= ~STRTAB_STE_0_V;
1040 
1041 	if (ste->bypass) {
1042 		val |= disable_bypass ? STRTAB_STE_0_CFG_ABORT
1043 				      : STRTAB_STE_0_CFG_BYPASS;
1044 		dst[0] = cpu_to_le64(val);
1045 		dst[2] = 0; /* Nuke the VMID */
1046 		if (ste_live)
1047 			arm_smmu_sync_ste_for_sid(smmu, sid);
1048 		return;
1049 	}
1050 
1051 	if (ste->s1_cfg) {
1052 		BUG_ON(ste_live);
1053 		dst[1] = cpu_to_le64(
1054 			 STRTAB_STE_1_S1C_CACHE_WBRA
1055 			 << STRTAB_STE_1_S1CIR_SHIFT |
1056 			 STRTAB_STE_1_S1C_CACHE_WBRA
1057 			 << STRTAB_STE_1_S1COR_SHIFT |
1058 			 STRTAB_STE_1_S1C_SH_ISH << STRTAB_STE_1_S1CSH_SHIFT |
1059 			 STRTAB_STE_1_S1STALLD |
1060 #ifdef CONFIG_PCI_ATS
1061 			 STRTAB_STE_1_EATS_TRANS << STRTAB_STE_1_EATS_SHIFT |
1062 #endif
1063 			 STRTAB_STE_1_STRW_NSEL1 << STRTAB_STE_1_STRW_SHIFT);
1064 
1065 		val |= (ste->s1_cfg->cdptr_dma & STRTAB_STE_0_S1CTXPTR_MASK
1066 		        << STRTAB_STE_0_S1CTXPTR_SHIFT) |
1067 			STRTAB_STE_0_CFG_S1_TRANS;
1068 
1069 	}
1070 
1071 	if (ste->s2_cfg) {
1072 		BUG_ON(ste_live);
1073 		dst[2] = cpu_to_le64(
1074 			 ste->s2_cfg->vmid << STRTAB_STE_2_S2VMID_SHIFT |
1075 			 (ste->s2_cfg->vtcr & STRTAB_STE_2_VTCR_MASK)
1076 			  << STRTAB_STE_2_VTCR_SHIFT |
1077 #ifdef __BIG_ENDIAN
1078 			 STRTAB_STE_2_S2ENDI |
1079 #endif
1080 			 STRTAB_STE_2_S2PTW | STRTAB_STE_2_S2AA64 |
1081 			 STRTAB_STE_2_S2R);
1082 
1083 		dst[3] = cpu_to_le64(ste->s2_cfg->vttbr &
1084 			 STRTAB_STE_3_S2TTB_MASK << STRTAB_STE_3_S2TTB_SHIFT);
1085 
1086 		val |= STRTAB_STE_0_CFG_S2_TRANS;
1087 	}
1088 
1089 	arm_smmu_sync_ste_for_sid(smmu, sid);
1090 	dst[0] = cpu_to_le64(val);
1091 	arm_smmu_sync_ste_for_sid(smmu, sid);
1092 
1093 	/* It's likely that we'll want to use the new STE soon */
1094 	if (!(smmu->options & ARM_SMMU_OPT_SKIP_PREFETCH))
1095 		arm_smmu_cmdq_issue_cmd(smmu, &prefetch_cmd);
1096 }
1097 
1098 static void arm_smmu_init_bypass_stes(u64 *strtab, unsigned int nent)
1099 {
1100 	unsigned int i;
1101 	struct arm_smmu_strtab_ent ste = {
1102 		.valid	= true,
1103 		.bypass	= true,
1104 	};
1105 
1106 	for (i = 0; i < nent; ++i) {
1107 		arm_smmu_write_strtab_ent(NULL, -1, strtab, &ste);
1108 		strtab += STRTAB_STE_DWORDS;
1109 	}
1110 }
1111 
1112 static int arm_smmu_init_l2_strtab(struct arm_smmu_device *smmu, u32 sid)
1113 {
1114 	size_t size;
1115 	void *strtab;
1116 	struct arm_smmu_strtab_cfg *cfg = &smmu->strtab_cfg;
1117 	struct arm_smmu_strtab_l1_desc *desc = &cfg->l1_desc[sid >> STRTAB_SPLIT];
1118 
1119 	if (desc->l2ptr)
1120 		return 0;
1121 
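	/* 1 << (8 + 3 + 3) bytes == 16KB: 256 STEs of 64 bytes each */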
1122 	size = 1 << (STRTAB_SPLIT + ilog2(STRTAB_STE_DWORDS) + 3);
1123 	strtab = &cfg->strtab[(sid >> STRTAB_SPLIT) * STRTAB_L1_DESC_DWORDS];
1124 
1125 	desc->span = STRTAB_SPLIT + 1;
1126 	desc->l2ptr = dma_zalloc_coherent(smmu->dev, size, &desc->l2ptr_dma,
1127 					  GFP_KERNEL);
1128 	if (!desc->l2ptr) {
1129 		dev_err(smmu->dev,
1130 			"failed to allocate l2 stream table for SID %u\n",
1131 			sid);
1132 		return -ENOMEM;
1133 	}
1134 
1135 	arm_smmu_init_bypass_stes(desc->l2ptr, 1 << STRTAB_SPLIT);
1136 	arm_smmu_write_strtab_l1_desc(strtab, desc);
1137 	return 0;
1138 }
1139 
1140 /* IRQ and event handlers */
1141 static irqreturn_t arm_smmu_evtq_thread(int irq, void *dev)
1142 {
1143 	int i;
1144 	struct arm_smmu_device *smmu = dev;
1145 	struct arm_smmu_queue *q = &smmu->evtq.q;
1146 	u64 evt[EVTQ_ENT_DWORDS];
1147 
1148 	while (!queue_remove_raw(q, evt)) {
1149 		u8 id = evt[0] >> EVTQ_0_ID_SHIFT & EVTQ_0_ID_MASK;
1150 
1151 		dev_info(smmu->dev, "event 0x%02x received:\n", id);
1152 		for (i = 0; i < ARRAY_SIZE(evt); ++i)
1153 			dev_info(smmu->dev, "\t0x%016llx\n",
1154 				 (unsigned long long)evt[i]);
1155 	}
1156 
1157 	/* Sync our overflow flag, as we believe we're up to speed */
1158 	q->cons = Q_OVF(q, q->prod) | Q_WRP(q, q->cons) | Q_IDX(q, q->cons);
1159 	return IRQ_HANDLED;
1160 }
1161 
1162 static irqreturn_t arm_smmu_evtq_handler(int irq, void *dev)
1163 {
1164 	irqreturn_t ret = IRQ_WAKE_THREAD;
1165 	struct arm_smmu_device *smmu = dev;
1166 	struct arm_smmu_queue *q = &smmu->evtq.q;
1167 
1168 	/*
1169 	 * Not much we can do on overflow, so scream and pretend we're
1170 	 * trying harder.
1171 	 */
1172 	if (queue_sync_prod(q) == -EOVERFLOW)
1173 		dev_err(smmu->dev, "EVTQ overflow detected -- events lost\n");
1174 	else if (queue_empty(q))
1175 		ret = IRQ_NONE;
1176 
1177 	return ret;
1178 }
1179 
1180 static irqreturn_t arm_smmu_priq_thread(int irq, void *dev)
1181 {
1182 	struct arm_smmu_device *smmu = dev;
1183 	struct arm_smmu_queue *q = &smmu->priq.q;
1184 	u64 evt[PRIQ_ENT_DWORDS];
1185 
1186 	while (!queue_remove_raw(q, evt)) {
1187 		u32 sid, ssid;
1188 		u16 grpid;
1189 		bool ssv, last;
1190 
1191 		sid = evt[0] >> PRIQ_0_SID_SHIFT & PRIQ_0_SID_MASK;
1192 		ssv = evt[0] & PRIQ_0_SSID_V;
1193 		ssid = ssv ? evt[0] >> PRIQ_0_SSID_SHIFT & PRIQ_0_SSID_MASK : 0;
1194 		last = evt[0] & PRIQ_0_PRG_LAST;
1195 		grpid = evt[1] >> PRIQ_1_PRG_IDX_SHIFT & PRIQ_1_PRG_IDX_MASK;
1196 
1197 		dev_info(smmu->dev, "unexpected PRI request received:\n");
1198 		dev_info(smmu->dev,
1199 			 "\tsid 0x%08x.0x%05x: [%u%s] %sprivileged %s%s%s access at iova 0x%016llx\n",
1200 			 sid, ssid, grpid, last ? "L" : "",
1201 			 evt[0] & PRIQ_0_PERM_PRIV ? "" : "un",
1202 			 evt[0] & PRIQ_0_PERM_READ ? "R" : "",
1203 			 evt[0] & PRIQ_0_PERM_WRITE ? "W" : "",
1204 			 evt[0] & PRIQ_0_PERM_EXEC ? "X" : "",
1205 			 evt[1] & PRIQ_1_ADDR_MASK << PRIQ_1_ADDR_SHIFT);
1206 
1207 		if (last) {
1208 			struct arm_smmu_cmdq_ent cmd = {
1209 				.opcode			= CMDQ_OP_PRI_RESP,
1210 				.substream_valid	= ssv,
1211 				.pri			= {
1212 					.sid	= sid,
1213 					.ssid	= ssid,
1214 					.grpid	= grpid,
1215 					.resp	= PRI_RESP_DENY,
1216 				},
1217 			};
1218 
1219 			arm_smmu_cmdq_issue_cmd(smmu, &cmd);
1220 		}
1221 	}
1222 
1223 	/* Sync our overflow flag, as we believe we're up to speed */
1224 	q->cons = Q_OVF(q, q->prod) | Q_WRP(q, q->cons) | Q_IDX(q, q->cons);
1225 	return IRQ_HANDLED;
1226 }
1227 
1228 static irqreturn_t arm_smmu_priq_handler(int irq, void *dev)
1229 {
1230 	irqreturn_t ret = IRQ_WAKE_THREAD;
1231 	struct arm_smmu_device *smmu = dev;
1232 	struct arm_smmu_queue *q = &smmu->priq.q;
1233 
1234 	/* PRIQ overflow indicates a programming error */
1235 	if (queue_sync_prod(q) == -EOVERFLOW)
1236 		dev_err(smmu->dev, "PRIQ overflow detected -- requests lost\n");
1237 	else if (queue_empty(q))
1238 		ret = IRQ_NONE;
1239 
1240 	return ret;
1241 }
1242 
1243 static irqreturn_t arm_smmu_cmdq_sync_handler(int irq, void *dev)
1244 {
1245 	/* We don't actually use CMD_SYNC interrupts for anything */
1246 	return IRQ_HANDLED;
1247 }
1248 
1249 static int arm_smmu_device_disable(struct arm_smmu_device *smmu);
1250 
1251 static irqreturn_t arm_smmu_gerror_handler(int irq, void *dev)
1252 {
1253 	u32 gerror, gerrorn;
1254 	struct arm_smmu_device *smmu = dev;
1255 
1256 	gerror = readl_relaxed(smmu->base + ARM_SMMU_GERROR);
1257 	gerrorn = readl_relaxed(smmu->base + ARM_SMMU_GERRORN);
1258 
1259 	gerror ^= gerrorn;
1260 	if (!(gerror & GERROR_ERR_MASK))
1261 		return IRQ_NONE; /* No errors pending */
1262 
1263 	dev_warn(smmu->dev,
1264 		 "unexpected global error reported (0x%08x), this could be serious\n",
1265 		 gerror);
1266 
1267 	if (gerror & GERROR_SFM_ERR) {
1268 		dev_err(smmu->dev, "device has entered Service Failure Mode!\n");
1269 		arm_smmu_device_disable(smmu);
1270 	}
1271 
1272 	if (gerror & GERROR_MSI_GERROR_ABT_ERR)
1273 		dev_warn(smmu->dev, "GERROR MSI write aborted\n");
1274 
1275 	if (gerror & GERROR_MSI_PRIQ_ABT_ERR) {
1276 		dev_warn(smmu->dev, "PRIQ MSI write aborted\n");
1277 		arm_smmu_priq_handler(irq, smmu->dev);
1278 	}
1279 
1280 	if (gerror & GERROR_MSI_EVTQ_ABT_ERR) {
1281 		dev_warn(smmu->dev, "EVTQ MSI write aborted\n");
1282 		arm_smmu_evtq_handler(irq, smmu->dev);
1283 	}
1284 
1285 	if (gerror & GERROR_MSI_CMDQ_ABT_ERR) {
1286 		dev_warn(smmu->dev, "CMDQ MSI write aborted\n");
1287 		arm_smmu_cmdq_sync_handler(irq, smmu->dev);
1288 	}
1289 
1290 	if (gerror & GERROR_PRIQ_ABT_ERR)
1291 		dev_err(smmu->dev, "PRIQ write aborted -- events may have been lost\n");
1292 
1293 	if (gerror & GERROR_EVTQ_ABT_ERR)
1294 		dev_err(smmu->dev, "EVTQ write aborted -- events may have been lost\n");
1295 
1296 	if (gerror & GERROR_CMDQ_ERR)
1297 		arm_smmu_cmdq_skip_err(smmu);
1298 
1299 	writel(gerror, smmu->base + ARM_SMMU_GERRORN);
1300 	return IRQ_HANDLED;
1301 }
1302 
1303 /* IO_PGTABLE API */
1304 static void __arm_smmu_tlb_sync(struct arm_smmu_device *smmu)
1305 {
1306 	struct arm_smmu_cmdq_ent cmd;
1307 
1308 	cmd.opcode = CMDQ_OP_CMD_SYNC;
1309 	arm_smmu_cmdq_issue_cmd(smmu, &cmd);
1310 }
1311 
1312 static void arm_smmu_tlb_sync(void *cookie)
1313 {
1314 	struct arm_smmu_domain *smmu_domain = cookie;
1315 	__arm_smmu_tlb_sync(smmu_domain->smmu);
1316 }
1317 
1318 static void arm_smmu_tlb_inv_context(void *cookie)
1319 {
1320 	struct arm_smmu_domain *smmu_domain = cookie;
1321 	struct arm_smmu_device *smmu = smmu_domain->smmu;
1322 	struct arm_smmu_cmdq_ent cmd;
1323 
1324 	if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1) {
1325 		cmd.opcode	= CMDQ_OP_TLBI_NH_ASID;
1326 		cmd.tlbi.asid	= smmu_domain->s1_cfg.cd.asid;
1327 		cmd.tlbi.vmid	= 0;
1328 	} else {
1329 		cmd.opcode	= CMDQ_OP_TLBI_S12_VMALL;
1330 		cmd.tlbi.vmid	= smmu_domain->s2_cfg.vmid;
1331 	}
1332 
1333 	arm_smmu_cmdq_issue_cmd(smmu, &cmd);
1334 	__arm_smmu_tlb_sync(smmu);
1335 }
1336 
1337 static void arm_smmu_tlb_inv_range_nosync(unsigned long iova, size_t size,
1338 					  bool leaf, void *cookie)
1339 {
1340 	struct arm_smmu_domain *smmu_domain = cookie;
1341 	struct arm_smmu_device *smmu = smmu_domain->smmu;
1342 	struct arm_smmu_cmdq_ent cmd = {
1343 		.tlbi = {
1344 			.leaf	= leaf,
1345 			.addr	= iova,
1346 		},
1347 	};
1348 
1349 	if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1) {
1350 		cmd.opcode	= CMDQ_OP_TLBI_NH_VA;
1351 		cmd.tlbi.asid	= smmu_domain->s1_cfg.cd.asid;
1352 	} else {
1353 		cmd.opcode	= CMDQ_OP_TLBI_S2_IPA;
1354 		cmd.tlbi.vmid	= smmu_domain->s2_cfg.vmid;
1355 	}
1356 
1357 	arm_smmu_cmdq_issue_cmd(smmu, &cmd);
1358 }
1359 
1360 static struct iommu_gather_ops arm_smmu_gather_ops = {
1361 	.tlb_flush_all	= arm_smmu_tlb_inv_context,
1362 	.tlb_add_flush	= arm_smmu_tlb_inv_range_nosync,
1363 	.tlb_sync	= arm_smmu_tlb_sync,
1364 };
1365 
1366 /* IOMMU API */
1367 static bool arm_smmu_capable(enum iommu_cap cap)
1368 {
1369 	switch (cap) {
1370 	case IOMMU_CAP_CACHE_COHERENCY:
1371 		return true;
1372 	case IOMMU_CAP_INTR_REMAP:
1373 		return true; /* MSIs are just memory writes */
1374 	case IOMMU_CAP_NOEXEC:
1375 		return true;
1376 	default:
1377 		return false;
1378 	}
1379 }
1380 
1381 static struct iommu_domain *arm_smmu_domain_alloc(unsigned type)
1382 {
1383 	struct arm_smmu_domain *smmu_domain;
1384 
1385 	if (type != IOMMU_DOMAIN_UNMANAGED)
1386 		return NULL;
1387 
1388 	/*
1389 	 * Allocate the domain and initialise some of its data structures.
1390 	 * We can't really do anything meaningful until we've added a
1391 	 * master.
1392 	 */
1393 	smmu_domain = kzalloc(sizeof(*smmu_domain), GFP_KERNEL);
1394 	if (!smmu_domain)
1395 		return NULL;
1396 
1397 	mutex_init(&smmu_domain->init_mutex);
1398 	spin_lock_init(&smmu_domain->pgtbl_lock);
1399 	return &smmu_domain->domain;
1400 }
1401 
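/*
 * find_first_zero_bit() isn't atomic with respect to the set, so if
 * another caller claims the bit before our test_and_set_bit() we just
 * retry with the next free index.
 */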
1402 static int arm_smmu_bitmap_alloc(unsigned long *map, int span)
1403 {
1404 	int idx, size = 1 << span;
1405 
1406 	do {
1407 		idx = find_first_zero_bit(map, size);
1408 		if (idx == size)
1409 			return -ENOSPC;
1410 	} while (test_and_set_bit(idx, map));
1411 
1412 	return idx;
1413 }
1414 
1415 static void arm_smmu_bitmap_free(unsigned long *map, int idx)
1416 {
1417 	clear_bit(idx, map);
1418 }
1419 
1420 static void arm_smmu_domain_free(struct iommu_domain *domain)
1421 {
1422 	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
1423 	struct arm_smmu_device *smmu = smmu_domain->smmu;
1424 
1425 	free_io_pgtable_ops(smmu_domain->pgtbl_ops);
1426 
1427 	/* Free the CD and ASID, if we allocated them */
1428 	if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1) {
1429 		struct arm_smmu_s1_cfg *cfg = &smmu_domain->s1_cfg;
1430 
1431 		if (cfg->cdptr) {
1432 			dma_free_coherent(smmu_domain->smmu->dev,
1433 					  CTXDESC_CD_DWORDS << 3,
1434 					  cfg->cdptr,
1435 					  cfg->cdptr_dma);
1436 
1437 			arm_smmu_bitmap_free(smmu->asid_map, cfg->cd.asid);
1438 		}
1439 	} else {
1440 		struct arm_smmu_s2_cfg *cfg = &smmu_domain->s2_cfg;
1441 		if (cfg->vmid)
1442 			arm_smmu_bitmap_free(smmu->vmid_map, cfg->vmid);
1443 	}
1444 
1445 	kfree(smmu_domain);
1446 }
1447 
1448 static int arm_smmu_domain_finalise_s1(struct arm_smmu_domain *smmu_domain,
1449 				       struct io_pgtable_cfg *pgtbl_cfg)
1450 {
1451 	int ret;
1452 	int asid;
1453 	struct arm_smmu_device *smmu = smmu_domain->smmu;
1454 	struct arm_smmu_s1_cfg *cfg = &smmu_domain->s1_cfg;
1455 
1456 	asid = arm_smmu_bitmap_alloc(smmu->asid_map, smmu->asid_bits);
1457 	if (IS_ERR_VALUE(asid))
1458 		return asid;
1459 
1460 	cfg->cdptr = dma_zalloc_coherent(smmu->dev, CTXDESC_CD_DWORDS << 3,
1461 					 &cfg->cdptr_dma, GFP_KERNEL);
1462 	if (!cfg->cdptr) {
1463 		dev_warn(smmu->dev, "failed to allocate context descriptor\n");
1464 		ret = -ENOMEM;
1465 		goto out_free_asid;
1466 	}
1467 
1468 	cfg->cd.asid	= (u16)asid;
1469 	cfg->cd.ttbr	= pgtbl_cfg->arm_lpae_s1_cfg.ttbr[0];
1470 	cfg->cd.tcr	= pgtbl_cfg->arm_lpae_s1_cfg.tcr;
1471 	cfg->cd.mair	= pgtbl_cfg->arm_lpae_s1_cfg.mair[0];
1472 	return 0;
1473 
1474 out_free_asid:
1475 	arm_smmu_bitmap_free(smmu->asid_map, asid);
1476 	return ret;
1477 }
1478 
1479 static int arm_smmu_domain_finalise_s2(struct arm_smmu_domain *smmu_domain,
1480 				       struct io_pgtable_cfg *pgtbl_cfg)
1481 {
1482 	int vmid;
1483 	struct arm_smmu_device *smmu = smmu_domain->smmu;
1484 	struct arm_smmu_s2_cfg *cfg = &smmu_domain->s2_cfg;
1485 
1486 	vmid = arm_smmu_bitmap_alloc(smmu->vmid_map, smmu->vmid_bits);
1487 	if (IS_ERR_VALUE(vmid))
1488 		return vmid;
1489 
1490 	cfg->vmid	= (u16)vmid;
1491 	cfg->vttbr	= pgtbl_cfg->arm_lpae_s2_cfg.vttbr;
1492 	cfg->vtcr	= pgtbl_cfg->arm_lpae_s2_cfg.vtcr;
1493 	return 0;
1494 }
1495 
1496 static struct iommu_ops arm_smmu_ops;
1497 
1498 static int arm_smmu_domain_finalise(struct iommu_domain *domain)
1499 {
1500 	int ret;
1501 	unsigned long ias, oas;
1502 	enum io_pgtable_fmt fmt;
1503 	struct io_pgtable_cfg pgtbl_cfg;
1504 	struct io_pgtable_ops *pgtbl_ops;
1505 	int (*finalise_stage_fn)(struct arm_smmu_domain *,
1506 				 struct io_pgtable_cfg *);
1507 	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
1508 	struct arm_smmu_device *smmu = smmu_domain->smmu;
1509 
1510 	/* Restrict the stage to what we can actually support */
1511 	if (!(smmu->features & ARM_SMMU_FEAT_TRANS_S1))
1512 		smmu_domain->stage = ARM_SMMU_DOMAIN_S2;
1513 	if (!(smmu->features & ARM_SMMU_FEAT_TRANS_S2))
1514 		smmu_domain->stage = ARM_SMMU_DOMAIN_S1;
1515 
1516 	switch (smmu_domain->stage) {
1517 	case ARM_SMMU_DOMAIN_S1:
1518 		ias = VA_BITS;
1519 		oas = smmu->ias;
1520 		fmt = ARM_64_LPAE_S1;
1521 		finalise_stage_fn = arm_smmu_domain_finalise_s1;
1522 		break;
1523 	case ARM_SMMU_DOMAIN_NESTED:
1524 	case ARM_SMMU_DOMAIN_S2:
1525 		ias = smmu->ias;
1526 		oas = smmu->oas;
1527 		fmt = ARM_64_LPAE_S2;
1528 		finalise_stage_fn = arm_smmu_domain_finalise_s2;
1529 		break;
1530 	default:
1531 		return -EINVAL;
1532 	}
1533 
1534 	pgtbl_cfg = (struct io_pgtable_cfg) {
1535 		.pgsize_bitmap	= arm_smmu_ops.pgsize_bitmap,
1536 		.ias		= ias,
1537 		.oas		= oas,
1538 		.tlb		= &arm_smmu_gather_ops,
1539 		.iommu_dev	= smmu->dev,
1540 	};
1541 
1542 	pgtbl_ops = alloc_io_pgtable_ops(fmt, &pgtbl_cfg, smmu_domain);
1543 	if (!pgtbl_ops)
1544 		return -ENOMEM;
1545 
1546 	arm_smmu_ops.pgsize_bitmap = pgtbl_cfg.pgsize_bitmap;
1547 	smmu_domain->pgtbl_ops = pgtbl_ops;
1548 
1549 	ret = finalise_stage_fn(smmu_domain, &pgtbl_cfg);
1550 	if (IS_ERR_VALUE(ret))
1551 		free_io_pgtable_ops(pgtbl_ops);
1552 
1553 	return ret;
1554 }
1555 
1556 static struct arm_smmu_group *arm_smmu_group_get(struct device *dev)
1557 {
1558 	struct iommu_group *group;
1559 	struct arm_smmu_group *smmu_group;
1560 
1561 	group = iommu_group_get(dev);
1562 	if (!group)
1563 		return NULL;
1564 
1565 	smmu_group = iommu_group_get_iommudata(group);
1566 	iommu_group_put(group);
1567 	return smmu_group;
1568 }
1569 
1570 static __le64 *arm_smmu_get_step_for_sid(struct arm_smmu_device *smmu, u32 sid)
1571 {
1572 	__le64 *step;
1573 	struct arm_smmu_strtab_cfg *cfg = &smmu->strtab_cfg;
1574 
1575 	if (smmu->features & ARM_SMMU_FEAT_2_LVL_STRTAB) {
1576 		struct arm_smmu_strtab_l1_desc *l1_desc;
1577 		int idx;
1578 
1579 		/* Two-level walk */
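		/* e.g. sid 0x0104: L1 descriptor 1, STE 4 in its L2 table */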
1580 		idx = (sid >> STRTAB_SPLIT) * STRTAB_L1_DESC_DWORDS;
1581 		l1_desc = &cfg->l1_desc[idx];
1582 		idx = (sid & ((1 << STRTAB_SPLIT) - 1)) * STRTAB_STE_DWORDS;
1583 		step = &l1_desc->l2ptr[idx];
1584 	} else {
1585 		/* Simple linear lookup */
1586 		step = &cfg->strtab[sid * STRTAB_STE_DWORDS];
1587 	}
1588 
1589 	return step;
1590 }
1591 
1592 static int arm_smmu_install_ste_for_group(struct arm_smmu_group *smmu_group)
1593 {
1594 	int i;
1595 	struct arm_smmu_domain *smmu_domain = smmu_group->domain;
1596 	struct arm_smmu_strtab_ent *ste = &smmu_group->ste;
1597 	struct arm_smmu_device *smmu = smmu_group->smmu;
1598 
1599 	if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1) {
1600 		ste->s1_cfg = &smmu_domain->s1_cfg;
1601 		ste->s2_cfg = NULL;
1602 		arm_smmu_write_ctx_desc(smmu, ste->s1_cfg);
1603 	} else {
1604 		ste->s1_cfg = NULL;
1605 		ste->s2_cfg = &smmu_domain->s2_cfg;
1606 	}
1607 
1608 	for (i = 0; i < smmu_group->num_sids; ++i) {
1609 		u32 sid = smmu_group->sids[i];
1610 		__le64 *step = arm_smmu_get_step_for_sid(smmu, sid);
1611 
1612 		arm_smmu_write_strtab_ent(smmu, sid, step, ste);
1613 	}
1614 
1615 	return 0;
1616 }
1617 
1618 static int arm_smmu_attach_dev(struct iommu_domain *domain, struct device *dev)
1619 {
1620 	int ret = 0;
1621 	struct arm_smmu_device *smmu;
1622 	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
1623 	struct arm_smmu_group *smmu_group = arm_smmu_group_get(dev);
1624 
1625 	if (!smmu_group)
1626 		return -ENOENT;
1627 
1628 	/* Already attached to a different domain? */
1629 	if (smmu_group->domain && smmu_group->domain != smmu_domain)
1630 		return -EEXIST;
1631 
1632 	smmu = smmu_group->smmu;
1633 	mutex_lock(&smmu_domain->init_mutex);
1634 
1635 	if (!smmu_domain->smmu) {
1636 		smmu_domain->smmu = smmu;
1637 		ret = arm_smmu_domain_finalise(domain);
1638 		if (ret) {
1639 			smmu_domain->smmu = NULL;
1640 			goto out_unlock;
1641 		}
1642 	} else if (smmu_domain->smmu != smmu) {
1643 		dev_err(dev,
1644 			"cannot attach to SMMU %s (upstream of %s)\n",
1645 			dev_name(smmu_domain->smmu->dev),
1646 			dev_name(smmu->dev));
1647 		ret = -ENXIO;
1648 		goto out_unlock;
1649 	}
1650 
1651 	/* Group already attached to this domain? */
1652 	if (smmu_group->domain)
1653 		goto out_unlock;
1654 
1655 	smmu_group->domain	= smmu_domain;
1656 	smmu_group->ste.bypass	= false;
1657 
1658 	ret = arm_smmu_install_ste_for_group(smmu_group);
1659 	if (IS_ERR_VALUE(ret))
1660 		smmu_group->domain = NULL;
1661 
1662 out_unlock:
1663 	mutex_unlock(&smmu_domain->init_mutex);
1664 	return ret;
1665 }
1666 
1667 static void arm_smmu_detach_dev(struct iommu_domain *domain, struct device *dev)
1668 {
1669 	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
1670 	struct arm_smmu_group *smmu_group = arm_smmu_group_get(dev);
1671 
1672 	BUG_ON(!smmu_domain);
1673 	BUG_ON(!smmu_group);
1674 
1675 	mutex_lock(&smmu_domain->init_mutex);
1676 	BUG_ON(smmu_group->domain != smmu_domain);
1677 
1678 	smmu_group->ste.bypass = true;
1679 	if (IS_ERR_VALUE(arm_smmu_install_ste_for_group(smmu_group)))
1680 		dev_warn(dev, "failed to install bypass STE\n");
1681 
1682 	smmu_group->domain = NULL;
1683 	mutex_unlock(&smmu_domain->init_mutex);
1684 }
1685 
1686 static int arm_smmu_map(struct iommu_domain *domain, unsigned long iova,
1687 			phys_addr_t paddr, size_t size, int prot)
1688 {
1689 	int ret;
1690 	unsigned long flags;
1691 	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
1692 	struct io_pgtable_ops *ops = smmu_domain->pgtbl_ops;
1693 
1694 	if (!ops)
1695 		return -ENODEV;
1696 
1697 	spin_lock_irqsave(&smmu_domain->pgtbl_lock, flags);
1698 	ret = ops->map(ops, iova, paddr, size, prot);
1699 	spin_unlock_irqrestore(&smmu_domain->pgtbl_lock, flags);
1700 	return ret;
1701 }
1702 
1703 static size_t
1704 arm_smmu_unmap(struct iommu_domain *domain, unsigned long iova, size_t size)
1705 {
1706 	size_t ret;
1707 	unsigned long flags;
1708 	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
1709 	struct io_pgtable_ops *ops = smmu_domain->pgtbl_ops;
1710 
1711 	if (!ops)
1712 		return 0;
1713 
1714 	spin_lock_irqsave(&smmu_domain->pgtbl_lock, flags);
1715 	ret = ops->unmap(ops, iova, size);
1716 	spin_unlock_irqrestore(&smmu_domain->pgtbl_lock, flags);
1717 	return ret;
1718 }
1719 
1720 static phys_addr_t
1721 arm_smmu_iova_to_phys(struct iommu_domain *domain, dma_addr_t iova)
1722 {
1723 	phys_addr_t ret;
1724 	unsigned long flags;
1725 	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
1726 	struct io_pgtable_ops *ops = smmu_domain->pgtbl_ops;
1727 
1728 	if (!ops)
1729 		return 0;
1730 
1731 	spin_lock_irqsave(&smmu_domain->pgtbl_lock, flags);
1732 	ret = ops->iova_to_phys(ops, iova);
1733 	spin_unlock_irqrestore(&smmu_domain->pgtbl_lock, flags);
1734 
1735 	return ret;
1736 }
1737 
1738 static int __arm_smmu_get_pci_sid(struct pci_dev *pdev, u16 alias, void *sidp)
1739 {
1740 	*(u32 *)sidp = alias;
1741 	return 0; /* Continue walking */
1742 }
1743 
1744 static void __arm_smmu_release_pci_iommudata(void *data)
1745 {
1746 	kfree(data);
1747 }
1748 
1749 static struct arm_smmu_device *arm_smmu_get_for_pci_dev(struct pci_dev *pdev)
1750 {
1751 	struct device_node *of_node;
1752 	struct platform_device *smmu_pdev;
1753 	struct arm_smmu_device *smmu = NULL;
1754 	struct pci_bus *bus = pdev->bus;
1755 
1756 	/* Walk up to the root bus */
1757 	while (!pci_is_root_bus(bus))
1758 		bus = bus->parent;
1759 
1760 	/* Follow the "iommus" phandle from the host controller */
1761 	of_node = of_parse_phandle(bus->bridge->parent->of_node, "iommus", 0);
1762 	if (!of_node)
1763 		return NULL;
1764 
1765 	/* See if we can find an SMMU corresponding to the phandle */
1766 	smmu_pdev = of_find_device_by_node(of_node);
1767 	if (smmu_pdev)
1768 		smmu = platform_get_drvdata(smmu_pdev);
1769 
1770 	of_node_put(of_node);
1771 	return smmu;
1772 }
1773 
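/*
 * With a 2-level stream table, each L1 descriptor covers a block of
 * 1 << STRTAB_SPLIT SIDs, so the addressable SID range scales
 * accordingly.
 */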
1774 static bool arm_smmu_sid_in_range(struct arm_smmu_device *smmu, u32 sid)
1775 {
1776 	unsigned long limit = smmu->strtab_cfg.num_l1_ents;
1777 
1778 	if (smmu->features & ARM_SMMU_FEAT_2_LVL_STRTAB)
1779 		limit *= 1UL << STRTAB_SPLIT;
1780 
1781 	return sid < limit;
1782 }
1783 
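/*
 * add_device: find (or allocate) the per-group SMMU data, derive the
 * SID from the PCI requester ID, check it against the stream table,
 * initialise the relevant L2 stream table if necessary and record the
 * SID in the group's SID array.
 */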
1784 static int arm_smmu_add_device(struct device *dev)
1785 {
1786 	int i, ret;
1787 	u32 sid, *sids;
1788 	struct pci_dev *pdev;
1789 	struct iommu_group *group;
1790 	struct arm_smmu_group *smmu_group;
1791 	struct arm_smmu_device *smmu;
1792 
1793 	/* We only support PCI, for now */
1794 	if (!dev_is_pci(dev))
1795 		return -ENODEV;
1796 
1797 	pdev = to_pci_dev(dev);
1798 	group = iommu_group_get_for_dev(dev);
1799 	if (IS_ERR(group))
1800 		return PTR_ERR(group);
1801 
1802 	smmu_group = iommu_group_get_iommudata(group);
1803 	if (!smmu_group) {
1804 		smmu = arm_smmu_get_for_pci_dev(pdev);
1805 		if (!smmu) {
1806 			ret = -ENOENT;
1807 			goto out_put_group;
1808 		}
1809 
1810 		smmu_group = kzalloc(sizeof(*smmu_group), GFP_KERNEL);
1811 		if (!smmu_group) {
1812 			ret = -ENOMEM;
1813 			goto out_put_group;
1814 		}
1815 
1816 		smmu_group->ste.valid	= true;
1817 		smmu_group->smmu	= smmu;
1818 		iommu_group_set_iommudata(group, smmu_group,
1819 					  __arm_smmu_release_pci_iommudata);
1820 	} else {
1821 		smmu = smmu_group->smmu;
1822 	}
1823 
1824 	/* Assume SID == RID until firmware tells us otherwise */
1825 	pci_for_each_dma_alias(pdev, __arm_smmu_get_pci_sid, &sid);
1826 	for (i = 0; i < smmu_group->num_sids; ++i) {
1827 		/* If we already know about this SID, then we're done */
1828 		if (smmu_group->sids[i] == sid)
1829 			return 0;
1830 	}
1831 
1832 	/* Check the SID is in range of the SMMU and our stream table */
1833 	if (!arm_smmu_sid_in_range(smmu, sid)) {
1834 		ret = -ERANGE;
1835 		goto out_put_group;
1836 	}
1837 
1838 	/* Ensure l2 strtab is initialised */
1839 	if (smmu->features & ARM_SMMU_FEAT_2_LVL_STRTAB) {
1840 		ret = arm_smmu_init_l2_strtab(smmu, sid);
1841 		if (ret)
1842 			goto out_put_group;
1843 	}
1844 
1845 	/* Resize the SID array for the group */
1846 	smmu_group->num_sids++;
1847 	sids = krealloc(smmu_group->sids, smmu_group->num_sids * sizeof(*sids),
1848 			GFP_KERNEL);
1849 	if (!sids) {
1850 		smmu_group->num_sids--;
1851 		ret = -ENOMEM;
1852 		goto out_put_group;
1853 	}
1854 
1855 	/* Add the new SID */
1856 	sids[smmu_group->num_sids - 1] = sid;
1857 	smmu_group->sids = sids;
1858 	return 0;
1859 
1860 out_put_group:
1861 	iommu_group_put(group);
1862 	return ret;
1863 }
1864 
1865 static void arm_smmu_remove_device(struct device *dev)
1866 {
1867 	iommu_group_remove_device(dev);
1868 }
1869 
1870 static int arm_smmu_domain_get_attr(struct iommu_domain *domain,
1871 				    enum iommu_attr attr, void *data)
1872 {
1873 	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
1874 
1875 	switch (attr) {
1876 	case DOMAIN_ATTR_NESTING:
1877 		*(int *)data = (smmu_domain->stage == ARM_SMMU_DOMAIN_NESTED);
1878 		return 0;
1879 	default:
1880 		return -ENODEV;
1881 	}
1882 }
1883 
1884 static int arm_smmu_domain_set_attr(struct iommu_domain *domain,
1885 				    enum iommu_attr attr, void *data)
1886 {
1887 	int ret = 0;
1888 	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
1889 
1890 	mutex_lock(&smmu_domain->init_mutex);
1891 
1892 	switch (attr) {
1893 	case DOMAIN_ATTR_NESTING:
1894 		if (smmu_domain->smmu) {
1895 			ret = -EPERM;
1896 			goto out_unlock;
1897 		}
1898 
1899 		if (*(int *)data)
1900 			smmu_domain->stage = ARM_SMMU_DOMAIN_NESTED;
1901 		else
1902 			smmu_domain->stage = ARM_SMMU_DOMAIN_S1;
1903 
1904 		break;
1905 	default:
1906 		ret = -ENODEV;
1907 	}
1908 
1909 out_unlock:
1910 	mutex_unlock(&smmu_domain->init_mutex);
1911 	return ret;
1912 }
1913 
1914 static struct iommu_ops arm_smmu_ops = {
1915 	.capable		= arm_smmu_capable,
1916 	.domain_alloc		= arm_smmu_domain_alloc,
1917 	.domain_free		= arm_smmu_domain_free,
1918 	.attach_dev		= arm_smmu_attach_dev,
1919 	.detach_dev		= arm_smmu_detach_dev,
1920 	.map			= arm_smmu_map,
1921 	.unmap			= arm_smmu_unmap,
1922 	.iova_to_phys		= arm_smmu_iova_to_phys,
1923 	.add_device		= arm_smmu_add_device,
1924 	.remove_device		= arm_smmu_remove_device,
1925 	.device_group		= pci_device_group,
1926 	.domain_get_attr	= arm_smmu_domain_get_attr,
1927 	.domain_set_attr	= arm_smmu_domain_set_attr,
1928 	.pgsize_bitmap		= -1UL, /* Restricted during device attach */
1929 };
1930 
1931 /* Probing and initialisation functions */
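/*
 * Each queue is a power-of-two ring of fixed-size entries allocated
 * from coherent DMA memory; q_base caches the value (base address and
 * log2 size) that will later be written to the corresponding *_BASE
 * register.
 */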
1932 static int arm_smmu_init_one_queue(struct arm_smmu_device *smmu,
1933 				   struct arm_smmu_queue *q,
1934 				   unsigned long prod_off,
1935 				   unsigned long cons_off,
1936 				   size_t dwords)
1937 {
1938 	size_t qsz = ((1 << q->max_n_shift) * dwords) << 3;
1939 
1940 	q->base = dma_alloc_coherent(smmu->dev, qsz, &q->base_dma, GFP_KERNEL);
1941 	if (!q->base) {
1942 		dev_err(smmu->dev, "failed to allocate queue (0x%zx bytes)\n",
1943 			qsz);
1944 		return -ENOMEM;
1945 	}
1946 
1947 	q->prod_reg	= smmu->base + prod_off;
1948 	q->cons_reg	= smmu->base + cons_off;
1949 	q->ent_dwords	= dwords;
1950 
1951 	q->q_base  = Q_BASE_RWA;
1952 	q->q_base |= q->base_dma & Q_BASE_ADDR_MASK << Q_BASE_ADDR_SHIFT;
1953 	q->q_base |= (q->max_n_shift & Q_BASE_LOG2SIZE_MASK)
1954 		     << Q_BASE_LOG2SIZE_SHIFT;
1955 
1956 	q->prod = q->cons = 0;
1957 	return 0;
1958 }
1959 
1960 static void arm_smmu_free_one_queue(struct arm_smmu_device *smmu,
1961 				    struct arm_smmu_queue *q)
1962 {
1963 	size_t qsz = ((1 << q->max_n_shift) * q->ent_dwords) << 3;
1964 
1965 	dma_free_coherent(smmu->dev, qsz, q->base, q->base_dma);
1966 }
1967 
1968 static void arm_smmu_free_queues(struct arm_smmu_device *smmu)
1969 {
1970 	arm_smmu_free_one_queue(smmu, &smmu->cmdq.q);
1971 	arm_smmu_free_one_queue(smmu, &smmu->evtq.q);
1972 
1973 	if (smmu->features & ARM_SMMU_FEAT_PRI)
1974 		arm_smmu_free_one_queue(smmu, &smmu->priq.q);
1975 }
1976 
1977 static int arm_smmu_init_queues(struct arm_smmu_device *smmu)
1978 {
1979 	int ret;
1980 
1981 	/* cmdq */
1982 	spin_lock_init(&smmu->cmdq.lock);
1983 	ret = arm_smmu_init_one_queue(smmu, &smmu->cmdq.q, ARM_SMMU_CMDQ_PROD,
1984 				      ARM_SMMU_CMDQ_CONS, CMDQ_ENT_DWORDS);
1985 	if (ret)
1986 		goto out;
1987 
1988 	/* evtq */
1989 	ret = arm_smmu_init_one_queue(smmu, &smmu->evtq.q, ARM_SMMU_EVTQ_PROD,
1990 				      ARM_SMMU_EVTQ_CONS, EVTQ_ENT_DWORDS);
1991 	if (ret)
1992 		goto out_free_cmdq;
1993 
1994 	/* priq */
1995 	if (!(smmu->features & ARM_SMMU_FEAT_PRI))
1996 		return 0;
1997 
1998 	ret = arm_smmu_init_one_queue(smmu, &smmu->priq.q, ARM_SMMU_PRIQ_PROD,
1999 				      ARM_SMMU_PRIQ_CONS, PRIQ_ENT_DWORDS);
2000 	if (ret)
2001 		goto out_free_evtq;
2002 
2003 	return 0;
2004 
2005 out_free_evtq:
2006 	arm_smmu_free_one_queue(smmu, &smmu->evtq.q);
2007 out_free_cmdq:
2008 	arm_smmu_free_one_queue(smmu, &smmu->cmdq.q);
2009 out:
2010 	return ret;
2011 }
2012 
2013 static void arm_smmu_free_l2_strtab(struct arm_smmu_device *smmu)
2014 {
2015 	int i;
2016 	size_t size;
2017 	struct arm_smmu_strtab_cfg *cfg = &smmu->strtab_cfg;
2018 
2019 	size = 1 << (STRTAB_SPLIT + ilog2(STRTAB_STE_DWORDS) + 3);
2020 	for (i = 0; i < cfg->num_l1_ents; ++i) {
2021 		struct arm_smmu_strtab_l1_desc *desc = &cfg->l1_desc[i];
2022 
2023 		if (!desc->l2ptr)
2024 			continue;
2025 
2026 		dma_free_coherent(smmu->dev, size, desc->l2ptr,
2027 				  desc->l2ptr_dma);
2028 	}
2029 }
2030 
2031 static int arm_smmu_init_l1_strtab(struct arm_smmu_device *smmu)
2032 {
2033 	unsigned int i;
2034 	struct arm_smmu_strtab_cfg *cfg = &smmu->strtab_cfg;
2035 	size_t size = sizeof(*cfg->l1_desc) * cfg->num_l1_ents;
2036 	void *strtab = smmu->strtab_cfg.strtab;
2037 
2038 	cfg->l1_desc = devm_kzalloc(smmu->dev, size, GFP_KERNEL);
2039 	if (!cfg->l1_desc) {
2040 		dev_err(smmu->dev, "failed to allocate l1 stream table desc\n");
2041 		return -ENOMEM;
2042 	}
2043 
2044 	for (i = 0; i < cfg->num_l1_ents; ++i) {
2045 		arm_smmu_write_strtab_l1_desc(strtab, &cfg->l1_desc[i]);
2046 		strtab += STRTAB_L1_DESC_DWORDS << 3;
2047 	}
2048 
2049 	return 0;
2050 }
2051 
2052 static int arm_smmu_init_strtab_2lvl(struct arm_smmu_device *smmu)
2053 {
2054 	void *strtab;
2055 	u64 reg;
2056 	u32 size, l1size;
2057 	int ret;
2058 	struct arm_smmu_strtab_cfg *cfg = &smmu->strtab_cfg;
2059 
2060 	/*
2061 	 * If we can resolve everything with a single L2 table, then we
2062 	 * just need a single L1 descriptor. Otherwise, calculate the L1
2063 	 * size, capped to the SIDSIZE.
2064 	 */
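	/*
	 * Worked example (assuming STRTAB_SPLIT is 8): with 16 SID bits,
	 * each L2 table resolves the bottom 8 bits and the L1 needs
	 * 1 << (16 - 8) == 256 descriptors, unless STRTAB_L1_SZ_SHIFT
	 * caps it first.
	 */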
2065 	if (smmu->sid_bits < STRTAB_SPLIT) {
2066 		size = 0;
2067 	} else {
2068 		size = STRTAB_L1_SZ_SHIFT - (ilog2(STRTAB_L1_DESC_DWORDS) + 3);
2069 		size = min(size, smmu->sid_bits - STRTAB_SPLIT);
2070 	}
2071 	cfg->num_l1_ents = 1 << size;
2072 
2073 	size += STRTAB_SPLIT;
2074 	if (size < smmu->sid_bits)
2075 		dev_warn(smmu->dev,
2076 			 "2-level strtab only covers %u/%u bits of SID\n",
2077 			 size, smmu->sid_bits);
2078 
2079 	l1size = cfg->num_l1_ents * (STRTAB_L1_DESC_DWORDS << 3);
2080 	strtab = dma_zalloc_coherent(smmu->dev, l1size, &cfg->strtab_dma,
2081 				     GFP_KERNEL);
2082 	if (!strtab) {
2083 		dev_err(smmu->dev,
2084 			"failed to allocate l1 stream table (%u bytes)\n",
2085 			l1size);
2086 		return -ENOMEM;
2087 	}
2088 	cfg->strtab = strtab;
2089 
2090 	/* Configure strtab_base_cfg for 2 levels */
2091 	reg  = STRTAB_BASE_CFG_FMT_2LVL;
2092 	reg |= (size & STRTAB_BASE_CFG_LOG2SIZE_MASK)
2093 		<< STRTAB_BASE_CFG_LOG2SIZE_SHIFT;
2094 	reg |= (STRTAB_SPLIT & STRTAB_BASE_CFG_SPLIT_MASK)
2095 		<< STRTAB_BASE_CFG_SPLIT_SHIFT;
2096 	cfg->strtab_base_cfg = reg;
2097 
2098 	ret = arm_smmu_init_l1_strtab(smmu);
2099 	if (ret)
2100 		dma_free_coherent(smmu->dev,
2101 				  l1size,
2102 				  strtab,
2103 				  cfg->strtab_dma);
2104 	return ret;
2105 }
2106 
2107 static int arm_smmu_init_strtab_linear(struct arm_smmu_device *smmu)
2108 {
2109 	void *strtab;
2110 	u64 reg;
2111 	u32 size;
2112 	struct arm_smmu_strtab_cfg *cfg = &smmu->strtab_cfg;
2113 
2114 	size = (1 << smmu->sid_bits) * (STRTAB_STE_DWORDS << 3);
2115 	strtab = dma_zalloc_coherent(smmu->dev, size, &cfg->strtab_dma,
2116 				     GFP_KERNEL);
2117 	if (!strtab) {
2118 		dev_err(smmu->dev,
2119 			"failed to allocate linear stream table (%u bytes)\n",
2120 			size);
2121 		return -ENOMEM;
2122 	}
2123 	cfg->strtab = strtab;
2124 	cfg->num_l1_ents = 1 << smmu->sid_bits;
2125 
2126 	/* Configure strtab_base_cfg for a linear table covering all SIDs */
2127 	reg  = STRTAB_BASE_CFG_FMT_LINEAR;
2128 	reg |= (smmu->sid_bits & STRTAB_BASE_CFG_LOG2SIZE_MASK)
2129 		<< STRTAB_BASE_CFG_LOG2SIZE_SHIFT;
2130 	cfg->strtab_base_cfg = reg;
2131 
2132 	arm_smmu_init_bypass_stes(strtab, cfg->num_l1_ents);
2133 	return 0;
2134 }
2135 
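/*
 * Pick the stream table format (2-level if the SMMU supports it,
 * linear otherwise), record the STRTAB_BASE value to be programmed at
 * reset time, and reserve VMID 0 for stage-2 bypass STEs.
 */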
2136 static int arm_smmu_init_strtab(struct arm_smmu_device *smmu)
2137 {
2138 	u64 reg;
2139 	int ret;
2140 
2141 	if (smmu->features & ARM_SMMU_FEAT_2_LVL_STRTAB)
2142 		ret = arm_smmu_init_strtab_2lvl(smmu);
2143 	else
2144 		ret = arm_smmu_init_strtab_linear(smmu);
2145 
2146 	if (ret)
2147 		return ret;
2148 
2149 	/* Set the strtab base address */
2150 	reg  = smmu->strtab_cfg.strtab_dma &
2151 	       STRTAB_BASE_ADDR_MASK << STRTAB_BASE_ADDR_SHIFT;
2152 	reg |= STRTAB_BASE_RA;
2153 	smmu->strtab_cfg.strtab_base = reg;
2154 
2155 	/* Allocate the first VMID for stage-2 bypass STEs */
2156 	set_bit(0, smmu->vmid_map);
2157 	return 0;
2158 }
2159 
2160 static void arm_smmu_free_strtab(struct arm_smmu_device *smmu)
2161 {
2162 	struct arm_smmu_strtab_cfg *cfg = &smmu->strtab_cfg;
2163 	u32 size = cfg->num_l1_ents;
2164 
2165 	if (smmu->features & ARM_SMMU_FEAT_2_LVL_STRTAB) {
2166 		arm_smmu_free_l2_strtab(smmu);
2167 		size *= STRTAB_L1_DESC_DWORDS << 3;
2168 	} else {
2169 		size *= STRTAB_STE_DWORDS << 3;
2170 	}
2171 
2172 	dma_free_coherent(smmu->dev, size, cfg->strtab, cfg->strtab_dma);
2173 }
2174 
2175 static int arm_smmu_init_structures(struct arm_smmu_device *smmu)
2176 {
2177 	int ret;
2178 
2179 	ret = arm_smmu_init_queues(smmu);
2180 	if (ret)
2181 		return ret;
2182 
2183 	ret = arm_smmu_init_strtab(smmu);
2184 	if (ret)
2185 		goto out_free_queues;
2186 
2187 	return 0;
2188 
2189 out_free_queues:
2190 	arm_smmu_free_queues(smmu);
2191 	return ret;
2192 }
2193 
2194 static void arm_smmu_free_structures(struct arm_smmu_device *smmu)
2195 {
2196 	arm_smmu_free_strtab(smmu);
2197 	arm_smmu_free_queues(smmu);
2198 }
2199 
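/*
 * Register update protocol used for CR0/IRQ_CTRL: write the new value
 * and poll the corresponding ACK register until it matches, giving up
 * after ARM_SMMU_POLL_TIMEOUT_US.
 */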
2200 static int arm_smmu_write_reg_sync(struct arm_smmu_device *smmu, u32 val,
2201 				   unsigned int reg_off, unsigned int ack_off)
2202 {
2203 	u32 reg;
2204 
2205 	writel_relaxed(val, smmu->base + reg_off);
2206 	return readl_relaxed_poll_timeout(smmu->base + ack_off, reg, reg == val,
2207 					  1, ARM_SMMU_POLL_TIMEOUT_US);
2208 }
2209 
2210 static void arm_smmu_free_msis(void *data)
2211 {
2212 	struct device *dev = data;
2213 	platform_msi_domain_free_irqs(dev);
2214 }
2215 
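/*
 * platform-MSI write-msg callback: translate the composed message into
 * the SMMU's per-interrupt IRQ_CFG registers (doorbell address, data
 * and memory attributes) as described by arm_smmu_msi_cfg.
 */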
2216 static void arm_smmu_write_msi_msg(struct msi_desc *desc, struct msi_msg *msg)
2217 {
2218 	phys_addr_t doorbell;
2219 	struct device *dev = msi_desc_to_dev(desc);
2220 	struct arm_smmu_device *smmu = dev_get_drvdata(dev);
2221 	phys_addr_t *cfg = arm_smmu_msi_cfg[desc->platform.msi_index];
2222 
2223 	doorbell = (((u64)msg->address_hi) << 32) | msg->address_lo;
2224 	doorbell &= MSI_CFG0_ADDR_MASK << MSI_CFG0_ADDR_SHIFT;
2225 
2226 	writeq_relaxed(doorbell, smmu->base + cfg[0]);
2227 	writel_relaxed(msg->data, smmu->base + cfg[1]);
2228 	writel_relaxed(MSI_CFG2_MEMATTR_DEVICE_nGnRE, smmu->base + cfg[2]);
2229 }
2230 
2231 static void arm_smmu_setup_msis(struct arm_smmu_device *smmu)
2232 {
2233 	struct msi_desc *desc;
2234 	int ret, nvec = ARM_SMMU_MAX_MSIS;
2235 	struct device *dev = smmu->dev;
2236 
2237 	/* Clear the MSI address regs */
2238 	writeq_relaxed(0, smmu->base + ARM_SMMU_GERROR_IRQ_CFG0);
2239 	writeq_relaxed(0, smmu->base + ARM_SMMU_EVTQ_IRQ_CFG0);
2240 
2241 	if (smmu->features & ARM_SMMU_FEAT_PRI)
2242 		writeq_relaxed(0, smmu->base + ARM_SMMU_PRIQ_IRQ_CFG0);
2243 	else
2244 		nvec--;
2245 
2246 	if (!(smmu->features & ARM_SMMU_FEAT_MSI))
2247 		return;
2248 
2249 	/* Allocate MSIs for evtq, gerror and priq. Ignore cmdq */
2250 	ret = platform_msi_domain_alloc_irqs(dev, nvec, arm_smmu_write_msi_msg);
2251 	if (ret) {
2252 		dev_warn(dev, "failed to allocate MSIs\n");
2253 		return;
2254 	}
2255 
2256 	for_each_msi_entry(desc, dev) {
2257 		switch (desc->platform.msi_index) {
2258 		case EVTQ_MSI_INDEX:
2259 			smmu->evtq.q.irq = desc->irq;
2260 			break;
2261 		case GERROR_MSI_INDEX:
2262 			smmu->gerr_irq = desc->irq;
2263 			break;
2264 		case PRIQ_MSI_INDEX:
2265 			smmu->priq.q.irq = desc->irq;
2266 			break;
2267 		default:	/* Unknown */
2268 			continue;
2269 		}
2270 	}
2271 
2272 	/* Add callback to free MSIs on teardown */
2273 	devm_add_action(dev, arm_smmu_free_msis, dev);
2274 }
2275 
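/*
 * Disable interrupt generation, set up MSIs where available, request
 * the event, cmdq-sync, gerror and (optionally) PRI queue interrupt
 * lines, then re-enable interrupt generation.
 */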
2276 static int arm_smmu_setup_irqs(struct arm_smmu_device *smmu)
2277 {
2278 	int ret, irq;
2279 	u32 irqen_flags = IRQ_CTRL_EVTQ_IRQEN | IRQ_CTRL_GERROR_IRQEN;
2280 
2281 	/* Disable IRQs first */
2282 	ret = arm_smmu_write_reg_sync(smmu, 0, ARM_SMMU_IRQ_CTRL,
2283 				      ARM_SMMU_IRQ_CTRLACK);
2284 	if (ret) {
2285 		dev_err(smmu->dev, "failed to disable irqs\n");
2286 		return ret;
2287 	}
2288 
2289 	arm_smmu_setup_msis(smmu);
2290 
2291 	/* Request interrupt lines */
2292 	irq = smmu->evtq.q.irq;
2293 	if (irq) {
2294 		ret = devm_request_threaded_irq(smmu->dev, irq,
2295 						arm_smmu_evtq_handler,
2296 						arm_smmu_evtq_thread,
2297 						0, "arm-smmu-v3-evtq", smmu);
2298 		if (IS_ERR_VALUE(ret))
2299 			dev_warn(smmu->dev, "failed to enable evtq irq\n");
2300 	}
2301 
2302 	irq = smmu->cmdq.q.irq;
2303 	if (irq) {
2304 		ret = devm_request_irq(smmu->dev, irq,
2305 				       arm_smmu_cmdq_sync_handler, 0,
2306 				       "arm-smmu-v3-cmdq-sync", smmu);
2307 		if (IS_ERR_VALUE(ret))
2308 			dev_warn(smmu->dev, "failed to enable cmdq-sync irq\n");
2309 	}
2310 
2311 	irq = smmu->gerr_irq;
2312 	if (irq) {
2313 		ret = devm_request_irq(smmu->dev, irq, arm_smmu_gerror_handler,
2314 				       0, "arm-smmu-v3-gerror", smmu);
2315 		if (IS_ERR_VALUE(ret))
2316 			dev_warn(smmu->dev, "failed to enable gerror irq\n");
2317 	}
2318 
2319 	if (smmu->features & ARM_SMMU_FEAT_PRI) {
2320 		irq = smmu->priq.q.irq;
2321 		if (irq) {
2322 			ret = devm_request_threaded_irq(smmu->dev, irq,
2323 							arm_smmu_priq_handler,
2324 							arm_smmu_priq_thread,
2325 							0, "arm-smmu-v3-priq",
2326 							smmu);
2327 			if (IS_ERR_VALUE(ret))
2328 				dev_warn(smmu->dev,
2329 					 "failed to enable priq irq\n");
2330 			else
2331 				irqen_flags |= IRQ_CTRL_PRIQ_IRQEN;
2332 		}
2333 	}
2334 
2335 	/* Enable interrupt generation on the SMMU */
2336 	ret = arm_smmu_write_reg_sync(smmu, irqen_flags,
2337 				      ARM_SMMU_IRQ_CTRL, ARM_SMMU_IRQ_CTRLACK);
2338 	if (ret)
2339 		dev_warn(smmu->dev, "failed to enable irqs\n");
2340 
2341 	return 0;
2342 }
2343 
2344 static int arm_smmu_device_disable(struct arm_smmu_device *smmu)
2345 {
2346 	int ret;
2347 
2348 	ret = arm_smmu_write_reg_sync(smmu, 0, ARM_SMMU_CR0, ARM_SMMU_CR0ACK);
2349 	if (ret)
2350 		dev_err(smmu->dev, "failed to clear cr0\n");
2351 
2352 	return ret;
2353 }
2354 
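/*
 * Bring-up sequence: disable the SMMU, program CR1/CR2 and the stream
 * table registers, enable the command queue, invalidate cached
 * configuration and TLBs, then enable the event queue, the PRI queue
 * (if present), interrupts and finally the SMMU itself.
 */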
2355 static int arm_smmu_device_reset(struct arm_smmu_device *smmu)
2356 {
2357 	int ret;
2358 	u32 reg, enables;
2359 	struct arm_smmu_cmdq_ent cmd;
2360 
2361 	/* Clear CR0 and sync (disables SMMU and queue processing) */
2362 	reg = readl_relaxed(smmu->base + ARM_SMMU_CR0);
2363 	if (reg & CR0_SMMUEN)
2364 		dev_warn(smmu->dev, "SMMU currently enabled! Resetting...\n");
2365 
2366 	ret = arm_smmu_device_disable(smmu);
2367 	if (ret)
2368 		return ret;
2369 
2370 	/* CR1 (table and queue memory attributes) */
2371 	reg = (CR1_SH_ISH << CR1_TABLE_SH_SHIFT) |
2372 	      (CR1_CACHE_WB << CR1_TABLE_OC_SHIFT) |
2373 	      (CR1_CACHE_WB << CR1_TABLE_IC_SHIFT) |
2374 	      (CR1_SH_ISH << CR1_QUEUE_SH_SHIFT) |
2375 	      (CR1_CACHE_WB << CR1_QUEUE_OC_SHIFT) |
2376 	      (CR1_CACHE_WB << CR1_QUEUE_IC_SHIFT);
2377 	writel_relaxed(reg, smmu->base + ARM_SMMU_CR1);
2378 
2379 	/* CR2 (private TLB maintenance, record invalid SIDs, E2H) */
2380 	reg = CR2_PTM | CR2_RECINVSID | CR2_E2H;
2381 	writel_relaxed(reg, smmu->base + ARM_SMMU_CR2);
2382 
2383 	/* Stream table */
2384 	writeq_relaxed(smmu->strtab_cfg.strtab_base,
2385 		       smmu->base + ARM_SMMU_STRTAB_BASE);
2386 	writel_relaxed(smmu->strtab_cfg.strtab_base_cfg,
2387 		       smmu->base + ARM_SMMU_STRTAB_BASE_CFG);
2388 
2389 	/* Command queue */
2390 	writeq_relaxed(smmu->cmdq.q.q_base, smmu->base + ARM_SMMU_CMDQ_BASE);
2391 	writel_relaxed(smmu->cmdq.q.prod, smmu->base + ARM_SMMU_CMDQ_PROD);
2392 	writel_relaxed(smmu->cmdq.q.cons, smmu->base + ARM_SMMU_CMDQ_CONS);
2393 
2394 	enables = CR0_CMDQEN;
2395 	ret = arm_smmu_write_reg_sync(smmu, enables, ARM_SMMU_CR0,
2396 				      ARM_SMMU_CR0ACK);
2397 	if (ret) {
2398 		dev_err(smmu->dev, "failed to enable command queue\n");
2399 		return ret;
2400 	}
2401 
2402 	/* Invalidate any cached configuration */
2403 	cmd.opcode = CMDQ_OP_CFGI_ALL;
2404 	arm_smmu_cmdq_issue_cmd(smmu, &cmd);
2405 	cmd.opcode = CMDQ_OP_CMD_SYNC;
2406 	arm_smmu_cmdq_issue_cmd(smmu, &cmd);
2407 
2408 	/* Invalidate any stale TLB entries */
2409 	if (smmu->features & ARM_SMMU_FEAT_HYP) {
2410 		cmd.opcode = CMDQ_OP_TLBI_EL2_ALL;
2411 		arm_smmu_cmdq_issue_cmd(smmu, &cmd);
2412 	}
2413 
2414 	cmd.opcode = CMDQ_OP_TLBI_NSNH_ALL;
2415 	arm_smmu_cmdq_issue_cmd(smmu, &cmd);
2416 	cmd.opcode = CMDQ_OP_CMD_SYNC;
2417 	arm_smmu_cmdq_issue_cmd(smmu, &cmd);
2418 
2419 	/* Event queue */
2420 	writeq_relaxed(smmu->evtq.q.q_base, smmu->base + ARM_SMMU_EVTQ_BASE);
2421 	writel_relaxed(smmu->evtq.q.prod, smmu->base + ARM_SMMU_EVTQ_PROD);
2422 	writel_relaxed(smmu->evtq.q.cons, smmu->base + ARM_SMMU_EVTQ_CONS);
2423 
2424 	enables |= CR0_EVTQEN;
2425 	ret = arm_smmu_write_reg_sync(smmu, enables, ARM_SMMU_CR0,
2426 				      ARM_SMMU_CR0ACK);
2427 	if (ret) {
2428 		dev_err(smmu->dev, "failed to enable event queue\n");
2429 		return ret;
2430 	}
2431 
2432 	/* PRI queue */
2433 	if (smmu->features & ARM_SMMU_FEAT_PRI) {
2434 		writeq_relaxed(smmu->priq.q.q_base,
2435 			       smmu->base + ARM_SMMU_PRIQ_BASE);
2436 		writel_relaxed(smmu->priq.q.prod,
2437 			       smmu->base + ARM_SMMU_PRIQ_PROD);
2438 		writel_relaxed(smmu->priq.q.cons,
2439 			       smmu->base + ARM_SMMU_PRIQ_CONS);
2440 
2441 		enables |= CR0_PRIQEN;
2442 		ret = arm_smmu_write_reg_sync(smmu, enables, ARM_SMMU_CR0,
2443 					      ARM_SMMU_CR0ACK);
2444 		if (ret) {
2445 			dev_err(smmu->dev, "failed to enable PRI queue\n");
2446 			return ret;
2447 		}
2448 	}
2449 
2450 	ret = arm_smmu_setup_irqs(smmu);
2451 	if (ret) {
2452 		dev_err(smmu->dev, "failed to setup irqs\n");
2453 		return ret;
2454 	}
2455 
2456 	/* Enable the SMMU interface */
2457 	enables |= CR0_SMMUEN;
2458 	ret = arm_smmu_write_reg_sync(smmu, enables, ARM_SMMU_CR0,
2459 				      ARM_SMMU_CR0ACK);
2460 	if (ret) {
2461 		dev_err(smmu->dev, "failed to enable SMMU interface\n");
2462 		return ret;
2463 	}
2464 
2465 	return 0;
2466 }
2467 
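/*
 * Read the IDR registers to determine the features, queue sizes,
 * SID/SSID/ASID/VMID widths, supported page sizes and address sizes
 * for this implementation.
 */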
2468 static int arm_smmu_device_probe(struct arm_smmu_device *smmu)
2469 {
2470 	u32 reg;
2471 	bool coherent;
2472 	unsigned long pgsize_bitmap = 0;
2473 
2474 	/* IDR0 */
2475 	reg = readl_relaxed(smmu->base + ARM_SMMU_IDR0);
2476 
2477 	/* 2-level structures */
2478 	if ((reg & IDR0_ST_LVL_MASK << IDR0_ST_LVL_SHIFT) == IDR0_ST_LVL_2LVL)
2479 		smmu->features |= ARM_SMMU_FEAT_2_LVL_STRTAB;
2480 
2481 	if (reg & IDR0_CD2L)
2482 		smmu->features |= ARM_SMMU_FEAT_2_LVL_CDTAB;
2483 
2484 	/*
2485 	 * Translation table endianness.
2486 	 * We currently require the same endianness as the CPU, but this
2487 	 * could be changed later by adding a new IO_PGTABLE_QUIRK.
2488 	 */
2489 	switch (reg & IDR0_TTENDIAN_MASK << IDR0_TTENDIAN_SHIFT) {
2490 	case IDR0_TTENDIAN_MIXED:
2491 		smmu->features |= ARM_SMMU_FEAT_TT_LE | ARM_SMMU_FEAT_TT_BE;
2492 		break;
2493 #ifdef __BIG_ENDIAN
2494 	case IDR0_TTENDIAN_BE:
2495 		smmu->features |= ARM_SMMU_FEAT_TT_BE;
2496 		break;
2497 #else
2498 	case IDR0_TTENDIAN_LE:
2499 		smmu->features |= ARM_SMMU_FEAT_TT_LE;
2500 		break;
2501 #endif
2502 	default:
2503 		dev_err(smmu->dev, "unknown/unsupported TT endianness!\n");
2504 		return -ENXIO;
2505 	}
2506 
2507 	/* Boolean feature flags */
2508 	if (IS_ENABLED(CONFIG_PCI_PRI) && reg & IDR0_PRI)
2509 		smmu->features |= ARM_SMMU_FEAT_PRI;
2510 
2511 	if (IS_ENABLED(CONFIG_PCI_ATS) && reg & IDR0_ATS)
2512 		smmu->features |= ARM_SMMU_FEAT_ATS;
2513 
2514 	if (reg & IDR0_SEV)
2515 		smmu->features |= ARM_SMMU_FEAT_SEV;
2516 
2517 	if (reg & IDR0_MSI)
2518 		smmu->features |= ARM_SMMU_FEAT_MSI;
2519 
2520 	if (reg & IDR0_HYP)
2521 		smmu->features |= ARM_SMMU_FEAT_HYP;
2522 
2523 	/*
2524 	 * The dma-coherent property is used in preference to the ID
2525 	 * register, but warn on mismatch.
2526 	 */
2527 	coherent = of_dma_is_coherent(smmu->dev->of_node);
2528 	if (coherent)
2529 		smmu->features |= ARM_SMMU_FEAT_COHERENCY;
2530 
2531 	if (!!(reg & IDR0_COHACC) != coherent)
2532 		dev_warn(smmu->dev, "IDR0.COHACC overridden by dma-coherent property (%s)\n",
2533 			 coherent ? "true" : "false");
2534 
2535 	if (reg & IDR0_STALL_MODEL)
2536 		smmu->features |= ARM_SMMU_FEAT_STALLS;
2537 
2538 	if (reg & IDR0_S1P)
2539 		smmu->features |= ARM_SMMU_FEAT_TRANS_S1;
2540 
2541 	if (reg & IDR0_S2P)
2542 		smmu->features |= ARM_SMMU_FEAT_TRANS_S2;
2543 
2544 	if (!(reg & (IDR0_S1P | IDR0_S2P))) {
2545 		dev_err(smmu->dev, "no translation support!\n");
2546 		return -ENXIO;
2547 	}
2548 
2549 	/* We only support the AArch64 table format at present */
2550 	switch (reg & IDR0_TTF_MASK << IDR0_TTF_SHIFT) {
2551 	case IDR0_TTF_AARCH32_64:
2552 		smmu->ias = 40;
2553 		/* Fallthrough */
2554 	case IDR0_TTF_AARCH64:
2555 		break;
2556 	default:
2557 		dev_err(smmu->dev, "AArch64 table format not supported!\n");
2558 		return -ENXIO;
2559 	}
2560 
2561 	/* ASID/VMID sizes */
2562 	smmu->asid_bits = reg & IDR0_ASID16 ? 16 : 8;
2563 	smmu->vmid_bits = reg & IDR0_VMID16 ? 16 : 8;
2564 
2565 	/* IDR1 */
2566 	reg = readl_relaxed(smmu->base + ARM_SMMU_IDR1);
2567 	if (reg & (IDR1_TABLES_PRESET | IDR1_QUEUES_PRESET | IDR1_REL)) {
2568 		dev_err(smmu->dev, "embedded implementation not supported\n");
2569 		return -ENXIO;
2570 	}
2571 
2572 	/* Queue sizes, capped at 4k */
2573 	smmu->cmdq.q.max_n_shift = min((u32)CMDQ_MAX_SZ_SHIFT,
2574 				       reg >> IDR1_CMDQ_SHIFT & IDR1_CMDQ_MASK);
2575 	if (!smmu->cmdq.q.max_n_shift) {
2576 		/* Odd alignment restrictions on the base, so ignore for now */
2577 		dev_err(smmu->dev, "unit-length command queue not supported\n");
2578 		return -ENXIO;
2579 	}
2580 
2581 	smmu->evtq.q.max_n_shift = min((u32)EVTQ_MAX_SZ_SHIFT,
2582 				       reg >> IDR1_EVTQ_SHIFT & IDR1_EVTQ_MASK);
2583 	smmu->priq.q.max_n_shift = min((u32)PRIQ_MAX_SZ_SHIFT,
2584 				       reg >> IDR1_PRIQ_SHIFT & IDR1_PRIQ_MASK);
2585 
2586 	/* SID/SSID sizes */
2587 	smmu->ssid_bits = reg >> IDR1_SSID_SHIFT & IDR1_SSID_MASK;
2588 	smmu->sid_bits = reg >> IDR1_SID_SHIFT & IDR1_SID_MASK;
2589 
2590 	/* IDR5 */
2591 	reg = readl_relaxed(smmu->base + ARM_SMMU_IDR5);
2592 
2593 	/* Maximum number of outstanding stalls */
2594 	smmu->evtq.max_stalls = reg >> IDR5_STALL_MAX_SHIFT
2595 				& IDR5_STALL_MAX_MASK;
2596 
2597 	/* Page sizes */
2598 	if (reg & IDR5_GRAN64K)
2599 		pgsize_bitmap |= SZ_64K | SZ_512M;
2600 	if (reg & IDR5_GRAN16K)
2601 		pgsize_bitmap |= SZ_16K | SZ_32M;
2602 	if (reg & IDR5_GRAN4K)
2603 		pgsize_bitmap |= SZ_4K | SZ_2M | SZ_1G;
2604 
2605 	arm_smmu_ops.pgsize_bitmap &= pgsize_bitmap;
2606 
2607 	/* Output address size */
2608 	switch (reg & IDR5_OAS_MASK << IDR5_OAS_SHIFT) {
2609 	case IDR5_OAS_32_BIT:
2610 		smmu->oas = 32;
2611 		break;
2612 	case IDR5_OAS_36_BIT:
2613 		smmu->oas = 36;
2614 		break;
2615 	case IDR5_OAS_40_BIT:
2616 		smmu->oas = 40;
2617 		break;
2618 	case IDR5_OAS_42_BIT:
2619 		smmu->oas = 42;
2620 		break;
2621 	case IDR5_OAS_44_BIT:
2622 		smmu->oas = 44;
2623 		break;
2624 	default:
2625 		dev_info(smmu->dev,
2626 			"unknown output address size. Truncating to 48-bit\n");
2627 		/* Fallthrough */
2628 	case IDR5_OAS_48_BIT:
2629 		smmu->oas = 48;
2630 	}
2631 
2632 	/* Set the DMA mask for our table walker */
2633 	if (dma_set_mask_and_coherent(smmu->dev, DMA_BIT_MASK(smmu->oas)))
2634 		dev_warn(smmu->dev,
2635 			 "failed to set DMA mask for table walker\n");
2636 
2637 	smmu->ias = max(smmu->ias, smmu->oas);
2638 
2639 	dev_info(smmu->dev, "ias %lu-bit, oas %lu-bit (features 0x%08x)\n",
2640 		 smmu->ias, smmu->oas, smmu->features);
2641 	return 0;
2642 }
2643 
2644 static int arm_smmu_device_dt_probe(struct platform_device *pdev)
2645 {
2646 	int irq, ret;
2647 	struct resource *res;
2648 	struct arm_smmu_device *smmu;
2649 	struct device *dev = &pdev->dev;
2650 
2651 	smmu = devm_kzalloc(dev, sizeof(*smmu), GFP_KERNEL);
2652 	if (!smmu) {
2653 		dev_err(dev, "failed to allocate arm_smmu_device\n");
2654 		return -ENOMEM;
2655 	}
2656 	smmu->dev = dev;
2657 
2658 	/* Base address */
2659 	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
2660 	if (resource_size(res) + 1 < SZ_128K) {
2661 		dev_err(dev, "MMIO region too small (%pr)\n", res);
2662 		return -EINVAL;
2663 	}
2664 
2665 	smmu->base = devm_ioremap_resource(dev, res);
2666 	if (IS_ERR(smmu->base))
2667 		return PTR_ERR(smmu->base);
2668 
2669 	/* Interrupt lines */
2670 	irq = platform_get_irq_byname(pdev, "eventq");
2671 	if (irq > 0)
2672 		smmu->evtq.q.irq = irq;
2673 
2674 	irq = platform_get_irq_byname(pdev, "priq");
2675 	if (irq > 0)
2676 		smmu->priq.q.irq = irq;
2677 
2678 	irq = platform_get_irq_byname(pdev, "cmdq-sync");
2679 	if (irq > 0)
2680 		smmu->cmdq.q.irq = irq;
2681 
2682 	irq = platform_get_irq_byname(pdev, "gerror");
2683 	if (irq > 0)
2684 		smmu->gerr_irq = irq;
2685 
2686 	parse_driver_options(smmu);
2687 
2688 	/* Probe the h/w */
2689 	ret = arm_smmu_device_probe(smmu);
2690 	if (ret)
2691 		return ret;
2692 
2693 	/* Initialise in-memory data structures */
2694 	ret = arm_smmu_init_structures(smmu);
2695 	if (ret)
2696 		return ret;
2697 
2698 	/* Record our private device structure */
2699 	platform_set_drvdata(pdev, smmu);
2700 
2701 	/* Reset the device */
2702 	ret = arm_smmu_device_reset(smmu);
2703 	if (ret)
2704 		goto out_free_structures;
2705 
2706 	return 0;
2707 
2708 out_free_structures:
2709 	arm_smmu_free_structures(smmu);
2710 	return ret;
2711 }
2712 
2713 static int arm_smmu_device_remove(struct platform_device *pdev)
2714 {
2715 	struct arm_smmu_device *smmu = platform_get_drvdata(pdev);
2716 
2717 	arm_smmu_device_disable(smmu);
2718 	arm_smmu_free_structures(smmu);
2719 	return 0;
2720 }
2721 
2722 static struct of_device_id arm_smmu_of_match[] = {
2723 	{ .compatible = "arm,smmu-v3", },
2724 	{ },
2725 };
2726 MODULE_DEVICE_TABLE(of, arm_smmu_of_match);
2727 
2728 static struct platform_driver arm_smmu_driver = {
2729 	.driver	= {
2730 		.name		= "arm-smmu-v3",
2731 		.of_match_table	= of_match_ptr(arm_smmu_of_match),
2732 	},
2733 	.probe	= arm_smmu_device_dt_probe,
2734 	.remove	= arm_smmu_device_remove,
2735 };
2736 
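/*
 * Only register the driver (and hook into the PCI bus, which is all
 * the add_device path currently supports) if a matching device-tree
 * node is present.
 */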
2737 static int __init arm_smmu_init(void)
2738 {
2739 	struct device_node *np;
2740 	int ret;
2741 
2742 	np = of_find_matching_node(NULL, arm_smmu_of_match);
2743 	if (!np)
2744 		return 0;
2745 
2746 	of_node_put(np);
2747 
2748 	ret = platform_driver_register(&arm_smmu_driver);
2749 	if (ret)
2750 		return ret;
2751 
2752 	return bus_set_iommu(&pci_bus_type, &arm_smmu_ops);
2753 }
2754 
2755 static void __exit arm_smmu_exit(void)
2756 {
2757 	return platform_driver_unregister(&arm_smmu_driver);
2758 }
2759 
2760 subsys_initcall(arm_smmu_init);
2761 module_exit(arm_smmu_exit);
2762 
2763 MODULE_DESCRIPTION("IOMMU API for ARM architected SMMUv3 implementations");
2764 MODULE_AUTHOR("Will Deacon <will.deacon@arm.com>");
2765 MODULE_LICENSE("GPL v2");
2766