/*
 * Copyright (C) 2012 ARM Ltd.
 * Author: Marc Zyngier <marc.zyngier@arm.com>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
 */

#include <linux/cpu.h>
#include <linux/kvm.h>
#include <linux/kvm_host.h>
#include <linux/interrupt.h>
#include <linux/io.h>
#include <linux/of.h>
#include <linux/of_address.h>
#include <linux/of_irq.h>
#include <linux/uaccess.h>

#include <linux/irqchip/arm-gic.h>

#include <asm/kvm_emulate.h>
#include <asm/kvm_arm.h>
#include <asm/kvm_mmu.h>
#include <trace/events/kvm.h>
#include <asm/kvm.h>
#include <kvm/iodev.h>
/*
 * How the whole thing works (courtesy of Christoffer Dall):
 *
 * - At any time, the dist->irq_pending_on_cpu is the oracle that knows if
 *   something is pending on the CPU interface.
 * - Interrupts that are pending on the distributor are stored in the
 *   vgic.irq_pending bitmap (this bitmap is updated by both userland
 *   ioctls and guest mmio ops, and other in-kernel peripherals such as the
 *   arch. timers).
 * - Every time the bitmap changes, the irq_pending_on_cpu oracle is
 *   recalculated.
 * - To calculate the oracle, we need info for each cpu from
 *   compute_pending_for_cpu, which considers:
 *   - PPI: dist->irq_pending & dist->irq_enabled
 *   - SPI: dist->irq_pending & dist->irq_enabled & dist->irq_spi_target
 *   - irq_spi_target is a 'formatted' version of the GICD_ITARGETSRn
 *     registers, stored on each vcpu. We only keep one bit of
 *     information per interrupt, making sure that only one vcpu can
 *     accept the interrupt.
 * - If any of the above state changes, we must recalculate the oracle.
 * - The same is true when injecting an interrupt, except that we only
 *   consider a single interrupt at a time. The irq_spi_cpu array
 *   contains the target CPU for each SPI.
 *
 * The handling of level interrupts adds some extra complexity. We
 * need to track when the interrupt has been EOIed, so we can sample
 * the 'line' again. This is achieved as such:
 *
 * - When a level interrupt is moved onto a vcpu, the corresponding
 *   bit in irq_queued is set. As long as this bit is set, the line
 *   will be ignored for further interrupts. The interrupt is injected
 *   into the vcpu with the GICH_LR_EOI bit set (generate a
 *   maintenance interrupt on EOI).
 * - When the interrupt is EOIed, the maintenance interrupt fires,
 *   and clears the corresponding bit in irq_queued. This allows the
 *   interrupt line to be sampled again.
 * - Note that level-triggered interrupts can also be set to pending from
 *   writes to GICD_ISPENDRn and lowering the external input line does not
 *   cause the interrupt to become inactive in such a situation.
 *   Conversely, writes to GICD_ICPENDRn do not cause the interrupt to become
 *   inactive as long as the external input line is held high.
 */
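
/*
 * Illustrative sketch (not a definitive specification): with the above,
 * the per-VCPU half of the oracle reduces to roughly
 *
 *	pending(vcpu) = (irq_pending & irq_enabled)            <- PPIs/SGIs
 *	              | (irq_pending & irq_enabled
 *	                 & irq_spi_target[vcpu])               <- SPIs
 *
 * compute_pending_for_cpu() below implements this with bitmap_and()
 * over the private and shared ranges.
 */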

#include "vgic.h"

static void vgic_retire_disabled_irqs(struct kvm_vcpu *vcpu);
static void vgic_retire_lr(int lr_nr, int irq, struct kvm_vcpu *vcpu);
static struct vgic_lr vgic_get_lr(const struct kvm_vcpu *vcpu, int lr);
static void vgic_set_lr(struct kvm_vcpu *vcpu, int lr, struct vgic_lr lr_desc);

static const struct vgic_ops *vgic_ops;
static const struct vgic_params *vgic;

static void add_sgi_source(struct kvm_vcpu *vcpu, int irq, int source)
{
	vcpu->kvm->arch.vgic.vm_ops.add_sgi_source(vcpu, irq, source);
}

static bool queue_sgi(struct kvm_vcpu *vcpu, int irq)
{
	return vcpu->kvm->arch.vgic.vm_ops.queue_sgi(vcpu, irq);
}

int kvm_vgic_map_resources(struct kvm *kvm)
{
	return kvm->arch.vgic.vm_ops.map_resources(kvm, vgic);
}

/*
 * struct vgic_bitmap contains a bitmap made of unsigned longs, but
 * extracts u32s out of them.
 *
 * This does not work on 64-bit BE systems, because the bitmap access
 * will store two consecutive 32-bit words with the higher-addressed
 * register's bits at the lower index and the lower-addressed register's
 * bits at the higher index.
 *
 * Therefore, swizzle the register index when accessing the 32-bit word
 * registers to access the right register's value.
 */
#if defined(CONFIG_CPU_BIG_ENDIAN) && BITS_PER_LONG == 64
#define REG_OFFSET_SWIZZLE	1
#else
#define REG_OFFSET_SWIZZLE	0
#endif
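
/*
 * Worked example (assuming a 64-bit BE host): the first two shared
 * 32-bit registers live in the same unsigned long, but BE stores the
 * lower-addressed register's bits in the upper half of that long,
 * i.e. at u32 index 1. With REG_OFFSET_SWIZZLE == 1 the accessor below
 * uses index (0 ^ 1) = 1 for register 0 and (1 ^ 1) = 0 for register 1,
 * which picks the correct 32-bit half.
 */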

static int vgic_init_bitmap(struct vgic_bitmap *b, int nr_cpus, int nr_irqs)
{
	int nr_longs;

	nr_longs = nr_cpus + BITS_TO_LONGS(nr_irqs - VGIC_NR_PRIVATE_IRQS);

	b->private = kzalloc(sizeof(unsigned long) * nr_longs, GFP_KERNEL);
	if (!b->private)
		return -ENOMEM;

	b->shared = b->private + nr_cpus;

	return 0;
}

static void vgic_free_bitmap(struct vgic_bitmap *b)
{
	kfree(b->private);
	b->private = NULL;
	b->shared = NULL;
}

/*
 * Call this function to convert a u64 value to an unsigned long * bitmask
 * in a way that works on both 32-bit and 64-bit LE and BE platforms.
 *
 * Warning: Calling this function may modify *val.
 */
static unsigned long *u64_to_bitmask(u64 *val)
{
#if defined(CONFIG_CPU_BIG_ENDIAN) && BITS_PER_LONG == 32
	*val = (*val >> 32) | (*val << 32);
#endif
	return (unsigned long *)val;
}
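
/*
 * Worked example (assuming a 32-bit BE host): for a u64 with only bit
 * 32 set, the in-memory word order is [high][low], so the long at
 * index 0 would wrongly carry bits 32..63. Swapping the two halves
 * above puts bits 0..31 at index 0 and bits 32..63 at index 1, which
 * is what find_first_bit() and friends expect.
 */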

u32 *vgic_bitmap_get_reg(struct vgic_bitmap *x, int cpuid, u32 offset)
{
	offset >>= 2;
	if (!offset)
		return (u32 *)(x->private + cpuid) + REG_OFFSET_SWIZZLE;
	else
		return (u32 *)(x->shared) + ((offset - 1) ^ REG_OFFSET_SWIZZLE);
}

static int vgic_bitmap_get_irq_val(struct vgic_bitmap *x,
				   int cpuid, int irq)
{
	if (irq < VGIC_NR_PRIVATE_IRQS)
		return test_bit(irq, x->private + cpuid);

	return test_bit(irq - VGIC_NR_PRIVATE_IRQS, x->shared);
}

void vgic_bitmap_set_irq_val(struct vgic_bitmap *x, int cpuid,
			     int irq, int val)
{
	unsigned long *reg;

	if (irq < VGIC_NR_PRIVATE_IRQS) {
		reg = x->private + cpuid;
	} else {
		reg = x->shared;
		irq -= VGIC_NR_PRIVATE_IRQS;
	}

	if (val)
		set_bit(irq, reg);
	else
		clear_bit(irq, reg);
}

static unsigned long *vgic_bitmap_get_cpu_map(struct vgic_bitmap *x, int cpuid)
{
	return x->private + cpuid;
}

unsigned long *vgic_bitmap_get_shared_map(struct vgic_bitmap *x)
{
	return x->shared;
}

static int vgic_init_bytemap(struct vgic_bytemap *x, int nr_cpus, int nr_irqs)
{
	int size;

	size  = nr_cpus * VGIC_NR_PRIVATE_IRQS;
	size += nr_irqs - VGIC_NR_PRIVATE_IRQS;

	x->private = kzalloc(size, GFP_KERNEL);
	if (!x->private)
		return -ENOMEM;

	x->shared = x->private + nr_cpus * VGIC_NR_PRIVATE_IRQS / sizeof(u32);
	return 0;
}

static void vgic_free_bytemap(struct vgic_bytemap *b)
{
	kfree(b->private);
	b->private = NULL;
	b->shared = NULL;
}

u32 *vgic_bytemap_get_reg(struct vgic_bytemap *x, int cpuid, u32 offset)
{
	u32 *reg;

	if (offset < VGIC_NR_PRIVATE_IRQS) {
		reg = x->private;
		offset += cpuid * VGIC_NR_PRIVATE_IRQS;
	} else {
		reg = x->shared;
		offset -= VGIC_NR_PRIVATE_IRQS;
	}

	return reg + (offset / sizeof(u32));
}
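
/*
 * For illustration (hypothetical access): a priority byte for IRQ 36
 * has offset 36, which is >= VGIC_NR_PRIVATE_IRQS, so it lands in the
 * shared array at u32 index (36 - 32) / 4 = 1, i.e. the word covering
 * IRQs 36-39. An offset below 32 would instead be redirected into the
 * per-cpu private area of the given cpuid.
 */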

#define VGIC_CFG_LEVEL	0
#define VGIC_CFG_EDGE	1

static bool vgic_irq_is_edge(struct kvm_vcpu *vcpu, int irq)
{
	struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
	int irq_val;

	irq_val = vgic_bitmap_get_irq_val(&dist->irq_cfg, vcpu->vcpu_id, irq);
	return irq_val == VGIC_CFG_EDGE;
}

static int vgic_irq_is_enabled(struct kvm_vcpu *vcpu, int irq)
{
	struct vgic_dist *dist = &vcpu->kvm->arch.vgic;

	return vgic_bitmap_get_irq_val(&dist->irq_enabled, vcpu->vcpu_id, irq);
}

static int vgic_irq_is_queued(struct kvm_vcpu *vcpu, int irq)
{
	struct vgic_dist *dist = &vcpu->kvm->arch.vgic;

	return vgic_bitmap_get_irq_val(&dist->irq_queued, vcpu->vcpu_id, irq);
}

static int vgic_irq_is_active(struct kvm_vcpu *vcpu, int irq)
{
	struct vgic_dist *dist = &vcpu->kvm->arch.vgic;

	return vgic_bitmap_get_irq_val(&dist->irq_active, vcpu->vcpu_id, irq);
}

static void vgic_irq_set_queued(struct kvm_vcpu *vcpu, int irq)
{
	struct vgic_dist *dist = &vcpu->kvm->arch.vgic;

	vgic_bitmap_set_irq_val(&dist->irq_queued, vcpu->vcpu_id, irq, 1);
}

static void vgic_irq_clear_queued(struct kvm_vcpu *vcpu, int irq)
{
	struct vgic_dist *dist = &vcpu->kvm->arch.vgic;

	vgic_bitmap_set_irq_val(&dist->irq_queued, vcpu->vcpu_id, irq, 0);
}

static void vgic_irq_set_active(struct kvm_vcpu *vcpu, int irq)
{
	struct vgic_dist *dist = &vcpu->kvm->arch.vgic;

	vgic_bitmap_set_irq_val(&dist->irq_active, vcpu->vcpu_id, irq, 1);
}

static void vgic_irq_clear_active(struct kvm_vcpu *vcpu, int irq)
{
	struct vgic_dist *dist = &vcpu->kvm->arch.vgic;

	vgic_bitmap_set_irq_val(&dist->irq_active, vcpu->vcpu_id, irq, 0);
}

static int vgic_dist_irq_get_level(struct kvm_vcpu *vcpu, int irq)
{
	struct vgic_dist *dist = &vcpu->kvm->arch.vgic;

	return vgic_bitmap_get_irq_val(&dist->irq_level, vcpu->vcpu_id, irq);
}

static void vgic_dist_irq_set_level(struct kvm_vcpu *vcpu, int irq)
{
	struct vgic_dist *dist = &vcpu->kvm->arch.vgic;

	vgic_bitmap_set_irq_val(&dist->irq_level, vcpu->vcpu_id, irq, 1);
}

static void vgic_dist_irq_clear_level(struct kvm_vcpu *vcpu, int irq)
{
	struct vgic_dist *dist = &vcpu->kvm->arch.vgic;

	vgic_bitmap_set_irq_val(&dist->irq_level, vcpu->vcpu_id, irq, 0);
}

static int vgic_dist_irq_soft_pend(struct kvm_vcpu *vcpu, int irq)
{
	struct vgic_dist *dist = &vcpu->kvm->arch.vgic;

	return vgic_bitmap_get_irq_val(&dist->irq_soft_pend, vcpu->vcpu_id, irq);
}

static void vgic_dist_irq_clear_soft_pend(struct kvm_vcpu *vcpu, int irq)
{
	struct vgic_dist *dist = &vcpu->kvm->arch.vgic;

	vgic_bitmap_set_irq_val(&dist->irq_soft_pend, vcpu->vcpu_id, irq, 0);
}

static int vgic_dist_irq_is_pending(struct kvm_vcpu *vcpu, int irq)
{
	struct vgic_dist *dist = &vcpu->kvm->arch.vgic;

	return vgic_bitmap_get_irq_val(&dist->irq_pending, vcpu->vcpu_id, irq);
}

void vgic_dist_irq_set_pending(struct kvm_vcpu *vcpu, int irq)
{
	struct vgic_dist *dist = &vcpu->kvm->arch.vgic;

	vgic_bitmap_set_irq_val(&dist->irq_pending, vcpu->vcpu_id, irq, 1);
}

void vgic_dist_irq_clear_pending(struct kvm_vcpu *vcpu, int irq)
{
	struct vgic_dist *dist = &vcpu->kvm->arch.vgic;

	vgic_bitmap_set_irq_val(&dist->irq_pending, vcpu->vcpu_id, irq, 0);
}

static void vgic_cpu_irq_set(struct kvm_vcpu *vcpu, int irq)
{
	if (irq < VGIC_NR_PRIVATE_IRQS)
		set_bit(irq, vcpu->arch.vgic_cpu.pending_percpu);
	else
		set_bit(irq - VGIC_NR_PRIVATE_IRQS,
			vcpu->arch.vgic_cpu.pending_shared);
}

void vgic_cpu_irq_clear(struct kvm_vcpu *vcpu, int irq)
{
	if (irq < VGIC_NR_PRIVATE_IRQS)
		clear_bit(irq, vcpu->arch.vgic_cpu.pending_percpu);
	else
		clear_bit(irq - VGIC_NR_PRIVATE_IRQS,
			  vcpu->arch.vgic_cpu.pending_shared);
}

static bool vgic_can_sample_irq(struct kvm_vcpu *vcpu, int irq)
{
	return vgic_irq_is_edge(vcpu, irq) || !vgic_irq_is_queued(vcpu, irq);
}

/**
 * vgic_reg_access - access vgic register
 * @mmio:   pointer to the data describing the mmio access
 * @reg:    pointer to the virtual backing of vgic distributor data
 * @offset: least significant 2 bits used for word offset
 * @mode:   ACCESS_ mode (see defines above)
 *
 * Helper to make vgic register access easier using one of the access
 * modes defined for vgic register access
 * (read,raz,write-ignored,setbit,clearbit,write)
 */
void vgic_reg_access(struct kvm_exit_mmio *mmio, u32 *reg,
		     phys_addr_t offset, int mode)
{
	int word_offset = (offset & 3) * 8;
	u32 mask = (1UL << (mmio->len * 8)) - 1;
	u32 regval;

	/*
	 * Any alignment fault should have been delivered to the guest
	 * directly (ARM ARM B3.12.7 "Prioritization of aborts").
	 */

	if (reg) {
		regval = *reg;
	} else {
		BUG_ON(mode != (ACCESS_READ_RAZ | ACCESS_WRITE_IGNORED));
		regval = 0;
	}

	if (mmio->is_write) {
		u32 data = mmio_data_read(mmio, mask) << word_offset;
		switch (ACCESS_WRITE_MASK(mode)) {
		case ACCESS_WRITE_IGNORED:
			return;

		case ACCESS_WRITE_SETBIT:
			regval |= data;
			break;

		case ACCESS_WRITE_CLEARBIT:
			regval &= ~data;
			break;

		case ACCESS_WRITE_VALUE:
			regval = (regval & ~(mask << word_offset)) | data;
			break;
		}
		*reg = regval;
	} else {
		switch (ACCESS_READ_MASK(mode)) {
		case ACCESS_READ_RAZ:
			regval = 0;
			/* fall through */

		case ACCESS_READ_VALUE:
			mmio_data_write(mmio, mask, regval >> word_offset);
		}
	}
}
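
/*
 * Worked example (hypothetical access): a 2-byte write of 0xbeef at an
 * offset with (offset & 3) == 2 gives word_offset = 16 and
 * mask = 0xffff; with ACCESS_WRITE_VALUE only bits [31:16] of the
 * backing u32 are replaced and the low half-word is preserved.
 */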

bool handle_mmio_raz_wi(struct kvm_vcpu *vcpu, struct kvm_exit_mmio *mmio,
			phys_addr_t offset)
{
	vgic_reg_access(mmio, NULL, offset,
			ACCESS_READ_RAZ | ACCESS_WRITE_IGNORED);
	return false;
}

bool vgic_handle_enable_reg(struct kvm *kvm, struct kvm_exit_mmio *mmio,
			    phys_addr_t offset, int vcpu_id, int access)
{
	u32 *reg;
	int mode = ACCESS_READ_VALUE | access;
	struct kvm_vcpu *target_vcpu = kvm_get_vcpu(kvm, vcpu_id);

	reg = vgic_bitmap_get_reg(&kvm->arch.vgic.irq_enabled, vcpu_id, offset);
	vgic_reg_access(mmio, reg, offset, mode);
	if (mmio->is_write) {
		if (access & ACCESS_WRITE_CLEARBIT) {
			if (offset < 4) /* Force SGI enabled */
				*reg |= 0xffff;
			vgic_retire_disabled_irqs(target_vcpu);
		}
		vgic_update_state(kvm);
		return true;
	}

	return false;
}

bool vgic_handle_set_pending_reg(struct kvm *kvm,
				 struct kvm_exit_mmio *mmio,
				 phys_addr_t offset, int vcpu_id)
{
	u32 *reg, orig;
	u32 level_mask;
	int mode = ACCESS_READ_VALUE | ACCESS_WRITE_SETBIT;
	struct vgic_dist *dist = &kvm->arch.vgic;

	reg = vgic_bitmap_get_reg(&dist->irq_cfg, vcpu_id, offset);
	level_mask = (~(*reg));

	/* Mark both level and edge triggered irqs as pending */
	reg = vgic_bitmap_get_reg(&dist->irq_pending, vcpu_id, offset);
	orig = *reg;
	vgic_reg_access(mmio, reg, offset, mode);

	if (mmio->is_write) {
		/* Set the soft-pending flag only for level-triggered irqs */
		reg = vgic_bitmap_get_reg(&dist->irq_soft_pend,
					  vcpu_id, offset);
		vgic_reg_access(mmio, reg, offset, mode);
		*reg &= level_mask;

		/* Ignore writes to SGIs */
		if (offset < 2) {
			*reg &= ~0xffff;
			*reg |= orig & 0xffff;
		}

		vgic_update_state(kvm);
		return true;
	}

	return false;
}

bool vgic_handle_clear_pending_reg(struct kvm *kvm,
				   struct kvm_exit_mmio *mmio,
				   phys_addr_t offset, int vcpu_id)
{
	u32 *level_active;
	u32 *reg, orig;
	int mode = ACCESS_READ_VALUE | ACCESS_WRITE_CLEARBIT;
	struct vgic_dist *dist = &kvm->arch.vgic;

	reg = vgic_bitmap_get_reg(&dist->irq_pending, vcpu_id, offset);
	orig = *reg;
	vgic_reg_access(mmio, reg, offset, mode);
	if (mmio->is_write) {
		/* Re-set level triggered level-active interrupts */
		level_active = vgic_bitmap_get_reg(&dist->irq_level,
					  vcpu_id, offset);
		reg = vgic_bitmap_get_reg(&dist->irq_pending, vcpu_id, offset);
		*reg |= *level_active;

		/* Ignore writes to SGIs */
		if (offset < 2) {
			*reg &= ~0xffff;
			*reg |= orig & 0xffff;
		}

		/* Clear soft-pending flags */
		reg = vgic_bitmap_get_reg(&dist->irq_soft_pend,
					  vcpu_id, offset);
		vgic_reg_access(mmio, reg, offset, mode);

		vgic_update_state(kvm);
		return true;
	}
	return false;
}

bool vgic_handle_set_active_reg(struct kvm *kvm,
				struct kvm_exit_mmio *mmio,
				phys_addr_t offset, int vcpu_id)
{
	u32 *reg;
	struct vgic_dist *dist = &kvm->arch.vgic;

	reg = vgic_bitmap_get_reg(&dist->irq_active, vcpu_id, offset);
	vgic_reg_access(mmio, reg, offset,
			ACCESS_READ_VALUE | ACCESS_WRITE_SETBIT);

	if (mmio->is_write) {
		vgic_update_state(kvm);
		return true;
	}

	return false;
}

bool vgic_handle_clear_active_reg(struct kvm *kvm,
				  struct kvm_exit_mmio *mmio,
				  phys_addr_t offset, int vcpu_id)
{
	u32 *reg;
	struct vgic_dist *dist = &kvm->arch.vgic;

	reg = vgic_bitmap_get_reg(&dist->irq_active, vcpu_id, offset);
	vgic_reg_access(mmio, reg, offset,
			ACCESS_READ_VALUE | ACCESS_WRITE_CLEARBIT);

	if (mmio->is_write) {
		vgic_update_state(kvm);
		return true;
	}

	return false;
}

static u32 vgic_cfg_expand(u16 val)
{
	u32 res = 0;
	int i;

	/*
	 * Turn a 16bit value like abcd...mnop into a 32bit word
	 * a0b0c0d0...m0n0o0p0, which is what the HW cfg register is.
	 */
	for (i = 0; i < 16; i++)
		res |= ((val >> i) & VGIC_CFG_EDGE) << (2 * i + 1);

	return res;
}

static u16 vgic_cfg_compress(u32 val)
{
	u16 res = 0;
	int i;

	/*
	 * Turn a 32bit word a0b0c0d0...m0n0o0p0 into 16bit value like
	 * abcd...mnop which is what we really care about.
	 */
	for (i = 0; i < 16; i++)
		res |= ((val >> (i * 2 + 1)) & VGIC_CFG_EDGE) << i;

	return res;
}
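
/*
 * Worked example: vgic_cfg_expand(0x0003) sets bits 1 and 3, giving
 * 0x0000000a, and vgic_cfg_compress(0x0000000a) recovers 0x0003. The
 * two functions are exact inverses on the odd bits, which is all the
 * emulation stores (see below).
 */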

/*
 * The distributor uses 2 bits per IRQ for the CFG register, but the
 * LSB is always 0. As such, we only keep the upper bit, and use the
 * two above functions to compress/expand the bits
 */
bool vgic_handle_cfg_reg(u32 *reg, struct kvm_exit_mmio *mmio,
			 phys_addr_t offset)
{
	u32 val;

	if (offset & 4)
		val = *reg >> 16;
	else
		val = *reg & 0xffff;

	val = vgic_cfg_expand(val);
	vgic_reg_access(mmio, &val, offset,
			ACCESS_READ_VALUE | ACCESS_WRITE_VALUE);
	if (mmio->is_write) {
		if (offset < 8) {
			*reg = ~0U; /* Force PPIs/SGIs to 1 */
			return false;
		}

		val = vgic_cfg_compress(val);
		if (offset & 4) {
			*reg &= 0xffff;
			*reg |= val << 16;
		} else {
			*reg &= 0xffff << 16;
			*reg |= val;
		}
	}

	return false;
}

/**
 * vgic_unqueue_irqs - move pending/active IRQs from LRs to the distributor
 * @vcpu: the VCPU whose LR state is moved back to the distributor
 *
 * Move any IRQs that have already been assigned to LRs back to the
 * emulated distributor state so that the complete emulated state can be read
 * from the main emulation structures without investigating the LRs.
 */
void vgic_unqueue_irqs(struct kvm_vcpu *vcpu)
{
	struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
	int i;

	for_each_set_bit(i, vgic_cpu->lr_used, vgic_cpu->nr_lr) {
		struct vgic_lr lr = vgic_get_lr(vcpu, i);

		/*
		 * There are three options for the state bits:
		 *
		 * 01: pending
		 * 10: active
		 * 11: pending and active
		 */
		BUG_ON(!(lr.state & LR_STATE_MASK));

		/* Reestablish SGI source for pending and active IRQs */
		if (lr.irq < VGIC_NR_SGIS)
			add_sgi_source(vcpu, lr.irq, lr.source);

		/*
		 * If the LR holds an active (10) or a pending and active (11)
		 * interrupt then move the active state to the
		 * distributor tracking bit.
		 */
		if (lr.state & LR_STATE_ACTIVE) {
			vgic_irq_set_active(vcpu, lr.irq);
			lr.state &= ~LR_STATE_ACTIVE;
		}

		/*
		 * Reestablish the pending state on the distributor and the
		 * CPU interface.  It may have already been pending, but that
		 * is fine, then we are only setting a few bits that were
		 * already set.
		 */
		if (lr.state & LR_STATE_PENDING) {
			vgic_dist_irq_set_pending(vcpu, lr.irq);
			lr.state &= ~LR_STATE_PENDING;
		}

		vgic_set_lr(vcpu, i, lr);

		/*
		 * Mark the LR as free for other use.
		 */
		BUG_ON(lr.state & LR_STATE_MASK);
		vgic_retire_lr(i, lr.irq, vcpu);
		vgic_irq_clear_queued(vcpu, lr.irq);

		/* Finally update the VGIC state. */
		vgic_update_state(vcpu->kvm);
	}
}

const
struct vgic_io_range *vgic_find_range(const struct vgic_io_range *ranges,
				      int len, gpa_t offset)
{
	while (ranges->len) {
		if (offset >= ranges->base &&
		    (offset + len) <= (ranges->base + ranges->len))
			return ranges;
		ranges++;
	}

	return NULL;
}
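
/*
 * For illustration (hypothetical table entry): with a range
 * { .base = 0x800, .len = 0x400 }, a 4-byte access at offset 0x9fc
 * matches, while an 8-byte access at offset 0xbfc does not, because
 * 0xbfc + 8 would cross the end of the range at 0xc00.
 */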

static bool vgic_validate_access(const struct vgic_dist *dist,
				 const struct vgic_io_range *range,
				 unsigned long offset)
{
	int irq;

	if (!range->bits_per_irq)
		return true;	/* Not an irq-based access */

	irq = offset * 8 / range->bits_per_irq;
	if (irq >= dist->nr_irqs)
		return false;

	return true;
}

/*
 * Call the respective handler function for the given range.
 * We split up any 64 bit accesses into two consecutive 32 bit
 * handler calls and merge the result afterwards.
 * We do this in a little endian fashion regardless of the host's
 * or guest's endianness, because the GIC is always LE and the rest of
 * the code (vgic_reg_access) also puts it in a LE fashion already.
 * At this point we have already identified the handle function, so
 * range points to that one entry and offset is relative to this.
 */
static bool call_range_handler(struct kvm_vcpu *vcpu,
			       struct kvm_exit_mmio *mmio,
			       unsigned long offset,
			       const struct vgic_io_range *range)
{
	struct kvm_exit_mmio mmio32;
	bool ret;

	if (likely(mmio->len <= 4))
		return range->handle_mmio(vcpu, mmio, offset);

	/*
	 * Any access bigger than 4 bytes (that we currently handle in KVM)
	 * is actually 8 bytes long, caused by a 64-bit access
	 */

	mmio32.len = 4;
	mmio32.is_write = mmio->is_write;
	mmio32.private = mmio->private;

	mmio32.phys_addr = mmio->phys_addr + 4;
	mmio32.data = &((u32 *)mmio->data)[1];
	ret = range->handle_mmio(vcpu, &mmio32, offset + 4);

	mmio32.phys_addr = mmio->phys_addr;
	mmio32.data = &((u32 *)mmio->data)[0];
	ret |= range->handle_mmio(vcpu, &mmio32, offset);

	return ret;
}
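
/*
 * For illustration: an 8-byte write at offset 0x100 is thus dispatched
 * as two 4-byte calls, first at offset 0x104 with the second data word
 * (bytes 4-7), then at offset 0x100 with the first one, matching the
 * little-endian layout described above.
 */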

/**
 * vgic_handle_mmio_access - handle an in-kernel MMIO access
 * @vcpu:	pointer to the vcpu performing the access
 * @this:	pointer to the KVM IO device in charge
 * @addr:	guest physical address of the access
 * @len:	size of the access
 * @val:	pointer to the data region
 * @is_write:	read or write access
 *
 * This is called by the read/write KVM IO device wrappers below.
 * Returns 0 if the MMIO access has been performed, -ENXIO otherwise.
 */
static int vgic_handle_mmio_access(struct kvm_vcpu *vcpu,
				   struct kvm_io_device *this, gpa_t addr,
				   int len, void *val, bool is_write)
{
	struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
	struct vgic_io_device *iodev = container_of(this,
						    struct vgic_io_device, dev);
	struct kvm_run *run = vcpu->run;
	const struct vgic_io_range *range;
	struct kvm_exit_mmio mmio;
	bool updated_state;
	gpa_t offset;

	offset = addr - iodev->addr;
	range = vgic_find_range(iodev->reg_ranges, len, offset);
	if (unlikely(!range || !range->handle_mmio)) {
		pr_warn("Unhandled access %d %08llx %d\n", is_write, addr, len);
		return -ENXIO;
	}

	mmio.phys_addr = addr;
	mmio.len = len;
	mmio.is_write = is_write;
	mmio.data = val;
	mmio.private = iodev->redist_vcpu;

	spin_lock(&dist->lock);
	offset -= range->base;
	if (vgic_validate_access(dist, range, offset)) {
		updated_state = call_range_handler(vcpu, &mmio, offset, range);
	} else {
		if (!is_write)
			memset(val, 0, len);
		updated_state = false;
	}
	spin_unlock(&dist->lock);
	run->mmio.is_write	= is_write;
	run->mmio.len		= len;
	run->mmio.phys_addr	= addr;
	memcpy(run->mmio.data, val, len);

	kvm_handle_mmio_return(vcpu, run);

	if (updated_state)
		vgic_kick_vcpus(vcpu->kvm);

	return 0;
}

static int vgic_handle_mmio_read(struct kvm_vcpu *vcpu,
				 struct kvm_io_device *this,
				 gpa_t addr, int len, void *val)
{
	return vgic_handle_mmio_access(vcpu, this, addr, len, val, false);
}

static int vgic_handle_mmio_write(struct kvm_vcpu *vcpu,
				  struct kvm_io_device *this,
				  gpa_t addr, int len, const void *val)
{
	return vgic_handle_mmio_access(vcpu, this, addr, len, (void *)val,
				       true);
}

struct kvm_io_device_ops vgic_io_ops = {
	.read	= vgic_handle_mmio_read,
	.write	= vgic_handle_mmio_write,
};

/**
 * vgic_register_kvm_io_dev - register VGIC register frame on the KVM I/O bus
 * @kvm:            The VM structure pointer
 * @base:           The (guest) base address for the register frame
 * @len:            Length of the register frame window
 * @ranges:         Describing the handler functions for each register
 * @redist_vcpu_id: The VCPU ID to pass on to the handlers on call
 * @iodev:          Points to memory to be passed on to the handler
 *
 * @iodev stores the parameters of this function so they can be used by
 * the handler and the dispatcher function (the KVM I/O bus framework
 * lacks an opaque parameter). Initialization is done in this function,
 * but the reference should be valid and unique for the whole VGIC
 * lifetime.
 * If the register frame is not mapped for a specific VCPU, pass -1 to
 * @redist_vcpu_id.
 */
int vgic_register_kvm_io_dev(struct kvm *kvm, gpa_t base, int len,
			     const struct vgic_io_range *ranges,
			     int redist_vcpu_id,
			     struct vgic_io_device *iodev)
{
	struct kvm_vcpu *vcpu = NULL;
	int ret;

	if (redist_vcpu_id >= 0)
		vcpu = kvm_get_vcpu(kvm, redist_vcpu_id);

	iodev->addr		= base;
	iodev->len		= len;
	iodev->reg_ranges	= ranges;
	iodev->redist_vcpu	= vcpu;

	kvm_iodevice_init(&iodev->dev, &vgic_io_ops);

	mutex_lock(&kvm->slots_lock);

	ret = kvm_io_bus_register_dev(kvm, KVM_MMIO_BUS, base, len,
				      &iodev->dev);
	mutex_unlock(&kvm->slots_lock);

	/* Mark the iodev as invalid if registration fails. */
	if (ret)
		iodev->dev.ops = NULL;

	return ret;
}
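
/*
 * Usage sketch (hypothetical caller, names are placeholders): a GICv2
 * emulation could register its distributor frame with
 *
 *	ret = vgic_register_kvm_io_dev(kvm, dist_base,
 *				       KVM_VGIC_V2_DIST_SIZE,
 *				       dist_ranges, -1, &dist_iodev);
 *
 * passing -1 because the distributor frame is not tied to a specific
 * VCPU, unlike GICv3 redistributor frames.
 */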

static int vgic_nr_shared_irqs(struct vgic_dist *dist)
{
	return dist->nr_irqs - VGIC_NR_PRIVATE_IRQS;
}

static int compute_active_for_cpu(struct kvm_vcpu *vcpu)
{
	struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
	unsigned long *active, *enabled, *act_percpu, *act_shared;
	unsigned long active_private, active_shared;
	int nr_shared = vgic_nr_shared_irqs(dist);
	int vcpu_id;

	vcpu_id = vcpu->vcpu_id;
	act_percpu = vcpu->arch.vgic_cpu.active_percpu;
	act_shared = vcpu->arch.vgic_cpu.active_shared;

	active = vgic_bitmap_get_cpu_map(&dist->irq_active, vcpu_id);
	enabled = vgic_bitmap_get_cpu_map(&dist->irq_enabled, vcpu_id);
	bitmap_and(act_percpu, active, enabled, VGIC_NR_PRIVATE_IRQS);

	active = vgic_bitmap_get_shared_map(&dist->irq_active);
	enabled = vgic_bitmap_get_shared_map(&dist->irq_enabled);
	bitmap_and(act_shared, active, enabled, nr_shared);
	bitmap_and(act_shared, act_shared,
		   vgic_bitmap_get_shared_map(&dist->irq_spi_target[vcpu_id]),
		   nr_shared);

	active_private = find_first_bit(act_percpu, VGIC_NR_PRIVATE_IRQS);
	active_shared = find_first_bit(act_shared, nr_shared);

	return (active_private < VGIC_NR_PRIVATE_IRQS ||
		active_shared < nr_shared);
}

static int compute_pending_for_cpu(struct kvm_vcpu *vcpu)
{
	struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
	unsigned long *pending, *enabled, *pend_percpu, *pend_shared;
	unsigned long pending_private, pending_shared;
	int nr_shared = vgic_nr_shared_irqs(dist);
	int vcpu_id;

	vcpu_id = vcpu->vcpu_id;
	pend_percpu = vcpu->arch.vgic_cpu.pending_percpu;
	pend_shared = vcpu->arch.vgic_cpu.pending_shared;

	pending = vgic_bitmap_get_cpu_map(&dist->irq_pending, vcpu_id);
	enabled = vgic_bitmap_get_cpu_map(&dist->irq_enabled, vcpu_id);
	bitmap_and(pend_percpu, pending, enabled, VGIC_NR_PRIVATE_IRQS);

	pending = vgic_bitmap_get_shared_map(&dist->irq_pending);
	enabled = vgic_bitmap_get_shared_map(&dist->irq_enabled);
	bitmap_and(pend_shared, pending, enabled, nr_shared);
	bitmap_and(pend_shared, pend_shared,
		   vgic_bitmap_get_shared_map(&dist->irq_spi_target[vcpu_id]),
		   nr_shared);

	pending_private = find_first_bit(pend_percpu, VGIC_NR_PRIVATE_IRQS);
	pending_shared = find_first_bit(pend_shared, nr_shared);
	return (pending_private < VGIC_NR_PRIVATE_IRQS ||
		pending_shared < nr_shared);
}

/*
 * Update the interrupt state and determine which CPUs have pending
 * or active interrupts. Must be called with distributor lock held.
 */
void vgic_update_state(struct kvm *kvm)
{
	struct vgic_dist *dist = &kvm->arch.vgic;
	struct kvm_vcpu *vcpu;
	int c;

	if (!dist->enabled) {
		set_bit(0, dist->irq_pending_on_cpu);
		return;
	}

	kvm_for_each_vcpu(c, vcpu, kvm) {
		if (compute_pending_for_cpu(vcpu))
			set_bit(c, dist->irq_pending_on_cpu);

		if (compute_active_for_cpu(vcpu))
			set_bit(c, dist->irq_active_on_cpu);
		else
			clear_bit(c, dist->irq_active_on_cpu);
	}
}

static struct vgic_lr vgic_get_lr(const struct kvm_vcpu *vcpu, int lr)
{
	return vgic_ops->get_lr(vcpu, lr);
}

static void vgic_set_lr(struct kvm_vcpu *vcpu, int lr,
			       struct vgic_lr vlr)
{
	vgic_ops->set_lr(vcpu, lr, vlr);
}

static void vgic_sync_lr_elrsr(struct kvm_vcpu *vcpu, int lr,
			       struct vgic_lr vlr)
{
	vgic_ops->sync_lr_elrsr(vcpu, lr, vlr);
}

static inline u64 vgic_get_elrsr(struct kvm_vcpu *vcpu)
{
	return vgic_ops->get_elrsr(vcpu);
}

static inline u64 vgic_get_eisr(struct kvm_vcpu *vcpu)
{
	return vgic_ops->get_eisr(vcpu);
}

static inline void vgic_clear_eisr(struct kvm_vcpu *vcpu)
{
	vgic_ops->clear_eisr(vcpu);
}

static inline u32 vgic_get_interrupt_status(struct kvm_vcpu *vcpu)
{
	return vgic_ops->get_interrupt_status(vcpu);
}

static inline void vgic_enable_underflow(struct kvm_vcpu *vcpu)
{
	vgic_ops->enable_underflow(vcpu);
}

static inline void vgic_disable_underflow(struct kvm_vcpu *vcpu)
{
	vgic_ops->disable_underflow(vcpu);
}

void vgic_get_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcr)
{
	vgic_ops->get_vmcr(vcpu, vmcr);
}

void vgic_set_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcr)
{
	vgic_ops->set_vmcr(vcpu, vmcr);
}

static inline void vgic_enable(struct kvm_vcpu *vcpu)
{
	vgic_ops->enable(vcpu);
}

static void vgic_retire_lr(int lr_nr, int irq, struct kvm_vcpu *vcpu)
{
	struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
	struct vgic_lr vlr = vgic_get_lr(vcpu, lr_nr);

	vlr.state = 0;
	vgic_set_lr(vcpu, lr_nr, vlr);
	clear_bit(lr_nr, vgic_cpu->lr_used);
	vgic_cpu->vgic_irq_lr_map[irq] = LR_EMPTY;
	vgic_sync_lr_elrsr(vcpu, lr_nr, vlr);
}

/*
 * An interrupt may have been disabled after being made pending on the
 * CPU interface (the classic case is a timer running while we're
 * rebooting the guest - the interrupt would kick as soon as the CPU
 * interface gets enabled, with deadly consequences).
 *
 * The solution is to examine already active LRs, and check the
 * interrupt is still enabled. If not, just retire it.
 */
static void vgic_retire_disabled_irqs(struct kvm_vcpu *vcpu)
{
	struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
	int lr;

	for_each_set_bit(lr, vgic_cpu->lr_used, vgic->nr_lr) {
		struct vgic_lr vlr = vgic_get_lr(vcpu, lr);

		if (!vgic_irq_is_enabled(vcpu, vlr.irq)) {
			vgic_retire_lr(lr, vlr.irq, vcpu);
			if (vgic_irq_is_queued(vcpu, vlr.irq))
				vgic_irq_clear_queued(vcpu, vlr.irq);
		}
	}
}

static void vgic_queue_irq_to_lr(struct kvm_vcpu *vcpu, int irq,
				 int lr_nr, struct vgic_lr vlr)
{
	if (vgic_irq_is_active(vcpu, irq)) {
		vlr.state |= LR_STATE_ACTIVE;
		kvm_debug("Set active, clear distributor: 0x%x\n", vlr.state);
		vgic_irq_clear_active(vcpu, irq);
		vgic_update_state(vcpu->kvm);
	} else if (vgic_dist_irq_is_pending(vcpu, irq)) {
		vlr.state |= LR_STATE_PENDING;
		kvm_debug("Set pending: 0x%x\n", vlr.state);
	}

	if (!vgic_irq_is_edge(vcpu, irq))
		vlr.state |= LR_EOI_INT;

	vgic_set_lr(vcpu, lr_nr, vlr);
	vgic_sync_lr_elrsr(vcpu, lr_nr, vlr);
}

/*
 * Queue an interrupt to a CPU virtual interface. Return true on success,
 * or false if it wasn't possible to queue it.
 * sgi_source must be zero for any non-SGI interrupts.
 */
bool vgic_queue_irq(struct kvm_vcpu *vcpu, u8 sgi_source_id, int irq)
{
	struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
	struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
	struct vgic_lr vlr;
	int lr;

	/* Sanitize the input... */
	BUG_ON(sgi_source_id & ~7);
	BUG_ON(sgi_source_id && irq >= VGIC_NR_SGIS);
	BUG_ON(irq >= dist->nr_irqs);

	kvm_debug("Queue IRQ%d\n", irq);

	lr = vgic_cpu->vgic_irq_lr_map[irq];

	/* Do we have an active interrupt for the same CPUID? */
	if (lr != LR_EMPTY) {
		vlr = vgic_get_lr(vcpu, lr);
		if (vlr.source == sgi_source_id) {
			kvm_debug("LR%d piggyback for IRQ%d\n", lr, vlr.irq);
			BUG_ON(!test_bit(lr, vgic_cpu->lr_used));
			vgic_queue_irq_to_lr(vcpu, irq, lr, vlr);
			return true;
		}
	}

	/* Try to use another LR for this interrupt */
	lr = find_first_zero_bit((unsigned long *)vgic_cpu->lr_used,
			       vgic->nr_lr);
	if (lr >= vgic->nr_lr)
		return false;

	kvm_debug("LR%d allocated for IRQ%d %x\n", lr, irq, sgi_source_id);
	vgic_cpu->vgic_irq_lr_map[irq] = lr;
	set_bit(lr, vgic_cpu->lr_used);

	vlr.irq = irq;
	vlr.source = sgi_source_id;
	vlr.state = 0;
	vgic_queue_irq_to_lr(vcpu, irq, lr, vlr);

	return true;
}

static bool vgic_queue_hwirq(struct kvm_vcpu *vcpu, int irq)
{
	if (!vgic_can_sample_irq(vcpu, irq))
		return true; /* level interrupt, already queued */

	if (vgic_queue_irq(vcpu, 0, irq)) {
		if (vgic_irq_is_edge(vcpu, irq)) {
			vgic_dist_irq_clear_pending(vcpu, irq);
			vgic_cpu_irq_clear(vcpu, irq);
		} else {
			vgic_irq_set_queued(vcpu, irq);
		}

		return true;
	}

	return false;
}

/*
 * Fill the list registers with pending interrupts before running the
 * guest.
 */
static void __kvm_vgic_flush_hwstate(struct kvm_vcpu *vcpu)
{
	struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
	struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
	unsigned long *pa_percpu, *pa_shared;
	int i, vcpu_id;
	int overflow = 0;
	int nr_shared = vgic_nr_shared_irqs(dist);

	vcpu_id = vcpu->vcpu_id;

	pa_percpu = vcpu->arch.vgic_cpu.pend_act_percpu;
	pa_shared = vcpu->arch.vgic_cpu.pend_act_shared;

	bitmap_or(pa_percpu, vgic_cpu->pending_percpu, vgic_cpu->active_percpu,
		  VGIC_NR_PRIVATE_IRQS);
	bitmap_or(pa_shared, vgic_cpu->pending_shared, vgic_cpu->active_shared,
		  nr_shared);
	/*
	 * We may not have any pending interrupt, or the interrupts
	 * may have been serviced from another vcpu. In all cases,
	 * move along.
	 */
	if (!kvm_vgic_vcpu_pending_irq(vcpu) && !kvm_vgic_vcpu_active_irq(vcpu))
		goto epilog;

	/* SGIs */
	for_each_set_bit(i, pa_percpu, VGIC_NR_SGIS) {
		if (!queue_sgi(vcpu, i))
			overflow = 1;
	}

	/* PPIs */
	for_each_set_bit_from(i, pa_percpu, VGIC_NR_PRIVATE_IRQS) {
		if (!vgic_queue_hwirq(vcpu, i))
			overflow = 1;
	}

	/* SPIs */
	for_each_set_bit(i, pa_shared, nr_shared) {
		if (!vgic_queue_hwirq(vcpu, i + VGIC_NR_PRIVATE_IRQS))
			overflow = 1;
	}

epilog:
	if (overflow) {
		vgic_enable_underflow(vcpu);
	} else {
		vgic_disable_underflow(vcpu);
		/*
		 * We're about to run this VCPU, and we've consumed
		 * everything the distributor had in store for
		 * us. Claim we don't have anything pending. We'll
		 * adjust that if needed while exiting.
		 */
		clear_bit(vcpu_id, dist->irq_pending_on_cpu);
	}
}

static bool vgic_process_maintenance(struct kvm_vcpu *vcpu)
{
	u32 status = vgic_get_interrupt_status(vcpu);
	struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
	bool level_pending = false;
	struct kvm *kvm = vcpu->kvm;

	kvm_debug("STATUS = %08x\n", status);

	if (status & INT_STATUS_EOI) {
		/*
		 * Some level interrupts have been EOIed. Clear their
		 * active bit.
		 */
		u64 eisr = vgic_get_eisr(vcpu);
		unsigned long *eisr_ptr = u64_to_bitmask(&eisr);
		int lr;

		for_each_set_bit(lr, eisr_ptr, vgic->nr_lr) {
			struct vgic_lr vlr = vgic_get_lr(vcpu, lr);
			WARN_ON(vgic_irq_is_edge(vcpu, vlr.irq));

			spin_lock(&dist->lock);
			vgic_irq_clear_queued(vcpu, vlr.irq);
			WARN_ON(vlr.state & LR_STATE_MASK);
			vlr.state = 0;
			vgic_set_lr(vcpu, lr, vlr);

			/*
			 * If the IRQ was EOIed it was also ACKed and we
			 * therefore assume we can clear the soft pending
			 * state (should it have been set) for this interrupt.
			 *
			 * Note: if the IRQ soft pending state was set after
			 * the IRQ was acked, it actually shouldn't be
			 * cleared, but we have no way of knowing that unless
			 * we start trapping ACKs when the soft-pending state
			 * is set.
			 */
			vgic_dist_irq_clear_soft_pend(vcpu, vlr.irq);

			/*
			 * kvm_notify_acked_irq calls kvm_set_irq()
			 * to reset the IRQ level. Need to release the
			 * lock for kvm_set_irq to grab it.
			 */
			spin_unlock(&dist->lock);

			kvm_notify_acked_irq(kvm, 0,
					     vlr.irq - VGIC_NR_PRIVATE_IRQS);
			spin_lock(&dist->lock);

			/* Any additional pending interrupt? */
			if (vgic_dist_irq_get_level(vcpu, vlr.irq)) {
				vgic_cpu_irq_set(vcpu, vlr.irq);
				level_pending = true;
			} else {
				vgic_dist_irq_clear_pending(vcpu, vlr.irq);
				vgic_cpu_irq_clear(vcpu, vlr.irq);
			}

			spin_unlock(&dist->lock);

			/*
			 * Despite being EOIed, the LR may not have
			 * been marked as empty.
			 */
			vgic_sync_lr_elrsr(vcpu, lr, vlr);
		}
	}

	if (status & INT_STATUS_UNDERFLOW)
		vgic_disable_underflow(vcpu);

	/*
	 * In the next iterations of the vcpu loop, if we sync the vgic state
	 * after flushing it, but before entering the guest (this happens for
	 * pending signals and vmid rollovers), then make sure we don't pick
	 * up any old maintenance interrupts here.
	 */
	vgic_clear_eisr(vcpu);

	return level_pending;
}

/* Sync back the VGIC state after a guest run */
static void __kvm_vgic_sync_hwstate(struct kvm_vcpu *vcpu)
{
	struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
	struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
	u64 elrsr;
	unsigned long *elrsr_ptr;
	int lr, pending;
	bool level_pending;

	level_pending = vgic_process_maintenance(vcpu);
	elrsr = vgic_get_elrsr(vcpu);
	elrsr_ptr = u64_to_bitmask(&elrsr);

	/* Clear mappings for empty LRs */
	for_each_set_bit(lr, elrsr_ptr, vgic->nr_lr) {
		struct vgic_lr vlr;

		if (!test_and_clear_bit(lr, vgic_cpu->lr_used))
			continue;

		vlr = vgic_get_lr(vcpu, lr);

		BUG_ON(vlr.irq >= dist->nr_irqs);
		vgic_cpu->vgic_irq_lr_map[vlr.irq] = LR_EMPTY;
	}

	/* Check if we still have something up our sleeve... */
	pending = find_first_zero_bit(elrsr_ptr, vgic->nr_lr);
	if (level_pending || pending < vgic->nr_lr)
		set_bit(vcpu->vcpu_id, dist->irq_pending_on_cpu);
}

void kvm_vgic_flush_hwstate(struct kvm_vcpu *vcpu)
{
	struct vgic_dist *dist = &vcpu->kvm->arch.vgic;

	if (!irqchip_in_kernel(vcpu->kvm))
		return;

	spin_lock(&dist->lock);
	__kvm_vgic_flush_hwstate(vcpu);
	spin_unlock(&dist->lock);
}

void kvm_vgic_sync_hwstate(struct kvm_vcpu *vcpu)
{
	if (!irqchip_in_kernel(vcpu->kvm))
		return;

	__kvm_vgic_sync_hwstate(vcpu);
}

int kvm_vgic_vcpu_pending_irq(struct kvm_vcpu *vcpu)
{
	struct vgic_dist *dist = &vcpu->kvm->arch.vgic;

	if (!irqchip_in_kernel(vcpu->kvm))
		return 0;

	return test_bit(vcpu->vcpu_id, dist->irq_pending_on_cpu);
}

int kvm_vgic_vcpu_active_irq(struct kvm_vcpu *vcpu)
{
	struct vgic_dist *dist = &vcpu->kvm->arch.vgic;

	if (!irqchip_in_kernel(vcpu->kvm))
		return 0;

	return test_bit(vcpu->vcpu_id, dist->irq_active_on_cpu);
}

void vgic_kick_vcpus(struct kvm *kvm)
{
	struct kvm_vcpu *vcpu;
	int c;

	/*
	 * We've injected an interrupt, time to find out who deserves
	 * a good kick...
	 */
	kvm_for_each_vcpu(c, vcpu, kvm) {
		if (kvm_vgic_vcpu_pending_irq(vcpu))
			kvm_vcpu_kick(vcpu);
	}
}

static int vgic_validate_injection(struct kvm_vcpu *vcpu, int irq, int level)
{
	int edge_triggered = vgic_irq_is_edge(vcpu, irq);

	/*
	 * Only inject an interrupt if:
	 * - edge triggered and we have a rising edge
	 * - level triggered and we change level
	 */
	if (edge_triggered) {
		int state = vgic_dist_irq_is_pending(vcpu, irq);
		return level > state;
	} else {
		int state = vgic_dist_irq_get_level(vcpu, irq);
		return level != state;
	}
}
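
/*
 * For illustration: a level-triggered IRQ whose line is already high
 * rejects another level == 1 injection (no level change), and an
 * edge-triggered IRQ that is still pending rejects a new rising edge
 * until the pending state has been consumed.
 */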

static int vgic_update_irq_pending(struct kvm *kvm, int cpuid,
				  unsigned int irq_num, bool level)
{
	struct vgic_dist *dist = &kvm->arch.vgic;
	struct kvm_vcpu *vcpu;
	int edge_triggered, level_triggered;
	int enabled;
	bool ret = true, can_inject = true;

	spin_lock(&dist->lock);

	vcpu = kvm_get_vcpu(kvm, cpuid);
	edge_triggered = vgic_irq_is_edge(vcpu, irq_num);
	level_triggered = !edge_triggered;

	if (!vgic_validate_injection(vcpu, irq_num, level)) {
		ret = false;
		goto out;
	}

	if (irq_num >= VGIC_NR_PRIVATE_IRQS) {
		cpuid = dist->irq_spi_cpu[irq_num - VGIC_NR_PRIVATE_IRQS];
		if (cpuid == VCPU_NOT_ALLOCATED) {
			/* Pretend we use CPU0, and prevent injection */
			cpuid = 0;
			can_inject = false;
		}
		vcpu = kvm_get_vcpu(kvm, cpuid);
	}

	kvm_debug("Inject IRQ%d level %d CPU%d\n", irq_num, level, cpuid);

	if (level) {
		if (level_triggered)
			vgic_dist_irq_set_level(vcpu, irq_num);
		vgic_dist_irq_set_pending(vcpu, irq_num);
	} else {
		if (level_triggered) {
			vgic_dist_irq_clear_level(vcpu, irq_num);
			if (!vgic_dist_irq_soft_pend(vcpu, irq_num))
				vgic_dist_irq_clear_pending(vcpu, irq_num);
		}

		ret = false;
		goto out;
	}

	enabled = vgic_irq_is_enabled(vcpu, irq_num);

	if (!enabled || !can_inject) {
		ret = false;
		goto out;
	}

	if (!vgic_can_sample_irq(vcpu, irq_num)) {
		/*
		 * Level interrupt in progress, will be picked up
		 * when EOId.
		 */
		ret = false;
		goto out;
	}

	if (level) {
		vgic_cpu_irq_set(vcpu, irq_num);
		set_bit(cpuid, dist->irq_pending_on_cpu);
	}

out:
	spin_unlock(&dist->lock);

	return ret ? cpuid : -EINVAL;
}

/**
 * kvm_vgic_inject_irq - Inject an IRQ from a device to the vgic
 * @kvm:     The VM structure pointer
 * @cpuid:   The CPU for PPIs
 * @irq_num: The IRQ number that is assigned to the device
 * @level:   Edge-triggered:  true:  to trigger the interrupt
 *			      false: to ignore the call
 *	     Level-sensitive: true:  activates an interrupt
 *			      false: deactivates an interrupt
 *
 * The GIC is not concerned with devices being active-LOW or active-HIGH for
 * level-sensitive interrupts.  You can think of the level parameter as 1
 * being HIGH and 0 being LOW and all devices being active-HIGH.
 */
int kvm_vgic_inject_irq(struct kvm *kvm, int cpuid, unsigned int irq_num,
			bool level)
{
	int ret = 0;
	int vcpu_id;

	if (unlikely(!vgic_initialized(kvm))) {
		/*
		 * We only provide the automatic initialization of the VGIC
		 * for the legacy case of a GICv2. Any other type must
		 * be explicitly initialized once setup with the respective
		 * KVM device call.
		 */
		if (kvm->arch.vgic.vgic_model != KVM_DEV_TYPE_ARM_VGIC_V2) {
			ret = -EBUSY;
			goto out;
		}
		mutex_lock(&kvm->lock);
		ret = vgic_init(kvm);
		mutex_unlock(&kvm->lock);

		if (ret)
			goto out;
	}

	if (irq_num >= min(kvm->arch.vgic.nr_irqs, 1020))
		return -EINVAL;

	vcpu_id = vgic_update_irq_pending(kvm, cpuid, irq_num, level);
	if (vcpu_id >= 0) {
		/* kick the specified vcpu */
		kvm_vcpu_kick(kvm_get_vcpu(kvm, vcpu_id));
	}

out:
	return ret;
}
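
/*
 * Usage sketch (hypothetical SPI number): an in-kernel device model
 * asserting and later deasserting a level-sensitive SPI would do
 *
 *	kvm_vgic_inject_irq(kvm, 0, spi_num, true);
 *	...
 *	kvm_vgic_inject_irq(kvm, 0, spi_num, false);
 *
 * while an edge-triggered source only ever injects with level == true.
 */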

static irqreturn_t vgic_maintenance_handler(int irq, void *data)
{
	/*
	 * We cannot rely on the vgic maintenance interrupt to be
	 * delivered synchronously. This means we can only use it to
	 * exit the VM, and we perform the handling of EOIed
	 * interrupts on the exit path (see vgic_process_maintenance).
	 */
	return IRQ_HANDLED;
}

void kvm_vgic_vcpu_destroy(struct kvm_vcpu *vcpu)
{
	struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;

	kfree(vgic_cpu->pending_shared);
	kfree(vgic_cpu->active_shared);
	kfree(vgic_cpu->pend_act_shared);
	kfree(vgic_cpu->vgic_irq_lr_map);
	vgic_cpu->pending_shared = NULL;
	vgic_cpu->active_shared = NULL;
	vgic_cpu->pend_act_shared = NULL;
	vgic_cpu->vgic_irq_lr_map = NULL;
}

static int vgic_vcpu_init_maps(struct kvm_vcpu *vcpu, int nr_irqs)
{
	struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
	int nr_longs = BITS_TO_LONGS(nr_irqs - VGIC_NR_PRIVATE_IRQS);
	int sz = nr_longs * sizeof(unsigned long);

	vgic_cpu->pending_shared = kzalloc(sz, GFP_KERNEL);
	vgic_cpu->active_shared = kzalloc(sz, GFP_KERNEL);
	vgic_cpu->pend_act_shared = kzalloc(sz, GFP_KERNEL);
	vgic_cpu->vgic_irq_lr_map = kmalloc(nr_irqs, GFP_KERNEL);

	if (!vgic_cpu->pending_shared
		|| !vgic_cpu->active_shared
		|| !vgic_cpu->pend_act_shared
		|| !vgic_cpu->vgic_irq_lr_map) {
		kvm_vgic_vcpu_destroy(vcpu);
		return -ENOMEM;
	}

	memset(vgic_cpu->vgic_irq_lr_map, LR_EMPTY, nr_irqs);

	/*
	 * Store the number of LRs per vcpu, so we don't have to go
	 * all the way to the distributor structure to find out. Only
	 * assembly code should use this one.
	 */
	vgic_cpu->nr_lr = vgic->nr_lr;

	return 0;
}

/**
 * kvm_vgic_get_max_vcpus - Get the maximum number of VCPUs allowed by HW
 *
 * The host's GIC naturally limits the maximum number of VCPUs a guest
 * can use.
 */
int kvm_vgic_get_max_vcpus(void)
{
	return vgic->max_gic_vcpus;
}

void kvm_vgic_destroy(struct kvm *kvm)
{
	struct vgic_dist *dist = &kvm->arch.vgic;
	struct kvm_vcpu *vcpu;
	int i;

	kvm_for_each_vcpu(i, vcpu, kvm)
		kvm_vgic_vcpu_destroy(vcpu);

	vgic_free_bitmap(&dist->irq_enabled);
	vgic_free_bitmap(&dist->irq_level);
	vgic_free_bitmap(&dist->irq_pending);
	vgic_free_bitmap(&dist->irq_soft_pend);
	vgic_free_bitmap(&dist->irq_queued);
	vgic_free_bitmap(&dist->irq_cfg);
	vgic_free_bytemap(&dist->irq_priority);
	if (dist->irq_spi_target) {
		for (i = 0; i < dist->nr_cpus; i++)
			vgic_free_bitmap(&dist->irq_spi_target[i]);
	}
	kfree(dist->irq_sgi_sources);
	kfree(dist->irq_spi_cpu);
	kfree(dist->irq_spi_mpidr);
	kfree(dist->irq_spi_target);
	kfree(dist->irq_pending_on_cpu);
	kfree(dist->irq_active_on_cpu);
	dist->irq_sgi_sources = NULL;
	dist->irq_spi_cpu = NULL;
	dist->irq_spi_target = NULL;
	dist->irq_pending_on_cpu = NULL;
	dist->irq_active_on_cpu = NULL;
	dist->nr_cpus = 0;
}

/*
 * Allocate and initialize the various data structures. Must be called
 * with kvm->lock held!
 */
int vgic_init(struct kvm *kvm)
{
	struct vgic_dist *dist = &kvm->arch.vgic;
	struct kvm_vcpu *vcpu;
	int nr_cpus, nr_irqs;
	int ret, i, vcpu_id;

	if (vgic_initialized(kvm))
		return 0;

	nr_cpus = dist->nr_cpus = atomic_read(&kvm->online_vcpus);
	if (!nr_cpus)		/* No vcpus? Can't be good... */
		return -ENODEV;

	/*
	 * If nobody configured the number of interrupts, use the
	 * legacy one.
	 */
	if (!dist->nr_irqs)
		dist->nr_irqs = VGIC_NR_IRQS_LEGACY;

	nr_irqs = dist->nr_irqs;

	ret  = vgic_init_bitmap(&dist->irq_enabled, nr_cpus, nr_irqs);
	ret |= vgic_init_bitmap(&dist->irq_level, nr_cpus, nr_irqs);
	ret |= vgic_init_bitmap(&dist->irq_pending, nr_cpus, nr_irqs);
	ret |= vgic_init_bitmap(&dist->irq_soft_pend, nr_cpus, nr_irqs);
	ret |= vgic_init_bitmap(&dist->irq_queued, nr_cpus, nr_irqs);
	ret |= vgic_init_bitmap(&dist->irq_active, nr_cpus, nr_irqs);
	ret |= vgic_init_bitmap(&dist->irq_cfg, nr_cpus, nr_irqs);
	ret |= vgic_init_bytemap(&dist->irq_priority, nr_cpus, nr_irqs);

	if (ret)
		goto out;

	dist->irq_sgi_sources = kzalloc(nr_cpus * VGIC_NR_SGIS, GFP_KERNEL);
	dist->irq_spi_cpu = kzalloc(nr_irqs - VGIC_NR_PRIVATE_IRQS, GFP_KERNEL);
	dist->irq_spi_target = kzalloc(sizeof(*dist->irq_spi_target) * nr_cpus,
				       GFP_KERNEL);
	dist->irq_pending_on_cpu = kzalloc(BITS_TO_LONGS(nr_cpus) * sizeof(long),
					   GFP_KERNEL);
	dist->irq_active_on_cpu = kzalloc(BITS_TO_LONGS(nr_cpus) * sizeof(long),
					   GFP_KERNEL);
	if (!dist->irq_sgi_sources ||
	    !dist->irq_spi_cpu ||
	    !dist->irq_spi_target ||
	    !dist->irq_pending_on_cpu ||
	    !dist->irq_active_on_cpu) {
		ret = -ENOMEM;
		goto out;
	}

	for (i = 0; i < nr_cpus; i++)
		ret |= vgic_init_bitmap(&dist->irq_spi_target[i],
					nr_cpus, nr_irqs);

	if (ret)
		goto out;

	ret = kvm->arch.vgic.vm_ops.init_model(kvm);
	if (ret)
		goto out;

	kvm_for_each_vcpu(vcpu_id, vcpu, kvm) {
		ret = vgic_vcpu_init_maps(vcpu, nr_irqs);
		if (ret) {
			kvm_err("VGIC: Failed to allocate vcpu memory\n");
			break;
		}

		for (i = 0; i < dist->nr_irqs; i++) {
			if (i < VGIC_NR_PPIS)
				vgic_bitmap_set_irq_val(&dist->irq_enabled,
							vcpu->vcpu_id, i, 1);
			if (i < VGIC_NR_PRIVATE_IRQS)
				vgic_bitmap_set_irq_val(&dist->irq_cfg,
							vcpu->vcpu_id, i,
							VGIC_CFG_EDGE);
		}

		vgic_enable(vcpu);
	}

out:
	if (ret)
		kvm_vgic_destroy(kvm);

	return ret;
}

static int init_vgic_model(struct kvm *kvm, int type)
{
	switch (type) {
	case KVM_DEV_TYPE_ARM_VGIC_V2:
		vgic_v2_init_emulation(kvm);
		break;
#ifdef CONFIG_ARM_GIC_V3
	case KVM_DEV_TYPE_ARM_VGIC_V3:
		vgic_v3_init_emulation(kvm);
		break;
#endif
	default:
		return -ENODEV;
	}

	if (atomic_read(&kvm->online_vcpus) > kvm->arch.max_vcpus)
		return -E2BIG;

	return 0;
}

int kvm_vgic_create(struct kvm *kvm, u32 type)
{
	int i, vcpu_lock_idx = -1, ret;
	struct kvm_vcpu *vcpu;

	mutex_lock(&kvm->lock);

	if (irqchip_in_kernel(kvm)) {
		ret = -EEXIST;
		goto out;
	}

	/*
	 * This function is also called by the KVM_CREATE_IRQCHIP handler,
	 * which had no chance yet to check the availability of the GICv2
	 * emulation. So check this here again. KVM_CREATE_DEVICE does
	 * the proper checks already.
	 */
	if (type == KVM_DEV_TYPE_ARM_VGIC_V2 && !vgic->can_emulate_gicv2) {
		ret = -ENODEV;
		goto out;
	}

	/*
	 * Any time a vcpu is run, vcpu_load is called which tries to grab the
	 * vcpu->mutex.  By grabbing the vcpu->mutex of all VCPUs we ensure
	 * that no other VCPUs are run while we create the vgic.
	 */
	ret = -EBUSY;
	kvm_for_each_vcpu(i, vcpu, kvm) {
		if (!mutex_trylock(&vcpu->mutex))
			goto out_unlock;
		vcpu_lock_idx = i;
	}

	kvm_for_each_vcpu(i, vcpu, kvm) {
		if (vcpu->arch.has_run_once)
			goto out_unlock;
	}

	ret = init_vgic_model(kvm, type);
	if (ret)
		goto out_unlock;

	spin_lock_init(&kvm->arch.vgic.lock);
	kvm->arch.vgic.in_kernel = true;
	kvm->arch.vgic.vgic_model = type;
	kvm->arch.vgic.vctrl_base = vgic->vctrl_base;
	kvm->arch.vgic.vgic_dist_base = VGIC_ADDR_UNDEF;
	kvm->arch.vgic.vgic_cpu_base = VGIC_ADDR_UNDEF;
	kvm->arch.vgic.vgic_redist_base = VGIC_ADDR_UNDEF;

out_unlock:
	for (; vcpu_lock_idx >= 0; vcpu_lock_idx--) {
		vcpu = kvm_get_vcpu(kvm, vcpu_lock_idx);
		mutex_unlock(&vcpu->mutex);
	}

out:
	mutex_unlock(&kvm->lock);
	return ret;
}

static int vgic_ioaddr_overlap(struct kvm *kvm)
{
	phys_addr_t dist = kvm->arch.vgic.vgic_dist_base;
	phys_addr_t cpu = kvm->arch.vgic.vgic_cpu_base;

	if (IS_VGIC_ADDR_UNDEF(dist) || IS_VGIC_ADDR_UNDEF(cpu))
		return 0;
	if ((dist <= cpu && dist + KVM_VGIC_V2_DIST_SIZE > cpu) ||
	    (cpu <= dist && cpu + KVM_VGIC_V2_CPU_SIZE > dist))
		return -EBUSY;
	return 0;
}

static int vgic_ioaddr_assign(struct kvm *kvm, phys_addr_t *ioaddr,
			      phys_addr_t addr, phys_addr_t size)
{
	int ret;

	if (addr & ~KVM_PHYS_MASK)
		return -E2BIG;

	if (addr & (SZ_4K - 1))
		return -EINVAL;

	if (!IS_VGIC_ADDR_UNDEF(*ioaddr))
		return -EEXIST;
	if (addr + size < addr)
		return -EINVAL;

	*ioaddr = addr;
	ret = vgic_ioaddr_overlap(kvm);
	if (ret)
		*ioaddr = VGIC_ADDR_UNDEF;

	return ret;
}

/**
 * kvm_vgic_addr - set or get vgic VM base addresses
 * @kvm:   pointer to the vm struct
 * @type:  the VGIC addr type, one of KVM_VGIC_V[23]_ADDR_TYPE_XXX
 * @addr:  pointer to address value
 * @write: if true set the address in the VM address space, if false read the
 *          address
 *
 * Set or get the vgic base addresses for the distributor and the virtual CPU
 * interface in the VM physical address space.  These addresses are properties
 * of the emulated core/SoC and therefore user space initially knows this
 * information.
 */
int kvm_vgic_addr(struct kvm *kvm, unsigned long type, u64 *addr, bool write)
{
	int r = 0;
	struct vgic_dist *vgic = &kvm->arch.vgic;
	int type_needed;
	phys_addr_t *addr_ptr, block_size;
	phys_addr_t alignment;

	mutex_lock(&kvm->lock);
	switch (type) {
	case KVM_VGIC_V2_ADDR_TYPE_DIST:
		type_needed = KVM_DEV_TYPE_ARM_VGIC_V2;
		addr_ptr = &vgic->vgic_dist_base;
		block_size = KVM_VGIC_V2_DIST_SIZE;
		alignment = SZ_4K;
		break;
	case KVM_VGIC_V2_ADDR_TYPE_CPU:
		type_needed = KVM_DEV_TYPE_ARM_VGIC_V2;
		addr_ptr = &vgic->vgic_cpu_base;
		block_size = KVM_VGIC_V2_CPU_SIZE;
		alignment = SZ_4K;
		break;
#ifdef CONFIG_ARM_GIC_V3
	case KVM_VGIC_V3_ADDR_TYPE_DIST:
		type_needed = KVM_DEV_TYPE_ARM_VGIC_V3;
		addr_ptr = &vgic->vgic_dist_base;
		block_size = KVM_VGIC_V3_DIST_SIZE;
		alignment = SZ_64K;
		break;
	case KVM_VGIC_V3_ADDR_TYPE_REDIST:
		type_needed = KVM_DEV_TYPE_ARM_VGIC_V3;
		addr_ptr = &vgic->vgic_redist_base;
		block_size = KVM_VGIC_V3_REDIST_SIZE;
		alignment = SZ_64K;
		break;
#endif
	default:
		r = -ENODEV;
		goto out;
	}

	if (vgic->vgic_model != type_needed) {
		r = -ENODEV;
		goto out;
	}

	if (write) {
		if (!IS_ALIGNED(*addr, alignment))
			r = -EINVAL;
		else
			r = vgic_ioaddr_assign(kvm, addr_ptr, *addr,
					       block_size);
	} else {
		*addr = *addr_ptr;
	}

out:
	mutex_unlock(&kvm->lock);
	return r;
}

int vgic_set_common_attr(struct kvm_device *dev, struct kvm_device_attr *attr)
{
	int r;

	switch (attr->group) {
	case KVM_DEV_ARM_VGIC_GRP_ADDR: {
		u64 __user *uaddr = (u64 __user *)(long)attr->addr;
		u64 addr;
		unsigned long type = (unsigned long)attr->attr;

		if (copy_from_user(&addr, uaddr, sizeof(addr)))
			return -EFAULT;

		r = kvm_vgic_addr(dev->kvm, type, &addr, true);
		return (r == -ENODEV) ? -ENXIO : r;
	}
	case KVM_DEV_ARM_VGIC_GRP_NR_IRQS: {
		u32 __user *uaddr = (u32 __user *)(long)attr->addr;
		u32 val;
		int ret = 0;

		if (get_user(val, uaddr))
			return -EFAULT;

		/*
		 * We require:
		 * - at least 32 SPIs on top of the 16 SGIs and 16 PPIs
		 * - at most 1024 interrupts
		 * - a multiple of 32 interrupts
		 */
		if (val < (VGIC_NR_PRIVATE_IRQS + 32) ||
		    val > VGIC_MAX_IRQS ||
		    (val & 31))
			return -EINVAL;

		mutex_lock(&dev->kvm->lock);

		if (vgic_ready(dev->kvm) || dev->kvm->arch.vgic.nr_irqs)
			ret = -EBUSY;
		else
			dev->kvm->arch.vgic.nr_irqs = val;

		mutex_unlock(&dev->kvm->lock);

		return ret;
	}
	case KVM_DEV_ARM_VGIC_GRP_CTRL: {
		switch (attr->attr) {
		case KVM_DEV_ARM_VGIC_CTRL_INIT:
			r = vgic_init(dev->kvm);
			return r;
		}
		break;
	}
	}

	return -ENXIO;
}

int vgic_get_common_attr(struct kvm_device *dev, struct kvm_device_attr *attr)
{
	int r = -ENXIO;

	switch (attr->group) {
	case KVM_DEV_ARM_VGIC_GRP_ADDR: {
		u64 __user *uaddr = (u64 __user *)(long)attr->addr;
		u64 addr;
		unsigned long type = (unsigned long)attr->attr;

		r = kvm_vgic_addr(dev->kvm, type, &addr, false);
		if (r)
			return (r == -ENODEV) ? -ENXIO : r;

		if (copy_to_user(uaddr, &addr, sizeof(addr)))
			return -EFAULT;
		break;
	}
	case KVM_DEV_ARM_VGIC_GRP_NR_IRQS: {
		u32 __user *uaddr = (u32 __user *)(long)attr->addr;

		r = put_user(dev->kvm->arch.vgic.nr_irqs, uaddr);
		break;
	}
	}

	return r;
}

int vgic_has_attr_regs(const struct vgic_io_range *ranges, phys_addr_t offset)
{
	if (vgic_find_range(ranges, 4, offset))
		return 0;
	else
		return -ENXIO;
}

static void vgic_init_maintenance_interrupt(void *info)
{
	enable_percpu_irq(vgic->maint_irq, 0);
}

static int vgic_cpu_notify(struct notifier_block *self,
			   unsigned long action, void *cpu)
{
	switch (action) {
	case CPU_STARTING:
	case CPU_STARTING_FROZEN:
		vgic_init_maintenance_interrupt(NULL);
		break;
	case CPU_DYING:
	case CPU_DYING_FROZEN:
		disable_percpu_irq(vgic->maint_irq);
		break;
	}

	return NOTIFY_OK;
}

static struct notifier_block vgic_cpu_nb = {
	.notifier_call = vgic_cpu_notify,
};

static const struct of_device_id vgic_ids[] = {
	{ .compatible = "arm,cortex-a15-gic",	.data = vgic_v2_probe, },
	{ .compatible = "arm,cortex-a7-gic",	.data = vgic_v2_probe, },
	{ .compatible = "arm,gic-400",		.data = vgic_v2_probe, },
	{ .compatible = "arm,gic-v3",		.data = vgic_v3_probe, },
	{},
};

int kvm_vgic_hyp_init(void)
{
	const struct of_device_id *matched_id;
	int (*vgic_probe)(struct device_node *, const struct vgic_ops **,
			  const struct vgic_params **);
	struct device_node *vgic_node;
	int ret;

	vgic_node = of_find_matching_node_and_match(NULL,
						    vgic_ids, &matched_id);
	if (!vgic_node) {
		kvm_err("error: no compatible GIC node found\n");
		return -ENODEV;
	}

	vgic_probe = matched_id->data;
	ret = vgic_probe(vgic_node, &vgic_ops, &vgic);
	if (ret)
		return ret;

	ret = request_percpu_irq(vgic->maint_irq, vgic_maintenance_handler,
				 "vgic", kvm_get_running_vcpus());
	if (ret) {
		kvm_err("Cannot register interrupt %d\n", vgic->maint_irq);
		return ret;
	}

	ret = __register_cpu_notifier(&vgic_cpu_nb);
	if (ret) {
		kvm_err("Cannot register vgic CPU notifier\n");
		goto out_free_irq;
	}

	/* Callback into arch code for setup */
	vgic_arch_setup(vgic);

	on_each_cpu(vgic_init_maintenance_interrupt, NULL, 1);

	return 0;

out_free_irq:
	free_percpu_irq(vgic->maint_irq, kvm_get_running_vcpus());
	return ret;
}

int kvm_irq_map_gsi(struct kvm *kvm,
		    struct kvm_kernel_irq_routing_entry *entries,
		    int gsi)
{
	return 0;
}

int kvm_irq_map_chip_pin(struct kvm *kvm, unsigned irqchip, unsigned pin)
{
	return pin;
}

int kvm_set_irq(struct kvm *kvm, int irq_source_id,
		u32 irq, int level, bool line_status)
{
	unsigned int spi = irq + VGIC_NR_PRIVATE_IRQS;

	trace_kvm_set_irq(irq, level, irq_source_id);

	BUG_ON(!vgic_initialized(kvm));

	return kvm_vgic_inject_irq(kvm, 0, spi, level);
}

/* MSI not implemented yet */
int kvm_set_msi(struct kvm_kernel_irq_routing_entry *e,
		struct kvm *kvm, int irq_source_id,
		int level, bool line_status)
{
	return 0;
}