1/*
2 * hosting zSeries kernel virtual machines
3 *
4 * Copyright IBM Corp. 2008, 2009
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License (version 2 only)
8 * as published by the Free Software Foundation.
9 *
10 *    Author(s): Carsten Otte <cotte@de.ibm.com>
11 *               Christian Borntraeger <borntraeger@de.ibm.com>
12 *               Heiko Carstens <heiko.carstens@de.ibm.com>
13 *               Christian Ehrhardt <ehrhardt@de.ibm.com>
14 *               Jason J. Herne <jjherne@us.ibm.com>
15 */
16
17#include <linux/compiler.h>
18#include <linux/err.h>
19#include <linux/fs.h>
20#include <linux/hrtimer.h>
21#include <linux/init.h>
22#include <linux/kvm.h>
23#include <linux/kvm_host.h>
24#include <linux/module.h>
25#include <linux/random.h>
26#include <linux/slab.h>
27#include <linux/timer.h>
28#include <linux/vmalloc.h>
29#include <asm/asm-offsets.h>
30#include <asm/lowcore.h>
31#include <asm/etr.h>
32#include <asm/pgtable.h>
33#include <asm/nmi.h>
34#include <asm/switch_to.h>
35#include <asm/isc.h>
36#include <asm/sclp.h>
37#include "kvm-s390.h"
38#include "gaccess.h"
39
40#define KMSG_COMPONENT "kvm-s390"
41#undef pr_fmt
42#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
43
44#define CREATE_TRACE_POINTS
45#include "trace.h"
46#include "trace-s390.h"
47
48#define MEM_OP_MAX_SIZE 65536	/* Maximum transfer size for KVM_S390_MEM_OP */
49#define LOCAL_IRQS 32
50#define VCPU_IRQS_MAX_BUF (sizeof(struct kvm_s390_irq) * \
51			   (KVM_MAX_VCPUS + LOCAL_IRQS))
52
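/*
 * Map a vcpu statistics counter name to its offset inside struct kvm_vcpu,
 * so that the common KVM code can expose it via debugfs.
 */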
53#define VCPU_STAT(x) offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU
54
55struct kvm_stats_debugfs_item debugfs_entries[] = {
56	{ "userspace_handled", VCPU_STAT(exit_userspace) },
57	{ "exit_null", VCPU_STAT(exit_null) },
58	{ "exit_validity", VCPU_STAT(exit_validity) },
59	{ "exit_stop_request", VCPU_STAT(exit_stop_request) },
60	{ "exit_external_request", VCPU_STAT(exit_external_request) },
61	{ "exit_external_interrupt", VCPU_STAT(exit_external_interrupt) },
62	{ "exit_instruction", VCPU_STAT(exit_instruction) },
63	{ "exit_program_interruption", VCPU_STAT(exit_program_interruption) },
64	{ "exit_instr_and_program_int", VCPU_STAT(exit_instr_and_program) },
65	{ "halt_successful_poll", VCPU_STAT(halt_successful_poll) },
66	{ "halt_attempted_poll", VCPU_STAT(halt_attempted_poll) },
67	{ "halt_wakeup", VCPU_STAT(halt_wakeup) },
68	{ "instruction_lctlg", VCPU_STAT(instruction_lctlg) },
69	{ "instruction_lctl", VCPU_STAT(instruction_lctl) },
70	{ "instruction_stctl", VCPU_STAT(instruction_stctl) },
71	{ "instruction_stctg", VCPU_STAT(instruction_stctg) },
72	{ "deliver_emergency_signal", VCPU_STAT(deliver_emergency_signal) },
73	{ "deliver_external_call", VCPU_STAT(deliver_external_call) },
74	{ "deliver_service_signal", VCPU_STAT(deliver_service_signal) },
75	{ "deliver_virtio_interrupt", VCPU_STAT(deliver_virtio_interrupt) },
76	{ "deliver_stop_signal", VCPU_STAT(deliver_stop_signal) },
77	{ "deliver_prefix_signal", VCPU_STAT(deliver_prefix_signal) },
78	{ "deliver_restart_signal", VCPU_STAT(deliver_restart_signal) },
79	{ "deliver_program_interruption", VCPU_STAT(deliver_program_int) },
80	{ "exit_wait_state", VCPU_STAT(exit_wait_state) },
81	{ "instruction_pfmf", VCPU_STAT(instruction_pfmf) },
82	{ "instruction_stidp", VCPU_STAT(instruction_stidp) },
83	{ "instruction_spx", VCPU_STAT(instruction_spx) },
84	{ "instruction_stpx", VCPU_STAT(instruction_stpx) },
85	{ "instruction_stap", VCPU_STAT(instruction_stap) },
86	{ "instruction_storage_key", VCPU_STAT(instruction_storage_key) },
87	{ "instruction_ipte_interlock", VCPU_STAT(instruction_ipte_interlock) },
88	{ "instruction_stsch", VCPU_STAT(instruction_stsch) },
89	{ "instruction_chsc", VCPU_STAT(instruction_chsc) },
90	{ "instruction_essa", VCPU_STAT(instruction_essa) },
91	{ "instruction_stsi", VCPU_STAT(instruction_stsi) },
92	{ "instruction_stfl", VCPU_STAT(instruction_stfl) },
93	{ "instruction_tprot", VCPU_STAT(instruction_tprot) },
94	{ "instruction_sigp_sense", VCPU_STAT(instruction_sigp_sense) },
95	{ "instruction_sigp_sense_running", VCPU_STAT(instruction_sigp_sense_running) },
96	{ "instruction_sigp_external_call", VCPU_STAT(instruction_sigp_external_call) },
97	{ "instruction_sigp_emergency", VCPU_STAT(instruction_sigp_emergency) },
98	{ "instruction_sigp_cond_emergency", VCPU_STAT(instruction_sigp_cond_emergency) },
99	{ "instruction_sigp_start", VCPU_STAT(instruction_sigp_start) },
100	{ "instruction_sigp_stop", VCPU_STAT(instruction_sigp_stop) },
101	{ "instruction_sigp_stop_store_status", VCPU_STAT(instruction_sigp_stop_store_status) },
102	{ "instruction_sigp_store_status", VCPU_STAT(instruction_sigp_store_status) },
103	{ "instruction_sigp_store_adtl_status", VCPU_STAT(instruction_sigp_store_adtl_status) },
104	{ "instruction_sigp_set_arch", VCPU_STAT(instruction_sigp_arch) },
105	{ "instruction_sigp_set_prefix", VCPU_STAT(instruction_sigp_prefix) },
106	{ "instruction_sigp_restart", VCPU_STAT(instruction_sigp_restart) },
107	{ "instruction_sigp_cpu_reset", VCPU_STAT(instruction_sigp_cpu_reset) },
108	{ "instruction_sigp_init_cpu_reset", VCPU_STAT(instruction_sigp_init_cpu_reset) },
109	{ "instruction_sigp_unknown", VCPU_STAT(instruction_sigp_unknown) },
110	{ "diagnose_10", VCPU_STAT(diagnose_10) },
111	{ "diagnose_44", VCPU_STAT(diagnose_44) },
112	{ "diagnose_9c", VCPU_STAT(diagnose_9c) },
113	{ "diagnose_258", VCPU_STAT(diagnose_258) },
114	{ "diagnose_308", VCPU_STAT(diagnose_308) },
115	{ "diagnose_500", VCPU_STAT(diagnose_500) },
116	{ NULL }
117};
118
/* Upper limit of facility bits that KVM supports for its guests */
120unsigned long kvm_s390_fac_list_mask[] = {
121	0xffe6fffbfcfdfc40UL,
122	0x005e800000000000UL,
123};
124
125unsigned long kvm_s390_fac_list_mask_size(void)
126{
127	BUILD_BUG_ON(ARRAY_SIZE(kvm_s390_fac_list_mask) > S390_ARCH_FAC_MASK_SIZE_U64);
128	return ARRAY_SIZE(kvm_s390_fac_list_mask);
129}
130
131static struct gmap_notifier gmap_notifier;
132debug_info_t *kvm_s390_dbf;
133
134/* Section: not file related */
135int kvm_arch_hardware_enable(void)
136{
137	/* every s390 is virtualization enabled ;-) */
138	return 0;
139}
140
141static void kvm_gmap_notifier(struct gmap *gmap, unsigned long address);
142
143/*
144 * This callback is executed during stop_machine(). All CPUs are therefore
145 * temporarily stopped. In order not to change guest behavior, we have to
146 * disable preemption whenever we touch the epoch of kvm and the VCPUs,
147 * so a CPU won't be stopped while calculating with the epoch.
148 */
149static int kvm_clock_sync(struct notifier_block *notifier, unsigned long val,
150			  void *v)
151{
152	struct kvm *kvm;
153	struct kvm_vcpu *vcpu;
154	int i;
155	unsigned long long *delta = v;
156
157	list_for_each_entry(kvm, &vm_list, vm_list) {
158		kvm->arch.epoch -= *delta;
159		kvm_for_each_vcpu(i, vcpu, kvm) {
160			vcpu->arch.sie_block->epoch -= *delta;
161		}
162	}
163	return NOTIFY_OK;
164}
165
166static struct notifier_block kvm_clock_notifier = {
167	.notifier_call = kvm_clock_sync,
168};
169
170int kvm_arch_hardware_setup(void)
171{
172	gmap_notifier.notifier_call = kvm_gmap_notifier;
173	gmap_register_ipte_notifier(&gmap_notifier);
174	atomic_notifier_chain_register(&s390_epoch_delta_notifier,
175				       &kvm_clock_notifier);
176	return 0;
177}
178
179void kvm_arch_hardware_unsetup(void)
180{
181	gmap_unregister_ipte_notifier(&gmap_notifier);
182	atomic_notifier_chain_unregister(&s390_epoch_delta_notifier,
183					 &kvm_clock_notifier);
184}
185
186int kvm_arch_init(void *opaque)
187{
188	kvm_s390_dbf = debug_register("kvm-trace", 32, 1, 7 * sizeof(long));
189	if (!kvm_s390_dbf)
190		return -ENOMEM;
191
192	if (debug_register_view(kvm_s390_dbf, &debug_sprintf_view)) {
193		debug_unregister(kvm_s390_dbf);
194		return -ENOMEM;
195	}
196
197	/* Register floating interrupt controller interface. */
198	return kvm_register_device_ops(&kvm_flic_ops, KVM_DEV_TYPE_FLIC);
199}
200
201void kvm_arch_exit(void)
202{
203	debug_unregister(kvm_s390_dbf);
204}
205
206/* Section: device related */
207long kvm_arch_dev_ioctl(struct file *filp,
208			unsigned int ioctl, unsigned long arg)
209{
210	if (ioctl == KVM_S390_ENABLE_SIE)
211		return s390_enable_sie();
212	return -EINVAL;
213}
214
215int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
216{
217	int r;
218
219	switch (ext) {
220	case KVM_CAP_S390_PSW:
221	case KVM_CAP_S390_GMAP:
222	case KVM_CAP_SYNC_MMU:
223#ifdef CONFIG_KVM_S390_UCONTROL
224	case KVM_CAP_S390_UCONTROL:
225#endif
226	case KVM_CAP_ASYNC_PF:
227	case KVM_CAP_SYNC_REGS:
228	case KVM_CAP_ONE_REG:
229	case KVM_CAP_ENABLE_CAP:
230	case KVM_CAP_S390_CSS_SUPPORT:
231	case KVM_CAP_IOEVENTFD:
232	case KVM_CAP_DEVICE_CTRL:
233	case KVM_CAP_ENABLE_CAP_VM:
234	case KVM_CAP_S390_IRQCHIP:
235	case KVM_CAP_VM_ATTRIBUTES:
236	case KVM_CAP_MP_STATE:
237	case KVM_CAP_S390_INJECT_IRQ:
238	case KVM_CAP_S390_USER_SIGP:
239	case KVM_CAP_S390_USER_STSI:
240	case KVM_CAP_S390_SKEYS:
241	case KVM_CAP_S390_IRQ_STATE:
242		r = 1;
243		break;
244	case KVM_CAP_S390_MEM_OP:
245		r = MEM_OP_MAX_SIZE;
246		break;
247	case KVM_CAP_NR_VCPUS:
248	case KVM_CAP_MAX_VCPUS:
249		r = KVM_MAX_VCPUS;
250		break;
251	case KVM_CAP_NR_MEMSLOTS:
252		r = KVM_USER_MEM_SLOTS;
253		break;
254	case KVM_CAP_S390_COW:
255		r = MACHINE_HAS_ESOP;
256		break;
257	case KVM_CAP_S390_VECTOR_REGISTERS:
258		r = MACHINE_HAS_VX;
259		break;
260	default:
261		r = 0;
262	}
263	return r;
264}
265
266static void kvm_s390_sync_dirty_log(struct kvm *kvm,
267					struct kvm_memory_slot *memslot)
268{
269	gfn_t cur_gfn, last_gfn;
270	unsigned long address;
271	struct gmap *gmap = kvm->arch.gmap;
272
273	down_read(&gmap->mm->mmap_sem);
274	/* Loop over all guest pages */
275	last_gfn = memslot->base_gfn + memslot->npages;
	for (cur_gfn = memslot->base_gfn; cur_gfn < last_gfn; cur_gfn++) {
277		address = gfn_to_hva_memslot(memslot, cur_gfn);
278
279		if (gmap_test_and_clear_dirty(address, gmap))
280			mark_page_dirty(kvm, cur_gfn);
281	}
282	up_read(&gmap->mm->mmap_sem);
283}
284
285/* Section: vm related */
286/*
287 * Get (and clear) the dirty memory log for a memory slot.
288 */
289int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
290			       struct kvm_dirty_log *log)
291{
292	int r;
293	unsigned long n;
294	struct kvm_memslots *slots;
295	struct kvm_memory_slot *memslot;
296	int is_dirty = 0;
297
298	mutex_lock(&kvm->slots_lock);
299
300	r = -EINVAL;
301	if (log->slot >= KVM_USER_MEM_SLOTS)
302		goto out;
303
304	slots = kvm_memslots(kvm);
305	memslot = id_to_memslot(slots, log->slot);
306	r = -ENOENT;
307	if (!memslot->dirty_bitmap)
308		goto out;
309
310	kvm_s390_sync_dirty_log(kvm, memslot);
311	r = kvm_get_dirty_log(kvm, log, &is_dirty);
312	if (r)
313		goto out;
314
315	/* Clear the dirty log */
316	if (is_dirty) {
317		n = kvm_dirty_bitmap_bytes(memslot);
318		memset(memslot->dirty_bitmap, 0, n);
319	}
320	r = 0;
321out:
322	mutex_unlock(&kvm->slots_lock);
323	return r;
324}
325
326static int kvm_vm_ioctl_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap)
327{
328	int r;
329
330	if (cap->flags)
331		return -EINVAL;
332
333	switch (cap->cap) {
334	case KVM_CAP_S390_IRQCHIP:
335		VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_IRQCHIP");
336		kvm->arch.use_irqchip = 1;
337		r = 0;
338		break;
339	case KVM_CAP_S390_USER_SIGP:
340		VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_SIGP");
341		kvm->arch.user_sigp = 1;
342		r = 0;
343		break;
344	case KVM_CAP_S390_VECTOR_REGISTERS:
345		mutex_lock(&kvm->lock);
346		if (atomic_read(&kvm->online_vcpus)) {
347			r = -EBUSY;
348		} else if (MACHINE_HAS_VX) {
349			set_kvm_facility(kvm->arch.model.fac->mask, 129);
350			set_kvm_facility(kvm->arch.model.fac->list, 129);
351			r = 0;
352		} else
353			r = -EINVAL;
354		mutex_unlock(&kvm->lock);
355		VM_EVENT(kvm, 3, "ENABLE: CAP_S390_VECTOR_REGISTERS %s",
356			 r ? "(not available)" : "(success)");
357		break;
358	case KVM_CAP_S390_USER_STSI:
359		VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_STSI");
360		kvm->arch.user_stsi = 1;
361		r = 0;
362		break;
363	default:
364		r = -EINVAL;
365		break;
366	}
367	return r;
368}
369
370static int kvm_s390_get_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
371{
372	int ret;
373
374	switch (attr->attr) {
375	case KVM_S390_VM_MEM_LIMIT_SIZE:
376		ret = 0;
377		VM_EVENT(kvm, 3, "QUERY: max guest memory: %lu bytes",
378			 kvm->arch.gmap->asce_end);
379		if (put_user(kvm->arch.gmap->asce_end, (u64 __user *)attr->addr))
380			ret = -EFAULT;
381		break;
382	default:
383		ret = -ENXIO;
384		break;
385	}
386	return ret;
387}
388
389static int kvm_s390_set_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
390{
391	int ret;
	unsigned int idx;

	switch (attr->attr) {
394	case KVM_S390_VM_MEM_ENABLE_CMMA:
395		/* enable CMMA only for z10 and later (EDAT_1) */
396		ret = -EINVAL;
397		if (!MACHINE_IS_LPAR || !MACHINE_HAS_EDAT1)
398			break;
399
400		ret = -EBUSY;
401		VM_EVENT(kvm, 3, "%s", "ENABLE: CMMA support");
402		mutex_lock(&kvm->lock);
403		if (atomic_read(&kvm->online_vcpus) == 0) {
404			kvm->arch.use_cmma = 1;
405			ret = 0;
406		}
407		mutex_unlock(&kvm->lock);
408		break;
409	case KVM_S390_VM_MEM_CLR_CMMA:
410		ret = -EINVAL;
411		if (!kvm->arch.use_cmma)
412			break;
413
414		VM_EVENT(kvm, 3, "%s", "RESET: CMMA states");
415		mutex_lock(&kvm->lock);
416		idx = srcu_read_lock(&kvm->srcu);
417		s390_reset_cmma(kvm->arch.gmap->mm);
418		srcu_read_unlock(&kvm->srcu, idx);
419		mutex_unlock(&kvm->lock);
420		ret = 0;
421		break;
422	case KVM_S390_VM_MEM_LIMIT_SIZE: {
423		unsigned long new_limit;
424
425		if (kvm_is_ucontrol(kvm))
426			return -EINVAL;
427
428		if (get_user(new_limit, (u64 __user *)attr->addr))
429			return -EFAULT;
430
431		if (new_limit > kvm->arch.gmap->asce_end)
432			return -E2BIG;
433
434		ret = -EBUSY;
435		mutex_lock(&kvm->lock);
436		if (atomic_read(&kvm->online_vcpus) == 0) {
437			/* gmap_alloc will round the limit up */
438			struct gmap *new = gmap_alloc(current->mm, new_limit);
439
440			if (!new) {
441				ret = -ENOMEM;
442			} else {
443				gmap_free(kvm->arch.gmap);
444				new->private = kvm;
445				kvm->arch.gmap = new;
446				ret = 0;
447			}
448		}
449		mutex_unlock(&kvm->lock);
450		VM_EVENT(kvm, 3, "SET: max guest memory: %lu bytes", new_limit);
451		break;
452	}
453	default:
454		ret = -ENXIO;
455		break;
456	}
457	return ret;
458}
459
460static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu);
461
462static int kvm_s390_vm_set_crypto(struct kvm *kvm, struct kvm_device_attr *attr)
463{
464	struct kvm_vcpu *vcpu;
465	int i;
466
467	if (!test_kvm_facility(kvm, 76))
468		return -EINVAL;
469
470	mutex_lock(&kvm->lock);
471	switch (attr->attr) {
472	case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
473		get_random_bytes(
474			kvm->arch.crypto.crycb->aes_wrapping_key_mask,
475			sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
476		kvm->arch.crypto.aes_kw = 1;
477		VM_EVENT(kvm, 3, "%s", "ENABLE: AES keywrapping support");
478		break;
479	case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
480		get_random_bytes(
481			kvm->arch.crypto.crycb->dea_wrapping_key_mask,
482			sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
483		kvm->arch.crypto.dea_kw = 1;
484		VM_EVENT(kvm, 3, "%s", "ENABLE: DEA keywrapping support");
485		break;
486	case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
487		kvm->arch.crypto.aes_kw = 0;
488		memset(kvm->arch.crypto.crycb->aes_wrapping_key_mask, 0,
489			sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
490		VM_EVENT(kvm, 3, "%s", "DISABLE: AES keywrapping support");
491		break;
492	case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
493		kvm->arch.crypto.dea_kw = 0;
494		memset(kvm->arch.crypto.crycb->dea_wrapping_key_mask, 0,
495			sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
496		VM_EVENT(kvm, 3, "%s", "DISABLE: DEA keywrapping support");
497		break;
498	default:
499		mutex_unlock(&kvm->lock);
500		return -ENXIO;
501	}
502
503	kvm_for_each_vcpu(i, vcpu, kvm) {
504		kvm_s390_vcpu_crypto_setup(vcpu);
505		exit_sie(vcpu);
506	}
507	mutex_unlock(&kvm->lock);
508	return 0;
509}
510
511static int kvm_s390_set_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
512{
513	u8 gtod_high;
514
515	if (copy_from_user(&gtod_high, (void __user *)attr->addr,
516					   sizeof(gtod_high)))
517		return -EFAULT;
518
519	if (gtod_high != 0)
520		return -EINVAL;
521	VM_EVENT(kvm, 3, "SET: TOD extension: 0x%x", gtod_high);
522
523	return 0;
524}
525
526static int kvm_s390_set_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
527{
528	u64 gtod;
529
530	if (copy_from_user(&gtod, (void __user *)attr->addr, sizeof(gtod)))
531		return -EFAULT;
532
533	kvm_s390_set_tod_clock(kvm, gtod);
534	VM_EVENT(kvm, 3, "SET: TOD base: 0x%llx", gtod);
535	return 0;
536}
537
538static int kvm_s390_set_tod(struct kvm *kvm, struct kvm_device_attr *attr)
539{
540	int ret;
541
542	if (attr->flags)
543		return -EINVAL;
544
545	switch (attr->attr) {
546	case KVM_S390_VM_TOD_HIGH:
547		ret = kvm_s390_set_tod_high(kvm, attr);
548		break;
549	case KVM_S390_VM_TOD_LOW:
550		ret = kvm_s390_set_tod_low(kvm, attr);
551		break;
552	default:
553		ret = -ENXIO;
554		break;
555	}
556	return ret;
557}
558
559static int kvm_s390_get_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
560{
561	u8 gtod_high = 0;
562
563	if (copy_to_user((void __user *)attr->addr, &gtod_high,
564					 sizeof(gtod_high)))
565		return -EFAULT;
566	VM_EVENT(kvm, 3, "QUERY: TOD extension: 0x%x", gtod_high);
567
568	return 0;
569}
570
571static int kvm_s390_get_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
572{
573	u64 gtod;
574
575	gtod = kvm_s390_get_tod_clock_fast(kvm);
576	if (copy_to_user((void __user *)attr->addr, &gtod, sizeof(gtod)))
577		return -EFAULT;
578	VM_EVENT(kvm, 3, "QUERY: TOD base: 0x%llx", gtod);
579
580	return 0;
581}
582
583static int kvm_s390_get_tod(struct kvm *kvm, struct kvm_device_attr *attr)
584{
585	int ret;
586
587	if (attr->flags)
588		return -EINVAL;
589
590	switch (attr->attr) {
591	case KVM_S390_VM_TOD_HIGH:
592		ret = kvm_s390_get_tod_high(kvm, attr);
593		break;
594	case KVM_S390_VM_TOD_LOW:
595		ret = kvm_s390_get_tod_low(kvm, attr);
596		break;
597	default:
598		ret = -ENXIO;
599		break;
600	}
601	return ret;
602}
603
604static int kvm_s390_set_processor(struct kvm *kvm, struct kvm_device_attr *attr)
605{
606	struct kvm_s390_vm_cpu_processor *proc;
607	int ret = 0;
608
609	mutex_lock(&kvm->lock);
610	if (atomic_read(&kvm->online_vcpus)) {
611		ret = -EBUSY;
612		goto out;
613	}
614	proc = kzalloc(sizeof(*proc), GFP_KERNEL);
615	if (!proc) {
616		ret = -ENOMEM;
617		goto out;
618	}
619	if (!copy_from_user(proc, (void __user *)attr->addr,
620			    sizeof(*proc))) {
621		memcpy(&kvm->arch.model.cpu_id, &proc->cpuid,
622		       sizeof(struct cpuid));
623		kvm->arch.model.ibc = proc->ibc;
624		memcpy(kvm->arch.model.fac->list, proc->fac_list,
625		       S390_ARCH_FAC_LIST_SIZE_BYTE);
626	} else
627		ret = -EFAULT;
628	kfree(proc);
629out:
630	mutex_unlock(&kvm->lock);
631	return ret;
632}
633
634static int kvm_s390_set_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
635{
636	int ret = -ENXIO;
637
638	switch (attr->attr) {
639	case KVM_S390_VM_CPU_PROCESSOR:
640		ret = kvm_s390_set_processor(kvm, attr);
641		break;
642	}
643	return ret;
644}
645
646static int kvm_s390_get_processor(struct kvm *kvm, struct kvm_device_attr *attr)
647{
648	struct kvm_s390_vm_cpu_processor *proc;
649	int ret = 0;
650
651	proc = kzalloc(sizeof(*proc), GFP_KERNEL);
652	if (!proc) {
653		ret = -ENOMEM;
654		goto out;
655	}
656	memcpy(&proc->cpuid, &kvm->arch.model.cpu_id, sizeof(struct cpuid));
657	proc->ibc = kvm->arch.model.ibc;
658	memcpy(&proc->fac_list, kvm->arch.model.fac->list, S390_ARCH_FAC_LIST_SIZE_BYTE);
659	if (copy_to_user((void __user *)attr->addr, proc, sizeof(*proc)))
660		ret = -EFAULT;
661	kfree(proc);
662out:
663	return ret;
664}
665
666static int kvm_s390_get_machine(struct kvm *kvm, struct kvm_device_attr *attr)
667{
668	struct kvm_s390_vm_cpu_machine *mach;
669	int ret = 0;
670
671	mach = kzalloc(sizeof(*mach), GFP_KERNEL);
672	if (!mach) {
673		ret = -ENOMEM;
674		goto out;
675	}
676	get_cpu_id((struct cpuid *) &mach->cpuid);
677	mach->ibc = sclp.ibc;
678	memcpy(&mach->fac_mask, kvm->arch.model.fac->mask,
679	       S390_ARCH_FAC_LIST_SIZE_BYTE);
680	memcpy((unsigned long *)&mach->fac_list, S390_lowcore.stfle_fac_list,
681	       S390_ARCH_FAC_LIST_SIZE_BYTE);
682	if (copy_to_user((void __user *)attr->addr, mach, sizeof(*mach)))
683		ret = -EFAULT;
684	kfree(mach);
685out:
686	return ret;
687}
688
689static int kvm_s390_get_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
690{
691	int ret = -ENXIO;
692
693	switch (attr->attr) {
694	case KVM_S390_VM_CPU_PROCESSOR:
695		ret = kvm_s390_get_processor(kvm, attr);
696		break;
697	case KVM_S390_VM_CPU_MACHINE:
698		ret = kvm_s390_get_machine(kvm, attr);
699		break;
700	}
701	return ret;
702}
703
704static int kvm_s390_vm_set_attr(struct kvm *kvm, struct kvm_device_attr *attr)
705{
706	int ret;
707
708	switch (attr->group) {
709	case KVM_S390_VM_MEM_CTRL:
710		ret = kvm_s390_set_mem_control(kvm, attr);
711		break;
712	case KVM_S390_VM_TOD:
713		ret = kvm_s390_set_tod(kvm, attr);
714		break;
715	case KVM_S390_VM_CPU_MODEL:
716		ret = kvm_s390_set_cpu_model(kvm, attr);
717		break;
718	case KVM_S390_VM_CRYPTO:
719		ret = kvm_s390_vm_set_crypto(kvm, attr);
720		break;
721	default:
722		ret = -ENXIO;
723		break;
724	}
725
726	return ret;
727}
728
729static int kvm_s390_vm_get_attr(struct kvm *kvm, struct kvm_device_attr *attr)
730{
731	int ret;
732
733	switch (attr->group) {
734	case KVM_S390_VM_MEM_CTRL:
735		ret = kvm_s390_get_mem_control(kvm, attr);
736		break;
737	case KVM_S390_VM_TOD:
738		ret = kvm_s390_get_tod(kvm, attr);
739		break;
740	case KVM_S390_VM_CPU_MODEL:
741		ret = kvm_s390_get_cpu_model(kvm, attr);
742		break;
743	default:
744		ret = -ENXIO;
745		break;
746	}
747
748	return ret;
749}
750
751static int kvm_s390_vm_has_attr(struct kvm *kvm, struct kvm_device_attr *attr)
752{
753	int ret;
754
755	switch (attr->group) {
756	case KVM_S390_VM_MEM_CTRL:
757		switch (attr->attr) {
758		case KVM_S390_VM_MEM_ENABLE_CMMA:
759		case KVM_S390_VM_MEM_CLR_CMMA:
760		case KVM_S390_VM_MEM_LIMIT_SIZE:
761			ret = 0;
762			break;
763		default:
764			ret = -ENXIO;
765			break;
766		}
767		break;
768	case KVM_S390_VM_TOD:
769		switch (attr->attr) {
770		case KVM_S390_VM_TOD_LOW:
771		case KVM_S390_VM_TOD_HIGH:
772			ret = 0;
773			break;
774		default:
775			ret = -ENXIO;
776			break;
777		}
778		break;
779	case KVM_S390_VM_CPU_MODEL:
780		switch (attr->attr) {
781		case KVM_S390_VM_CPU_PROCESSOR:
782		case KVM_S390_VM_CPU_MACHINE:
783			ret = 0;
784			break;
785		default:
786			ret = -ENXIO;
787			break;
788		}
789		break;
790	case KVM_S390_VM_CRYPTO:
791		switch (attr->attr) {
792		case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
793		case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
794		case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
795		case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
796			ret = 0;
797			break;
798		default:
799			ret = -ENXIO;
800			break;
801		}
802		break;
803	default:
804		ret = -ENXIO;
805		break;
806	}
807
808	return ret;
809}
810
811static long kvm_s390_get_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
812{
813	uint8_t *keys;
814	uint64_t hva;
815	unsigned long curkey;
816	int i, r = 0;
817
818	if (args->flags != 0)
819		return -EINVAL;
820
821	/* Is this guest using storage keys? */
822	if (!mm_use_skey(current->mm))
823		return KVM_S390_GET_SKEYS_NONE;
824
825	/* Enforce sane limit on memory allocation */
826	if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
827		return -EINVAL;
828
829	keys = kmalloc_array(args->count, sizeof(uint8_t),
830			     GFP_KERNEL | __GFP_NOWARN);
831	if (!keys)
832		keys = vmalloc(sizeof(uint8_t) * args->count);
833	if (!keys)
834		return -ENOMEM;
835
836	for (i = 0; i < args->count; i++) {
837		hva = gfn_to_hva(kvm, args->start_gfn + i);
838		if (kvm_is_error_hva(hva)) {
839			r = -EFAULT;
840			goto out;
841		}
842
843		curkey = get_guest_storage_key(current->mm, hva);
844		if (IS_ERR_VALUE(curkey)) {
845			r = curkey;
846			goto out;
847		}
848		keys[i] = curkey;
849	}
850
851	r = copy_to_user((uint8_t __user *)args->skeydata_addr, keys,
852			 sizeof(uint8_t) * args->count);
853	if (r)
854		r = -EFAULT;
855out:
856	kvfree(keys);
857	return r;
858}
859
860static long kvm_s390_set_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
861{
862	uint8_t *keys;
863	uint64_t hva;
864	int i, r = 0;
865
866	if (args->flags != 0)
867		return -EINVAL;
868
869	/* Enforce sane limit on memory allocation */
870	if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
871		return -EINVAL;
872
873	keys = kmalloc_array(args->count, sizeof(uint8_t),
874			     GFP_KERNEL | __GFP_NOWARN);
875	if (!keys)
876		keys = vmalloc(sizeof(uint8_t) * args->count);
877	if (!keys)
878		return -ENOMEM;
879
880	r = copy_from_user(keys, (uint8_t __user *)args->skeydata_addr,
881			   sizeof(uint8_t) * args->count);
882	if (r) {
883		r = -EFAULT;
884		goto out;
885	}
886
887	/* Enable storage key handling for the guest */
888	r = s390_enable_skey();
889	if (r)
890		goto out;
891
892	for (i = 0; i < args->count; i++) {
893		hva = gfn_to_hva(kvm, args->start_gfn + i);
894		if (kvm_is_error_hva(hva)) {
895			r = -EFAULT;
896			goto out;
897		}
898
899		/* Lowest order bit is reserved */
900		if (keys[i] & 0x01) {
901			r = -EINVAL;
902			goto out;
903		}
904
905		r = set_guest_storage_key(current->mm, hva,
906					  (unsigned long)keys[i], 0);
907		if (r)
908			goto out;
909	}
910out:
911	kvfree(keys);
912	return r;
913}
914
915long kvm_arch_vm_ioctl(struct file *filp,
916		       unsigned int ioctl, unsigned long arg)
917{
918	struct kvm *kvm = filp->private_data;
919	void __user *argp = (void __user *)arg;
920	struct kvm_device_attr attr;
921	int r;
922
923	switch (ioctl) {
924	case KVM_S390_INTERRUPT: {
925		struct kvm_s390_interrupt s390int;
926
927		r = -EFAULT;
928		if (copy_from_user(&s390int, argp, sizeof(s390int)))
929			break;
930		r = kvm_s390_inject_vm(kvm, &s390int);
931		break;
932	}
933	case KVM_ENABLE_CAP: {
934		struct kvm_enable_cap cap;
935		r = -EFAULT;
936		if (copy_from_user(&cap, argp, sizeof(cap)))
937			break;
938		r = kvm_vm_ioctl_enable_cap(kvm, &cap);
939		break;
940	}
941	case KVM_CREATE_IRQCHIP: {
942		struct kvm_irq_routing_entry routing;
943
944		r = -EINVAL;
945		if (kvm->arch.use_irqchip) {
946			/* Set up dummy routing. */
947			memset(&routing, 0, sizeof(routing));
948			r = kvm_set_irq_routing(kvm, &routing, 0, 0);
949		}
950		break;
951	}
952	case KVM_SET_DEVICE_ATTR: {
953		r = -EFAULT;
954		if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
955			break;
956		r = kvm_s390_vm_set_attr(kvm, &attr);
957		break;
958	}
959	case KVM_GET_DEVICE_ATTR: {
960		r = -EFAULT;
961		if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
962			break;
963		r = kvm_s390_vm_get_attr(kvm, &attr);
964		break;
965	}
966	case KVM_HAS_DEVICE_ATTR: {
967		r = -EFAULT;
968		if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
969			break;
970		r = kvm_s390_vm_has_attr(kvm, &attr);
971		break;
972	}
973	case KVM_S390_GET_SKEYS: {
974		struct kvm_s390_skeys args;
975
976		r = -EFAULT;
977		if (copy_from_user(&args, argp,
978				   sizeof(struct kvm_s390_skeys)))
979			break;
980		r = kvm_s390_get_skeys(kvm, &args);
981		break;
982	}
983	case KVM_S390_SET_SKEYS: {
984		struct kvm_s390_skeys args;
985
986		r = -EFAULT;
987		if (copy_from_user(&args, argp,
988				   sizeof(struct kvm_s390_skeys)))
989			break;
990		r = kvm_s390_set_skeys(kvm, &args);
991		break;
992	}
993	default:
994		r = -ENOTTY;
995	}
996
997	return r;
998}
999
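/*
 * Query the AP (Adjunct Processor) configuration via the PQAP(QCI)
 * instruction into the 128-byte buffer at @config. Returns the condition
 * code of the instruction, i.e. non-zero on failure.
 */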
1000static int kvm_s390_query_ap_config(u8 *config)
1001{
1002	u32 fcn_code = 0x04000000UL;
1003	u32 cc = 0;
1004
1005	memset(config, 0, 128);
1006	asm volatile(
1007		"lgr 0,%1\n"
1008		"lgr 2,%2\n"
1009		".long 0xb2af0000\n"		/* PQAP(QCI) */
1010		"0: ipm %0\n"
1011		"srl %0,28\n"
1012		"1:\n"
1013		EX_TABLE(0b, 1b)
1014		: "+r" (cc)
1015		: "r" (fcn_code), "r" (config)
1016		: "cc", "0", "2", "memory"
1017	);
1018
1019	return cc;
1020}
1021
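/*
 * Check whether APXA (extended addressing for the crypto control block) is
 * available: if the required facilities (2 and 12) are present, the QCI
 * response is queried and its APXA indication bit is returned.
 */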
1022static int kvm_s390_apxa_installed(void)
1023{
1024	u8 config[128];
1025	int cc;
1026
1027	if (test_facility(2) && test_facility(12)) {
1028		cc = kvm_s390_query_ap_config(config);
1029
1030		if (cc)
			pr_err("PQAP(QCI) failed with cc=%d\n", cc);
1032		else
1033			return config[0] & 0x40;
1034	}
1035
1036	return 0;
1037}
1038
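/*
 * The crycb descriptor (crycbd) combines the crycb origin with a format
 * indication: format 2 when APXA is installed, format 1 otherwise.
 */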
1039static void kvm_s390_set_crycb_format(struct kvm *kvm)
1040{
1041	kvm->arch.crypto.crycbd = (__u32)(unsigned long) kvm->arch.crypto.crycb;
1042
1043	if (kvm_s390_apxa_installed())
1044		kvm->arch.crypto.crycbd |= CRYCB_FORMAT2;
1045	else
1046		kvm->arch.crypto.crycbd |= CRYCB_FORMAT1;
1047}
1048
1049static void kvm_s390_get_cpu_id(struct cpuid *cpu_id)
1050{
1051	get_cpu_id(cpu_id);
1052	cpu_id->version = 0xff;
1053}
1054
1055static int kvm_s390_crypto_init(struct kvm *kvm)
1056{
1057	if (!test_kvm_facility(kvm, 76))
1058		return 0;
1059
1060	kvm->arch.crypto.crycb = kzalloc(sizeof(*kvm->arch.crypto.crycb),
1061					 GFP_KERNEL | GFP_DMA);
1062	if (!kvm->arch.crypto.crycb)
1063		return -ENOMEM;
1064
1065	kvm_s390_set_crycb_format(kvm);
1066
1067	/* Enable AES/DEA protected key functions by default */
1068	kvm->arch.crypto.aes_kw = 1;
1069	kvm->arch.crypto.dea_kw = 1;
1070	get_random_bytes(kvm->arch.crypto.crycb->aes_wrapping_key_mask,
1071			 sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
1072	get_random_bytes(kvm->arch.crypto.crycb->dea_wrapping_key_mask,
1073			 sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
1074
1075	return 0;
1076}
1077
1078int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
1079{
1080	int i, rc;
1081	char debug_name[16];
1082	static unsigned long sca_offset;
1083
1084	rc = -EINVAL;
1085#ifdef CONFIG_KVM_S390_UCONTROL
1086	if (type & ~KVM_VM_S390_UCONTROL)
1087		goto out_err;
1088	if ((type & KVM_VM_S390_UCONTROL) && (!capable(CAP_SYS_ADMIN)))
1089		goto out_err;
1090#else
1091	if (type)
1092		goto out_err;
1093#endif
1094
1095	rc = s390_enable_sie();
1096	if (rc)
1097		goto out_err;
1098
1099	rc = -ENOMEM;
1100
1101	kvm->arch.sca = (struct sca_block *) get_zeroed_page(GFP_KERNEL);
1102	if (!kvm->arch.sca)
1103		goto out_err;
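	/*
	 * Every VM gets its own zeroed page for the SCA; the global sca_offset
	 * only staggers where within that page the SCA starts, in 16-byte
	 * steps, wrapping before the structure would cross the page boundary.
	 */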
1104	spin_lock(&kvm_lock);
1105	sca_offset += 16;
1106	if (sca_offset + sizeof(struct sca_block) > PAGE_SIZE)
1107		sca_offset = 0;
1108	kvm->arch.sca = (struct sca_block *) ((char *) kvm->arch.sca + sca_offset);
1109	spin_unlock(&kvm_lock);
1110
1111	sprintf(debug_name, "kvm-%u", current->pid);
1112
1113	kvm->arch.dbf = debug_register(debug_name, 32, 1, 7 * sizeof(long));
1114	if (!kvm->arch.dbf)
1115		goto out_err;
1116
	/*
	 * The architectural maximum number of facility bits is 16 kbit, which
	 * takes 2 kbyte of memory to store. The guest facility list
	 * (arch.model.fac->list) plus the facility mask (arch.model.fac->mask)
	 * therefore need a full page. Its address has to fit into 31 bits and
	 * must be word aligned.
	 */
1124	kvm->arch.model.fac =
1125		(struct kvm_s390_fac *) get_zeroed_page(GFP_KERNEL | GFP_DMA);
1126	if (!kvm->arch.model.fac)
1127		goto out_err;
1128
1129	/* Populate the facility mask initially. */
1130	memcpy(kvm->arch.model.fac->mask, S390_lowcore.stfle_fac_list,
1131	       S390_ARCH_FAC_LIST_SIZE_BYTE);
1132	for (i = 0; i < S390_ARCH_FAC_LIST_SIZE_U64; i++) {
1133		if (i < kvm_s390_fac_list_mask_size())
1134			kvm->arch.model.fac->mask[i] &= kvm_s390_fac_list_mask[i];
1135		else
1136			kvm->arch.model.fac->mask[i] = 0UL;
1137	}
1138
1139	/* Populate the facility list initially. */
1140	memcpy(kvm->arch.model.fac->list, kvm->arch.model.fac->mask,
1141	       S390_ARCH_FAC_LIST_SIZE_BYTE);
1142
1143	kvm_s390_get_cpu_id(&kvm->arch.model.cpu_id);
1144	kvm->arch.model.ibc = sclp.ibc & 0x0fff;
1145
1146	if (kvm_s390_crypto_init(kvm) < 0)
1147		goto out_err;
1148
1149	spin_lock_init(&kvm->arch.float_int.lock);
1150	for (i = 0; i < FIRQ_LIST_COUNT; i++)
1151		INIT_LIST_HEAD(&kvm->arch.float_int.lists[i]);
1152	init_waitqueue_head(&kvm->arch.ipte_wq);
1153	mutex_init(&kvm->arch.ipte_mutex);
1154
1155	debug_register_view(kvm->arch.dbf, &debug_sprintf_view);
1156	VM_EVENT(kvm, 3, "vm created with type %lu", type);
1157
1158	if (type & KVM_VM_S390_UCONTROL) {
1159		kvm->arch.gmap = NULL;
1160	} else {
1161		kvm->arch.gmap = gmap_alloc(current->mm, (1UL << 44) - 1);
1162		if (!kvm->arch.gmap)
1163			goto out_err;
1164		kvm->arch.gmap->private = kvm;
1165		kvm->arch.gmap->pfault_enabled = 0;
1166	}
1167
1168	kvm->arch.css_support = 0;
1169	kvm->arch.use_irqchip = 0;
1170	kvm->arch.epoch = 0;
1171
1172	spin_lock_init(&kvm->arch.start_stop_lock);
1173	KVM_EVENT(3, "vm 0x%p created by pid %u", kvm, current->pid);
1174
1175	return 0;
1176out_err:
1177	kfree(kvm->arch.crypto.crycb);
1178	free_page((unsigned long)kvm->arch.model.fac);
1179	debug_unregister(kvm->arch.dbf);
1180	free_page((unsigned long)(kvm->arch.sca));
1181	KVM_EVENT(3, "creation of vm failed: %d", rc);
1182	return rc;
1183}
1184
1185void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
1186{
1187	VCPU_EVENT(vcpu, 3, "%s", "free cpu");
1188	trace_kvm_s390_destroy_vcpu(vcpu->vcpu_id);
1189	kvm_s390_clear_local_irqs(vcpu);
1190	kvm_clear_async_pf_completion_queue(vcpu);
1191	if (!kvm_is_ucontrol(vcpu->kvm)) {
1192		clear_bit(63 - vcpu->vcpu_id,
1193			  (unsigned long *) &vcpu->kvm->arch.sca->mcn);
1194		if (vcpu->kvm->arch.sca->cpu[vcpu->vcpu_id].sda ==
1195		    (__u64) vcpu->arch.sie_block)
1196			vcpu->kvm->arch.sca->cpu[vcpu->vcpu_id].sda = 0;
1197	}
1198	smp_mb();
1199
1200	if (kvm_is_ucontrol(vcpu->kvm))
1201		gmap_free(vcpu->arch.gmap);
1202
1203	if (vcpu->kvm->arch.use_cmma)
1204		kvm_s390_vcpu_unsetup_cmma(vcpu);
1205	free_page((unsigned long)(vcpu->arch.sie_block));
1206
1207	kvm_vcpu_uninit(vcpu);
1208	kmem_cache_free(kvm_vcpu_cache, vcpu);
1209}
1210
1211static void kvm_free_vcpus(struct kvm *kvm)
1212{
1213	unsigned int i;
1214	struct kvm_vcpu *vcpu;
1215
1216	kvm_for_each_vcpu(i, vcpu, kvm)
1217		kvm_arch_vcpu_destroy(vcpu);
1218
1219	mutex_lock(&kvm->lock);
1220	for (i = 0; i < atomic_read(&kvm->online_vcpus); i++)
1221		kvm->vcpus[i] = NULL;
1222
1223	atomic_set(&kvm->online_vcpus, 0);
1224	mutex_unlock(&kvm->lock);
1225}
1226
1227void kvm_arch_destroy_vm(struct kvm *kvm)
1228{
1229	kvm_free_vcpus(kvm);
1230	free_page((unsigned long)kvm->arch.model.fac);
1231	free_page((unsigned long)(kvm->arch.sca));
1232	debug_unregister(kvm->arch.dbf);
1233	kfree(kvm->arch.crypto.crycb);
1234	if (!kvm_is_ucontrol(kvm))
1235		gmap_free(kvm->arch.gmap);
1236	kvm_s390_destroy_adapters(kvm);
1237	kvm_s390_clear_float_irqs(kvm);
1238	KVM_EVENT(3, "vm 0x%p destroyed", kvm);
1239}
1240
1241/* Section: vcpu related */
1242static int __kvm_ucontrol_vcpu_init(struct kvm_vcpu *vcpu)
1243{
1244	vcpu->arch.gmap = gmap_alloc(current->mm, -1UL);
1245	if (!vcpu->arch.gmap)
1246		return -ENOMEM;
1247	vcpu->arch.gmap->private = vcpu->kvm;
1248
1249	return 0;
1250}
1251
1252int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
1253{
1254	vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
1255	kvm_clear_async_pf_completion_queue(vcpu);
1256	vcpu->run->kvm_valid_regs = KVM_SYNC_PREFIX |
1257				    KVM_SYNC_GPRS |
1258				    KVM_SYNC_ACRS |
1259				    KVM_SYNC_CRS |
1260				    KVM_SYNC_ARCH0 |
1261				    KVM_SYNC_PFAULT;
1262	if (test_kvm_facility(vcpu->kvm, 129))
1263		vcpu->run->kvm_valid_regs |= KVM_SYNC_VRS;
1264
1265	if (kvm_is_ucontrol(vcpu->kvm))
1266		return __kvm_ucontrol_vcpu_init(vcpu);
1267
1268	return 0;
1269}
1270
1271void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
1272{
1273	/* Save host register state */
1274	save_fpu_regs();
1275	vcpu->arch.host_fpregs.fpc = current->thread.fpu.fpc;
1276	vcpu->arch.host_fpregs.regs = current->thread.fpu.regs;
1277
	/*
	 * Depending on MACHINE_HAS_VX, the data stored in vrs is either in
	 * vector register or in floating point register format.
	 */
1281	current->thread.fpu.regs = vcpu->run->s.regs.vrs;
1282	current->thread.fpu.fpc = vcpu->run->s.regs.fpc;
1283	if (test_fp_ctl(current->thread.fpu.fpc))
1284		/* User space provided an invalid FPC, let's clear it */
1285		current->thread.fpu.fpc = 0;
1286
1287	save_access_regs(vcpu->arch.host_acrs);
1288	restore_access_regs(vcpu->run->s.regs.acrs);
1289	gmap_enable(vcpu->arch.gmap);
1290	atomic_or(CPUSTAT_RUNNING, &vcpu->arch.sie_block->cpuflags);
1291}
1292
1293void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
1294{
1295	atomic_andnot(CPUSTAT_RUNNING, &vcpu->arch.sie_block->cpuflags);
1296	gmap_disable(vcpu->arch.gmap);
1297
1298	/* Save guest register state */
1299	save_fpu_regs();
1300	vcpu->run->s.regs.fpc = current->thread.fpu.fpc;
1301
1302	/* Restore host register state */
1303	current->thread.fpu.fpc = vcpu->arch.host_fpregs.fpc;
1304	current->thread.fpu.regs = vcpu->arch.host_fpregs.regs;
1305
1306	save_access_regs(vcpu->run->s.regs.acrs);
1307	restore_access_regs(vcpu->arch.host_acrs);
1308}
1309
1310static void kvm_s390_vcpu_initial_reset(struct kvm_vcpu *vcpu)
1311{
	/*
	 * This equals the initial CPU reset described in the POP (Principles
	 * of Operation), but we don't switch to ESA mode.
	 */
1313	vcpu->arch.sie_block->gpsw.mask = 0UL;
1314	vcpu->arch.sie_block->gpsw.addr = 0UL;
1315	kvm_s390_set_prefix(vcpu, 0);
1316	vcpu->arch.sie_block->cputm     = 0UL;
1317	vcpu->arch.sie_block->ckc       = 0UL;
1318	vcpu->arch.sie_block->todpr     = 0;
1319	memset(vcpu->arch.sie_block->gcr, 0, 16 * sizeof(__u64));
1320	vcpu->arch.sie_block->gcr[0]  = 0xE0UL;
1321	vcpu->arch.sie_block->gcr[14] = 0xC2000000UL;
1322	/* make sure the new fpc will be lazily loaded */
1323	save_fpu_regs();
1324	current->thread.fpu.fpc = 0;
1325	vcpu->arch.sie_block->gbea = 1;
1326	vcpu->arch.sie_block->pp = 0;
1327	vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
1328	kvm_clear_async_pf_completion_queue(vcpu);
1329	if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm))
1330		kvm_s390_vcpu_stop(vcpu);
1331	kvm_s390_clear_local_irqs(vcpu);
1332}
1333
1334void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
1335{
1336	mutex_lock(&vcpu->kvm->lock);
1337	preempt_disable();
1338	vcpu->arch.sie_block->epoch = vcpu->kvm->arch.epoch;
1339	preempt_enable();
1340	mutex_unlock(&vcpu->kvm->lock);
1341	if (!kvm_is_ucontrol(vcpu->kvm))
1342		vcpu->arch.gmap = vcpu->kvm->arch.gmap;
1343}
1344
1345static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu)
1346{
1347	if (!test_kvm_facility(vcpu->kvm, 76))
1348		return;
1349
1350	vcpu->arch.sie_block->ecb3 &= ~(ECB3_AES | ECB3_DEA);
1351
1352	if (vcpu->kvm->arch.crypto.aes_kw)
1353		vcpu->arch.sie_block->ecb3 |= ECB3_AES;
1354	if (vcpu->kvm->arch.crypto.dea_kw)
1355		vcpu->arch.sie_block->ecb3 |= ECB3_DEA;
1356
1357	vcpu->arch.sie_block->crycbd = vcpu->kvm->arch.crypto.crycbd;
1358}
1359
1360void kvm_s390_vcpu_unsetup_cmma(struct kvm_vcpu *vcpu)
1361{
1362	free_page(vcpu->arch.sie_block->cbrlo);
1363	vcpu->arch.sie_block->cbrlo = 0;
1364}
1365
1366int kvm_s390_vcpu_setup_cmma(struct kvm_vcpu *vcpu)
1367{
1368	vcpu->arch.sie_block->cbrlo = get_zeroed_page(GFP_KERNEL);
1369	if (!vcpu->arch.sie_block->cbrlo)
1370		return -ENOMEM;
1371
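	/* Bit 0x80 in ecb2 enables CMMA interpretation for this vcpu. */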
1372	vcpu->arch.sie_block->ecb2 |= 0x80;
1373	vcpu->arch.sie_block->ecb2 &= ~0x08;
1374	return 0;
1375}
1376
1377static void kvm_s390_vcpu_setup_model(struct kvm_vcpu *vcpu)
1378{
1379	struct kvm_s390_cpu_model *model = &vcpu->kvm->arch.model;
1380
1381	vcpu->arch.cpu_id = model->cpu_id;
1382	vcpu->arch.sie_block->ibc = model->ibc;
1383	vcpu->arch.sie_block->fac = (int) (long) model->fac->list;
1384}
1385
1386int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
1387{
1388	int rc = 0;
1389
1390	atomic_set(&vcpu->arch.sie_block->cpuflags, CPUSTAT_ZARCH |
1391						    CPUSTAT_SM |
1392						    CPUSTAT_STOPPED);
1393
1394	if (test_kvm_facility(vcpu->kvm, 78))
1395		atomic_or(CPUSTAT_GED2, &vcpu->arch.sie_block->cpuflags);
1396	else if (test_kvm_facility(vcpu->kvm, 8))
1397		atomic_or(CPUSTAT_GED, &vcpu->arch.sie_block->cpuflags);
1398
1399	kvm_s390_vcpu_setup_model(vcpu);
1400
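	/*
	 * The constants assigned below are SIE execution-control bits; e.g.
	 * the 0x10 bit in ecb is only set when facilities 50 and 73
	 * (transactional execution) are available, and the facility-129
	 * dependent eca/ecd bits enable vector register support.
	 */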
1401	vcpu->arch.sie_block->ecb   = 6;
1402	if (test_kvm_facility(vcpu->kvm, 50) && test_kvm_facility(vcpu->kvm, 73))
1403		vcpu->arch.sie_block->ecb |= 0x10;
1404
1405	vcpu->arch.sie_block->ecb2  = 8;
1406	vcpu->arch.sie_block->eca   = 0xC1002000U;
1407	if (sclp.has_siif)
1408		vcpu->arch.sie_block->eca |= 1;
1409	if (sclp.has_sigpif)
1410		vcpu->arch.sie_block->eca |= 0x10000000U;
1411	if (test_kvm_facility(vcpu->kvm, 129)) {
1412		vcpu->arch.sie_block->eca |= 0x00020000;
1413		vcpu->arch.sie_block->ecd |= 0x20000000;
1414	}
1415	vcpu->arch.sie_block->ictl |= ICTL_ISKE | ICTL_SSKE | ICTL_RRBE;
1416
1417	if (vcpu->kvm->arch.use_cmma) {
1418		rc = kvm_s390_vcpu_setup_cmma(vcpu);
1419		if (rc)
1420			return rc;
1421	}
1422	hrtimer_init(&vcpu->arch.ckc_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
1423	vcpu->arch.ckc_timer.function = kvm_s390_idle_wakeup;
1424
1425	kvm_s390_vcpu_crypto_setup(vcpu);
1426
1427	return rc;
1428}
1429
1430struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm,
1431				      unsigned int id)
1432{
1433	struct kvm_vcpu *vcpu;
1434	struct sie_page *sie_page;
1435	int rc = -EINVAL;
1436
1437	if (id >= KVM_MAX_VCPUS)
1438		goto out;
1439
1440	rc = -ENOMEM;
1441
1442	vcpu = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL);
1443	if (!vcpu)
1444		goto out;
1445
1446	sie_page = (struct sie_page *) get_zeroed_page(GFP_KERNEL);
1447	if (!sie_page)
1448		goto out_free_cpu;
1449
1450	vcpu->arch.sie_block = &sie_page->sie_block;
1451	vcpu->arch.sie_block->itdba = (unsigned long) &sie_page->itdb;
1452
1453	vcpu->arch.sie_block->icpua = id;
1454	if (!kvm_is_ucontrol(kvm)) {
1455		if (!kvm->arch.sca) {
1456			WARN_ON_ONCE(1);
1457			goto out_free_cpu;
1458		}
1459		if (!kvm->arch.sca->cpu[id].sda)
1460			kvm->arch.sca->cpu[id].sda =
1461				(__u64) vcpu->arch.sie_block;
1462		vcpu->arch.sie_block->scaoh =
1463			(__u32)(((__u64)kvm->arch.sca) >> 32);
1464		vcpu->arch.sie_block->scaol = (__u32)(__u64)kvm->arch.sca;
1465		set_bit(63 - id, (unsigned long *) &kvm->arch.sca->mcn);
1466	}
1467
1468	spin_lock_init(&vcpu->arch.local_int.lock);
1469	vcpu->arch.local_int.float_int = &kvm->arch.float_int;
1470	vcpu->arch.local_int.wq = &vcpu->wq;
1471	vcpu->arch.local_int.cpuflags = &vcpu->arch.sie_block->cpuflags;
1472
1473	rc = kvm_vcpu_init(vcpu, kvm, id);
1474	if (rc)
1475		goto out_free_sie_block;
1476	VM_EVENT(kvm, 3, "create cpu %d at %p, sie block at %p", id, vcpu,
1477		 vcpu->arch.sie_block);
1478	trace_kvm_s390_create_vcpu(id, vcpu, vcpu->arch.sie_block);
1479
1480	return vcpu;
1481out_free_sie_block:
1482	free_page((unsigned long)(vcpu->arch.sie_block));
1483out_free_cpu:
1484	kmem_cache_free(kvm_vcpu_cache, vcpu);
1485out:
1486	return ERR_PTR(rc);
1487}
1488
1489int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
1490{
1491	return kvm_s390_vcpu_has_irq(vcpu, 0);
1492}
1493
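/*
 * Setting PROG_BLOCK_SIE in prog20 prevents the vcpu from (re)entering SIE
 * until kvm_s390_vcpu_unblock() clears the bit again.
 */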
1494void kvm_s390_vcpu_block(struct kvm_vcpu *vcpu)
1495{
1496	atomic_or(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
1497	exit_sie(vcpu);
1498}
1499
1500void kvm_s390_vcpu_unblock(struct kvm_vcpu *vcpu)
1501{
1502	atomic_andnot(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
1503}
1504
1505static void kvm_s390_vcpu_request(struct kvm_vcpu *vcpu)
1506{
1507	atomic_or(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
1508	exit_sie(vcpu);
1509}
1510
1511static void kvm_s390_vcpu_request_handled(struct kvm_vcpu *vcpu)
1512{
1513	atomic_andnot(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
1514}
1515
/*
 * Kick a guest cpu out of SIE and wait until SIE is not running.
 * If the CPU is not running (e.g. waiting as idle) the function will
 * return immediately.
 */
1520void exit_sie(struct kvm_vcpu *vcpu)
1521{
1522	atomic_or(CPUSTAT_STOP_INT, &vcpu->arch.sie_block->cpuflags);
1523	while (vcpu->arch.sie_block->prog0c & PROG_IN_SIE)
1524		cpu_relax();
1525}
1526
1527/* Kick a guest cpu out of SIE to process a request synchronously */
1528void kvm_s390_sync_request(int req, struct kvm_vcpu *vcpu)
1529{
1530	kvm_make_request(req, vcpu);
1531	kvm_s390_vcpu_request(vcpu);
1532}
1533
1534static void kvm_gmap_notifier(struct gmap *gmap, unsigned long address)
1535{
1536	int i;
1537	struct kvm *kvm = gmap->private;
1538	struct kvm_vcpu *vcpu;
1539
1540	kvm_for_each_vcpu(i, vcpu, kvm) {
1541		/* match against both prefix pages */
1542		if (kvm_s390_get_prefix(vcpu) == (address & ~0x1000UL)) {
1543			VCPU_EVENT(vcpu, 2, "gmap notifier for %lx", address);
1544			kvm_s390_sync_request(KVM_REQ_MMU_RELOAD, vcpu);
1545		}
1546	}
1547}
1548
1549int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu)
1550{
1551	/* kvm common code refers to this, but never calls it */
1552	BUG();
1553	return 0;
1554}
1555
1556static int kvm_arch_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu,
1557					   struct kvm_one_reg *reg)
1558{
1559	int r = -EINVAL;
1560
1561	switch (reg->id) {
1562	case KVM_REG_S390_TODPR:
1563		r = put_user(vcpu->arch.sie_block->todpr,
1564			     (u32 __user *)reg->addr);
1565		break;
1566	case KVM_REG_S390_EPOCHDIFF:
1567		r = put_user(vcpu->arch.sie_block->epoch,
1568			     (u64 __user *)reg->addr);
1569		break;
1570	case KVM_REG_S390_CPU_TIMER:
1571		r = put_user(vcpu->arch.sie_block->cputm,
1572			     (u64 __user *)reg->addr);
1573		break;
1574	case KVM_REG_S390_CLOCK_COMP:
1575		r = put_user(vcpu->arch.sie_block->ckc,
1576			     (u64 __user *)reg->addr);
1577		break;
1578	case KVM_REG_S390_PFTOKEN:
1579		r = put_user(vcpu->arch.pfault_token,
1580			     (u64 __user *)reg->addr);
1581		break;
1582	case KVM_REG_S390_PFCOMPARE:
1583		r = put_user(vcpu->arch.pfault_compare,
1584			     (u64 __user *)reg->addr);
1585		break;
1586	case KVM_REG_S390_PFSELECT:
1587		r = put_user(vcpu->arch.pfault_select,
1588			     (u64 __user *)reg->addr);
1589		break;
1590	case KVM_REG_S390_PP:
1591		r = put_user(vcpu->arch.sie_block->pp,
1592			     (u64 __user *)reg->addr);
1593		break;
1594	case KVM_REG_S390_GBEA:
1595		r = put_user(vcpu->arch.sie_block->gbea,
1596			     (u64 __user *)reg->addr);
1597		break;
1598	default:
1599		break;
1600	}
1601
1602	return r;
1603}
1604
1605static int kvm_arch_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu,
1606					   struct kvm_one_reg *reg)
1607{
1608	int r = -EINVAL;
1609
1610	switch (reg->id) {
1611	case KVM_REG_S390_TODPR:
1612		r = get_user(vcpu->arch.sie_block->todpr,
1613			     (u32 __user *)reg->addr);
1614		break;
1615	case KVM_REG_S390_EPOCHDIFF:
1616		r = get_user(vcpu->arch.sie_block->epoch,
1617			     (u64 __user *)reg->addr);
1618		break;
1619	case KVM_REG_S390_CPU_TIMER:
1620		r = get_user(vcpu->arch.sie_block->cputm,
1621			     (u64 __user *)reg->addr);
1622		break;
1623	case KVM_REG_S390_CLOCK_COMP:
1624		r = get_user(vcpu->arch.sie_block->ckc,
1625			     (u64 __user *)reg->addr);
1626		break;
1627	case KVM_REG_S390_PFTOKEN:
1628		r = get_user(vcpu->arch.pfault_token,
1629			     (u64 __user *)reg->addr);
1630		if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
1631			kvm_clear_async_pf_completion_queue(vcpu);
1632		break;
1633	case KVM_REG_S390_PFCOMPARE:
1634		r = get_user(vcpu->arch.pfault_compare,
1635			     (u64 __user *)reg->addr);
1636		break;
1637	case KVM_REG_S390_PFSELECT:
1638		r = get_user(vcpu->arch.pfault_select,
1639			     (u64 __user *)reg->addr);
1640		break;
1641	case KVM_REG_S390_PP:
1642		r = get_user(vcpu->arch.sie_block->pp,
1643			     (u64 __user *)reg->addr);
1644		break;
1645	case KVM_REG_S390_GBEA:
1646		r = get_user(vcpu->arch.sie_block->gbea,
1647			     (u64 __user *)reg->addr);
1648		break;
1649	default:
1650		break;
1651	}
1652
1653	return r;
1654}
1655
1656static int kvm_arch_vcpu_ioctl_initial_reset(struct kvm_vcpu *vcpu)
1657{
1658	kvm_s390_vcpu_initial_reset(vcpu);
1659	return 0;
1660}
1661
1662int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
1663{
1664	memcpy(&vcpu->run->s.regs.gprs, &regs->gprs, sizeof(regs->gprs));
1665	return 0;
1666}
1667
1668int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
1669{
1670	memcpy(&regs->gprs, &vcpu->run->s.regs.gprs, sizeof(regs->gprs));
1671	return 0;
1672}
1673
1674int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
1675				  struct kvm_sregs *sregs)
1676{
1677	memcpy(&vcpu->run->s.regs.acrs, &sregs->acrs, sizeof(sregs->acrs));
1678	memcpy(&vcpu->arch.sie_block->gcr, &sregs->crs, sizeof(sregs->crs));
1679	restore_access_regs(vcpu->run->s.regs.acrs);
1680	return 0;
1681}
1682
1683int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
1684				  struct kvm_sregs *sregs)
1685{
1686	memcpy(&sregs->acrs, &vcpu->run->s.regs.acrs, sizeof(sregs->acrs));
1687	memcpy(&sregs->crs, &vcpu->arch.sie_block->gcr, sizeof(sregs->crs));
1688	return 0;
1689}
1690
1691int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
1692{
1693	/* make sure the new values will be lazily loaded */
1694	save_fpu_regs();
1695	if (test_fp_ctl(fpu->fpc))
1696		return -EINVAL;
1697	current->thread.fpu.fpc = fpu->fpc;
1698	if (MACHINE_HAS_VX)
1699		convert_fp_to_vx(current->thread.fpu.vxrs, (freg_t *)fpu->fprs);
1700	else
1701		memcpy(current->thread.fpu.fprs, &fpu->fprs, sizeof(fpu->fprs));
1702	return 0;
1703}
1704
1705int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
1706{
1707	/* make sure we have the latest values */
1708	save_fpu_regs();
1709	if (MACHINE_HAS_VX)
1710		convert_vx_to_fp((freg_t *)fpu->fprs, current->thread.fpu.vxrs);
1711	else
1712		memcpy(fpu->fprs, current->thread.fpu.fprs, sizeof(fpu->fprs));
1713	fpu->fpc = current->thread.fpu.fpc;
1714	return 0;
1715}
1716
1717static int kvm_arch_vcpu_ioctl_set_initial_psw(struct kvm_vcpu *vcpu, psw_t psw)
1718{
1719	int rc = 0;
1720
1721	if (!is_vcpu_stopped(vcpu))
1722		rc = -EBUSY;
1723	else {
1724		vcpu->run->psw_mask = psw.mask;
1725		vcpu->run->psw_addr = psw.addr;
1726	}
1727	return rc;
1728}
1729
1730int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
1731				  struct kvm_translation *tr)
1732{
1733	return -EINVAL; /* not implemented yet */
1734}
1735
1736#define VALID_GUESTDBG_FLAGS (KVM_GUESTDBG_SINGLESTEP | \
1737			      KVM_GUESTDBG_USE_HW_BP | \
1738			      KVM_GUESTDBG_ENABLE)
1739
1740int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
1741					struct kvm_guest_debug *dbg)
1742{
1743	int rc = 0;
1744
1745	vcpu->guest_debug = 0;
1746	kvm_s390_clear_bp_data(vcpu);
1747
1748	if (dbg->control & ~VALID_GUESTDBG_FLAGS)
1749		return -EINVAL;
1750
1751	if (dbg->control & KVM_GUESTDBG_ENABLE) {
1752		vcpu->guest_debug = dbg->control;
1753		/* enforce guest PER */
1754		atomic_or(CPUSTAT_P, &vcpu->arch.sie_block->cpuflags);
1755
1756		if (dbg->control & KVM_GUESTDBG_USE_HW_BP)
1757			rc = kvm_s390_import_bp_data(vcpu, dbg);
1758	} else {
1759		atomic_andnot(CPUSTAT_P, &vcpu->arch.sie_block->cpuflags);
1760		vcpu->arch.guestdbg.last_bp = 0;
1761	}
1762
1763	if (rc) {
1764		vcpu->guest_debug = 0;
1765		kvm_s390_clear_bp_data(vcpu);
1766		atomic_andnot(CPUSTAT_P, &vcpu->arch.sie_block->cpuflags);
1767	}
1768
1769	return rc;
1770}
1771
1772int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
1773				    struct kvm_mp_state *mp_state)
1774{
1775	/* CHECK_STOP and LOAD are not supported yet */
1776	return is_vcpu_stopped(vcpu) ? KVM_MP_STATE_STOPPED :
1777				       KVM_MP_STATE_OPERATING;
1778}
1779
1780int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
1781				    struct kvm_mp_state *mp_state)
1782{
1783	int rc = 0;
1784
1785	/* user space knows about this interface - let it control the state */
1786	vcpu->kvm->arch.user_cpu_state_ctrl = 1;
1787
1788	switch (mp_state->mp_state) {
1789	case KVM_MP_STATE_STOPPED:
1790		kvm_s390_vcpu_stop(vcpu);
1791		break;
1792	case KVM_MP_STATE_OPERATING:
1793		kvm_s390_vcpu_start(vcpu);
1794		break;
1795	case KVM_MP_STATE_LOAD:
1796	case KVM_MP_STATE_CHECK_STOP:
1797		/* fall through - CHECK_STOP and LOAD are not supported yet */
1798	default:
1799		rc = -ENXIO;
1800	}
1801
1802	return rc;
1803}
1804
1805static bool ibs_enabled(struct kvm_vcpu *vcpu)
1806{
1807	return atomic_read(&vcpu->arch.sie_block->cpuflags) & CPUSTAT_IBS;
1808}
1809
1810static int kvm_s390_handle_requests(struct kvm_vcpu *vcpu)
1811{
1812retry:
1813	kvm_s390_vcpu_request_handled(vcpu);
1814	if (!vcpu->requests)
1815		return 0;
1816	/*
1817	 * We use MMU_RELOAD just to re-arm the ipte notifier for the
1818	 * guest prefix page. gmap_ipte_notify will wait on the ptl lock.
1819	 * This ensures that the ipte instruction for this request has
1820	 * already finished. We might race against a second unmapper that
1821	 * wants to set the blocking bit. Lets just retry the request loop.
1822	 */
1823	if (kvm_check_request(KVM_REQ_MMU_RELOAD, vcpu)) {
1824		int rc;
1825		rc = gmap_ipte_notify(vcpu->arch.gmap,
1826				      kvm_s390_get_prefix(vcpu),
1827				      PAGE_SIZE * 2);
1828		if (rc)
1829			return rc;
1830		goto retry;
1831	}
1832
1833	if (kvm_check_request(KVM_REQ_TLB_FLUSH, vcpu)) {
1834		vcpu->arch.sie_block->ihcpu = 0xffff;
1835		goto retry;
1836	}
1837
1838	if (kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu)) {
1839		if (!ibs_enabled(vcpu)) {
1840			trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 1);
1841			atomic_or(CPUSTAT_IBS,
1842					&vcpu->arch.sie_block->cpuflags);
1843		}
1844		goto retry;
1845	}
1846
1847	if (kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu)) {
1848		if (ibs_enabled(vcpu)) {
1849			trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 0);
1850			atomic_andnot(CPUSTAT_IBS,
1851					  &vcpu->arch.sie_block->cpuflags);
1852		}
1853		goto retry;
1854	}
1855
1856	/* nothing to do, just clear the request */
1857	clear_bit(KVM_REQ_UNHALT, &vcpu->requests);
1858
1859	return 0;
1860}
1861
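/*
 * Set the guest TOD clock for the whole VM: the epoch is kept as the
 * difference between the requested guest TOD and the current host TOD and
 * is copied to every VCPU while all VCPUs are blocked out of SIE.
 */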
1862void kvm_s390_set_tod_clock(struct kvm *kvm, u64 tod)
1863{
1864	struct kvm_vcpu *vcpu;
1865	int i;
1866
1867	mutex_lock(&kvm->lock);
1868	preempt_disable();
1869	kvm->arch.epoch = tod - get_tod_clock();
1870	kvm_s390_vcpu_block_all(kvm);
1871	kvm_for_each_vcpu(i, vcpu, kvm)
1872		vcpu->arch.sie_block->epoch = kvm->arch.epoch;
1873	kvm_s390_vcpu_unblock_all(kvm);
1874	preempt_enable();
1875	mutex_unlock(&kvm->lock);
1876}
1877
1878/**
1879 * kvm_arch_fault_in_page - fault-in guest page if necessary
1880 * @vcpu: The corresponding virtual cpu
1881 * @gpa: Guest physical address
1882 * @writable: Whether the page should be writable or not
1883 *
1884 * Make sure that a guest page has been faulted-in on the host.
1885 *
1886 * Return: Zero on success, negative error code otherwise.
1887 */
1888long kvm_arch_fault_in_page(struct kvm_vcpu *vcpu, gpa_t gpa, int writable)
1889{
1890	return gmap_fault(vcpu->arch.gmap, gpa,
1891			  writable ? FAULT_FLAG_WRITE : 0);
1892}
1893
1894static void __kvm_inject_pfault_token(struct kvm_vcpu *vcpu, bool start_token,
1895				      unsigned long token)
1896{
1897	struct kvm_s390_interrupt inti;
1898	struct kvm_s390_irq irq;
1899
1900	if (start_token) {
1901		irq.u.ext.ext_params2 = token;
1902		irq.type = KVM_S390_INT_PFAULT_INIT;
1903		WARN_ON_ONCE(kvm_s390_inject_vcpu(vcpu, &irq));
1904	} else {
1905		inti.type = KVM_S390_INT_PFAULT_DONE;
1906		inti.parm64 = token;
1907		WARN_ON_ONCE(kvm_s390_inject_vm(vcpu->kvm, &inti));
1908	}
1909}
1910
1911void kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu,
1912				     struct kvm_async_pf *work)
1913{
1914	trace_kvm_s390_pfault_init(vcpu, work->arch.pfault_token);
1915	__kvm_inject_pfault_token(vcpu, true, work->arch.pfault_token);
1916}
1917
1918void kvm_arch_async_page_present(struct kvm_vcpu *vcpu,
1919				 struct kvm_async_pf *work)
1920{
1921	trace_kvm_s390_pfault_done(vcpu, work->arch.pfault_token);
1922	__kvm_inject_pfault_token(vcpu, false, work->arch.pfault_token);
1923}
1924
1925void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu,
1926			       struct kvm_async_pf *work)
1927{
1928	/* s390 will always inject the page directly */
1929}
1930
1931bool kvm_arch_can_inject_async_page_present(struct kvm_vcpu *vcpu)
1932{
1933	/*
1934	 * s390 will always inject the page directly,
	 * but we still want check_async_completion to clean up
1936	 */
1937	return true;
1938}
1939
1940static int kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu)
1941{
1942	hva_t hva;
1943	struct kvm_arch_async_pf arch;
1944	int rc;
1945
1946	if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
1947		return 0;
1948	if ((vcpu->arch.sie_block->gpsw.mask & vcpu->arch.pfault_select) !=
1949	    vcpu->arch.pfault_compare)
1950		return 0;
1951	if (psw_extint_disabled(vcpu))
1952		return 0;
1953	if (kvm_s390_vcpu_has_irq(vcpu, 0))
1954		return 0;
1955	if (!(vcpu->arch.sie_block->gcr[0] & 0x200ul))
1956		return 0;
1957	if (!vcpu->arch.gmap->pfault_enabled)
1958		return 0;
1959
1960	hva = gfn_to_hva(vcpu->kvm, gpa_to_gfn(current->thread.gmap_addr));
1961	hva += current->thread.gmap_addr & ~PAGE_MASK;
1962	if (read_guest_real(vcpu, vcpu->arch.pfault_token, &arch.pfault_token, 8))
1963		return 0;
1964
1965	rc = kvm_setup_async_pf(vcpu, current->thread.gmap_addr, hva, &arch);
1966	return rc;
1967}
1968
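/*
 * Prepare for entering SIE: finish async pfault housekeeping, give the
 * scheduler and machine-check handler a chance to run, deliver pending
 * interrupts (not for ucontrol guests), process VCPU requests and arm
 * guest debugging.  A non-zero return code aborts the run loop.
 */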
1969static int vcpu_pre_run(struct kvm_vcpu *vcpu)
1970{
1971	int rc, cpuflags;
1972
1973	/*
1974	 * On s390, notifications for arriving pages are delivered directly
1975	 * to the guest, but the housekeeping for completed pfaults is
1976	 * handled outside the worker.
1977	 */
1978	kvm_check_async_pf_completion(vcpu);
1979
1980	memcpy(&vcpu->arch.sie_block->gg14, &vcpu->run->s.regs.gprs[14], 16);
1981
1982	if (need_resched())
1983		schedule();
1984
1985	if (test_cpu_flag(CIF_MCCK_PENDING))
1986		s390_handle_mcck();
1987
1988	if (!kvm_is_ucontrol(vcpu->kvm)) {
1989		rc = kvm_s390_deliver_pending_interrupts(vcpu);
1990		if (rc)
1991			return rc;
1992	}
1993
1994	rc = kvm_s390_handle_requests(vcpu);
1995	if (rc)
1996		return rc;
1997
1998	if (guestdbg_enabled(vcpu)) {
1999		kvm_s390_backup_guest_per_regs(vcpu);
2000		kvm_s390_patch_guest_per_regs(vcpu);
2001	}
2002
2003	vcpu->arch.sie_block->icptcode = 0;
2004	cpuflags = atomic_read(&vcpu->arch.sie_block->cpuflags);
2005	VCPU_EVENT(vcpu, 6, "entering sie flags %x", cpuflags);
2006	trace_kvm_s390_sie_enter(vcpu, cpuflags);
2007
2008	return 0;
2009}
2010
2011static int vcpu_post_run_fault_in_sie(struct kvm_vcpu *vcpu)
2012{
2013	psw_t *psw = &vcpu->arch.sie_block->gpsw;
2014	u8 opcode;
2015	int rc;
2016
2017	VCPU_EVENT(vcpu, 3, "%s", "fault in sie instruction");
2018	trace_kvm_s390_sie_fault(vcpu);
2019
2020	/*
2021	 * We want to inject an addressing exception, which is defined as a
2022	 * suppressing or terminating exception. However, since we came here
2023	 * by a DAT access exception, the PSW still points to the faulting
2024	 * instruction since DAT exceptions are nullifying. So we've got
2025	 * to look up the current opcode to get the length of the instruction
2026	 * to be able to forward the PSW.
2027	 */
2028	rc = read_guest(vcpu, psw->addr, 0, &opcode, 1);
2029	if (rc)
2030		return kvm_s390_inject_prog_cond(vcpu, rc);
2031	psw->addr = __rewind_psw(*psw, -insn_length(opcode));
2032
2033	return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
2034}
2035
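/*
 * Digest one SIE exit: a non-negative exit_reason is a regular interception
 * that is handed to the intercept handlers; a negative value indicates a
 * host-side fault, which either becomes a KVM_EXIT_S390_UCONTROL exit, is
 * resolved via (asynchronous) page fault handling, or ends up as an
 * addressing exception injected into the guest.
 */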
2036static int vcpu_post_run(struct kvm_vcpu *vcpu, int exit_reason)
2037{
2038	int rc = -1;
2039
2040	VCPU_EVENT(vcpu, 6, "exit sie icptcode %d",
2041		   vcpu->arch.sie_block->icptcode);
2042	trace_kvm_s390_sie_exit(vcpu, vcpu->arch.sie_block->icptcode);
2043
2044	if (guestdbg_enabled(vcpu))
2045		kvm_s390_restore_guest_per_regs(vcpu);
2046
2047	if (exit_reason >= 0) {
2048		rc = 0;
2049	} else if (kvm_is_ucontrol(vcpu->kvm)) {
2050		vcpu->run->exit_reason = KVM_EXIT_S390_UCONTROL;
2051		vcpu->run->s390_ucontrol.trans_exc_code =
2052						current->thread.gmap_addr;
2053		vcpu->run->s390_ucontrol.pgm_code = 0x10;
2054		rc = -EREMOTE;
2055
2056	} else if (current->thread.gmap_pfault) {
2057		trace_kvm_s390_major_guest_pfault(vcpu);
2058		current->thread.gmap_pfault = 0;
2059		if (kvm_arch_setup_async_pf(vcpu)) {
2060			rc = 0;
2061		} else {
2062			gpa_t gpa = current->thread.gmap_addr;
2063			rc = kvm_arch_fault_in_page(vcpu, gpa, 1);
2064		}
2065	}
2066
2067	if (rc == -1)
2068		rc = vcpu_post_run_fault_in_sie(vcpu);
2069
2070	memcpy(&vcpu->run->s.regs.gprs[14], &vcpu->arch.sie_block->gg14, 16);
2071
2072	if (rc == 0) {
2073		if (kvm_is_ucontrol(vcpu->kvm))
2074			/* Don't exit for host interrupts. */
2075			rc = vcpu->arch.sie_block->icptcode ? -EOPNOTSUPP : 0;
2076		else
2077			rc = kvm_handle_sie_intercept(vcpu);
2078	}
2079
2080	return rc;
2081}
2082
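/*
 * Inner run loop: alternate between vcpu_pre_run(), the SIE entry and
 * vcpu_post_run() until a signal is pending, a guest debug exit is
 * requested or a non-zero return code stops the loop.
 */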
2083static int __vcpu_run(struct kvm_vcpu *vcpu)
2084{
2085	int rc, exit_reason;
2086
2087	/*
2088	 * We try to hold kvm->srcu during most of vcpu_run (except when
2089	 * running the guest), so that memslots (and other stuff) are protected
2090	 */
2091	vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
2092
2093	do {
2094		rc = vcpu_pre_run(vcpu);
2095		if (rc)
2096			break;
2097
2098		srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
2099		/*
2100		 * Since PF_VCPU will be used in the fault handler, there must be
2101		 * no uaccess between guest_enter and guest_exit.
2102		 */
2103		local_irq_disable();
2104		__kvm_guest_enter();
2105		local_irq_enable();
2106		exit_reason = sie64a(vcpu->arch.sie_block,
2107				     vcpu->run->s.regs.gprs);
2108		local_irq_disable();
2109		__kvm_guest_exit();
2110		local_irq_enable();
2111		vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
2112
2113		rc = vcpu_post_run(vcpu, exit_reason);
2114	} while (!signal_pending(current) && !guestdbg_exit_pending(vcpu) && !rc);
2115
2116	srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
2117	return rc;
2118}
2119
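/*
 * Transfer the register state from kvm_run into the SIE control block and
 * the VCPU's architecture state.  The PSW is always taken over; the other
 * register groups only if userspace flagged them in kvm_dirty_regs.
 */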
2120static void sync_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
2121{
2122	vcpu->arch.sie_block->gpsw.mask = kvm_run->psw_mask;
2123	vcpu->arch.sie_block->gpsw.addr = kvm_run->psw_addr;
2124	if (kvm_run->kvm_dirty_regs & KVM_SYNC_PREFIX)
2125		kvm_s390_set_prefix(vcpu, kvm_run->s.regs.prefix);
2126	if (kvm_run->kvm_dirty_regs & KVM_SYNC_CRS) {
2127		memcpy(&vcpu->arch.sie_block->gcr, &kvm_run->s.regs.crs, 128);
2128		/* some control register changes require a TLB flush */
2129		kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
2130	}
2131	if (kvm_run->kvm_dirty_regs & KVM_SYNC_ARCH0) {
2132		vcpu->arch.sie_block->cputm = kvm_run->s.regs.cputm;
2133		vcpu->arch.sie_block->ckc = kvm_run->s.regs.ckc;
2134		vcpu->arch.sie_block->todpr = kvm_run->s.regs.todpr;
2135		vcpu->arch.sie_block->pp = kvm_run->s.regs.pp;
2136		vcpu->arch.sie_block->gbea = kvm_run->s.regs.gbea;
2137	}
2138	if (kvm_run->kvm_dirty_regs & KVM_SYNC_PFAULT) {
2139		vcpu->arch.pfault_token = kvm_run->s.regs.pft;
2140		vcpu->arch.pfault_select = kvm_run->s.regs.pfs;
2141		vcpu->arch.pfault_compare = kvm_run->s.regs.pfc;
2142		if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
2143			kvm_clear_async_pf_completion_queue(vcpu);
2144	}
2145	kvm_run->kvm_dirty_regs = 0;
2146}
2147
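/*
 * Copy the current register state from the SIE control block and the VCPU's
 * architecture state back into kvm_run for userspace.
 */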
2148static void store_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
2149{
2150	kvm_run->psw_mask = vcpu->arch.sie_block->gpsw.mask;
2151	kvm_run->psw_addr = vcpu->arch.sie_block->gpsw.addr;
2152	kvm_run->s.regs.prefix = kvm_s390_get_prefix(vcpu);
2153	memcpy(&kvm_run->s.regs.crs, &vcpu->arch.sie_block->gcr, 128);
2154	kvm_run->s.regs.cputm = vcpu->arch.sie_block->cputm;
2155	kvm_run->s.regs.ckc = vcpu->arch.sie_block->ckc;
2156	kvm_run->s.regs.todpr = vcpu->arch.sie_block->todpr;
2157	kvm_run->s.regs.pp = vcpu->arch.sie_block->pp;
2158	kvm_run->s.regs.gbea = vcpu->arch.sie_block->gbea;
2159	kvm_run->s.regs.pft = vcpu->arch.pfault_token;
2160	kvm_run->s.regs.pfs = vcpu->arch.pfault_select;
2161	kvm_run->s.regs.pfc = vcpu->arch.pfault_compare;
2162}
2163
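/*
 * KVM_RUN entry point: honour pending guest debug exits, apply the guest
 * signal mask, auto-start the VCPU unless userspace controls the CPU state,
 * run the guest and translate the result into a kvm_run exit reason.
 */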
2164int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
2165{
2166	int rc;
2167	sigset_t sigsaved;
2168
2169	if (guestdbg_exit_pending(vcpu)) {
2170		kvm_s390_prepare_debug_exit(vcpu);
2171		return 0;
2172	}
2173
2174	if (vcpu->sigset_active)
2175		sigprocmask(SIG_SETMASK, &vcpu->sigset, &sigsaved);
2176
2177	if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm)) {
2178		kvm_s390_vcpu_start(vcpu);
2179	} else if (is_vcpu_stopped(vcpu)) {
2180		pr_err_ratelimited("can't run stopped vcpu %d\n",
2181				   vcpu->vcpu_id);
2182		return -EINVAL;
2183	}
2184
2185	sync_regs(vcpu, kvm_run);
2186
2187	might_fault();
2188	rc = __vcpu_run(vcpu);
2189
2190	if (signal_pending(current) && !rc) {
2191		kvm_run->exit_reason = KVM_EXIT_INTR;
2192		rc = -EINTR;
2193	}
2194
2195	if (guestdbg_exit_pending(vcpu) && !rc)  {
2196		kvm_s390_prepare_debug_exit(vcpu);
2197		rc = 0;
2198	}
2199
2200	if (rc == -EOPNOTSUPP) {
2201		/* intercept cannot be handled in-kernel, prepare kvm_run */
2202		kvm_run->exit_reason         = KVM_EXIT_S390_SIEIC;
2203		kvm_run->s390_sieic.icptcode = vcpu->arch.sie_block->icptcode;
2204		kvm_run->s390_sieic.ipa      = vcpu->arch.sie_block->ipa;
2205		kvm_run->s390_sieic.ipb      = vcpu->arch.sie_block->ipb;
2206		rc = 0;
2207	}
2208
2209	if (rc == -EREMOTE) {
2210		/* intercept was handled, but userspace support is needed;
2211		 * kvm_run has been prepared by the handler */
2212		rc = 0;
2213	}
2214
2215	store_regs(vcpu, kvm_run);
2216
2217	if (vcpu->sigset_active)
2218		sigprocmask(SIG_SETMASK, &sigsaved, NULL);
2219
2220	vcpu->stat.exit_userspace++;
2221	return rc;
2222}
2223
2224/*
2225 * store status at address
2226 * we have two special cases:
2227 * KVM_S390_STORE_STATUS_NOADDR: -> 0x1200 on 64 bit
2228 * KVM_S390_STORE_STATUS_PREFIXED: -> prefix
2229 */
2230int kvm_s390_store_status_unloaded(struct kvm_vcpu *vcpu, unsigned long gpa)
2231{
2232	unsigned char archmode = 1;
2233	freg_t fprs[NUM_FPRS];
2234	unsigned int px;
2235	u64 clkcomp;
2236	int rc;
2237
2238	px = kvm_s390_get_prefix(vcpu);
2239	if (gpa == KVM_S390_STORE_STATUS_NOADDR) {
2240		if (write_guest_abs(vcpu, 163, &archmode, 1))
2241			return -EFAULT;
2242		gpa = 0;
2243	} else if (gpa == KVM_S390_STORE_STATUS_PREFIXED) {
2244		if (write_guest_real(vcpu, 163, &archmode, 1))
2245			return -EFAULT;
2246		gpa = px;
2247	} else
2248		gpa -= __LC_FPREGS_SAVE_AREA;
2249
2250	/* manually convert vector registers if necessary */
2251	if (MACHINE_HAS_VX) {
2252		convert_vx_to_fp(fprs, (__vector128 *) vcpu->run->s.regs.vrs);
2253		rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA,
2254				     fprs, 128);
2255	} else {
2256		rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA,
2257				     vcpu->run->s.regs.vrs, 128);
2258	}
2259	rc |= write_guest_abs(vcpu, gpa + __LC_GPREGS_SAVE_AREA,
2260			      vcpu->run->s.regs.gprs, 128);
2261	rc |= write_guest_abs(vcpu, gpa + __LC_PSW_SAVE_AREA,
2262			      &vcpu->arch.sie_block->gpsw, 16);
2263	rc |= write_guest_abs(vcpu, gpa + __LC_PREFIX_SAVE_AREA,
2264			      &px, 4);
2265	rc |= write_guest_abs(vcpu, gpa + __LC_FP_CREG_SAVE_AREA,
2266			      &vcpu->run->s.regs.fpc, 4);
2267	rc |= write_guest_abs(vcpu, gpa + __LC_TOD_PROGREG_SAVE_AREA,
2268			      &vcpu->arch.sie_block->todpr, 4);
2269	rc |= write_guest_abs(vcpu, gpa + __LC_CPU_TIMER_SAVE_AREA,
2270			      &vcpu->arch.sie_block->cputm, 8);
2271	clkcomp = vcpu->arch.sie_block->ckc >> 8;
2272	rc |= write_guest_abs(vcpu, gpa + __LC_CLOCK_COMP_SAVE_AREA,
2273			      &clkcomp, 8);
2274	rc |= write_guest_abs(vcpu, gpa + __LC_AREGS_SAVE_AREA,
2275			      &vcpu->run->s.regs.acrs, 64);
2276	rc |= write_guest_abs(vcpu, gpa + __LC_CREGS_SAVE_AREA,
2277			      &vcpu->arch.sie_block->gcr, 128);
2278	return rc ? -EFAULT : 0;
2279}
2280
2281int kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr)
2282{
2283	/*
2284	 * The guest FPRS and ACRS are in the host FPRS/ACRS due to the lazy
2285	 * copying in vcpu load/put. Let's update our copies before we save
2286	 * them into the save area.
2287	 */
2288	save_fpu_regs();
2289	vcpu->run->s.regs.fpc = current->thread.fpu.fpc;
2290	save_access_regs(vcpu->run->s.regs.acrs);
2291
2292	return kvm_s390_store_status_unloaded(vcpu, addr);
2293}
2294
2295/*
2296 * store additional status at address
2297 */
2298int kvm_s390_store_adtl_status_unloaded(struct kvm_vcpu *vcpu,
2299					unsigned long gpa)
2300{
2301	/* Only bits 0-53 are used for address formation */
2302	if (!(gpa & ~0x3ff))
2303		return 0;
2304
2305	return write_guest_abs(vcpu, gpa & ~0x3ff,
2306			       (void *)&vcpu->run->s.regs.vrs, 512);
2307}
2308
2309int kvm_s390_vcpu_store_adtl_status(struct kvm_vcpu *vcpu, unsigned long addr)
2310{
2311	if (!test_kvm_facility(vcpu->kvm, 129))
2312		return 0;
2313
2314	/*
2315	 * The guest VXRS are in the host VXRS due to the lazy
2316	 * copying in vcpu load/put. We can simply call save_fpu_regs()
2317	 * to save the current register state because we are in the
2318	 * middle of a load/put cycle.
2319	 *
2320	 * Let's update our copies before we save them into the save area.
2321	 */
2322	save_fpu_regs();
2323
2324	return kvm_s390_store_adtl_status_unloaded(vcpu, addr);
2325}
2326
2327static void __disable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
2328{
2329	kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu);
2330	kvm_s390_sync_request(KVM_REQ_DISABLE_IBS, vcpu);
2331}
2332
2333static void __disable_ibs_on_all_vcpus(struct kvm *kvm)
2334{
2335	unsigned int i;
2336	struct kvm_vcpu *vcpu;
2337
2338	kvm_for_each_vcpu(i, vcpu, kvm) {
2339		__disable_ibs_on_vcpu(vcpu);
2340	}
2341}
2342
2343static void __enable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
2344{
2345	kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu);
2346	kvm_s390_sync_request(KVM_REQ_ENABLE_IBS, vcpu);
2347}
2348
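/*
 * Move the VCPU out of the STOPPED state.  If it becomes the only started
 * VCPU, IBS is enabled to speed it up; if it is the second one to start,
 * IBS is disabled again on all VCPUs.
 */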
2349void kvm_s390_vcpu_start(struct kvm_vcpu *vcpu)
2350{
2351	int i, online_vcpus, started_vcpus = 0;
2352
2353	if (!is_vcpu_stopped(vcpu))
2354		return;
2355
2356	trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 1);
2357	/* Only one cpu at a time may enter/leave the STOPPED state. */
2358	spin_lock(&vcpu->kvm->arch.start_stop_lock);
2359	online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);
2360
2361	for (i = 0; i < online_vcpus; i++) {
2362		if (!is_vcpu_stopped(vcpu->kvm->vcpus[i]))
2363			started_vcpus++;
2364	}
2365
2366	if (started_vcpus == 0) {
2367		/* we're the only active VCPU -> speed it up */
2368		__enable_ibs_on_vcpu(vcpu);
2369	} else if (started_vcpus == 1) {
2370		/*
2371		 * As we are starting a second VCPU, we have to disable
2372		 * the IBS facility on all VCPUs to remove potentially
2373		 * outstanding ENABLE requests.
2374		 */
2375		__disable_ibs_on_all_vcpus(vcpu->kvm);
2376	}
2377
2378	atomic_andnot(CPUSTAT_STOPPED, &vcpu->arch.sie_block->cpuflags);
2379	/*
2380	 * Another VCPU might have used IBS while we were offline.
2381	 * Let's play safe and flush the VCPU at startup.
2382	 */
2383	kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
2384	spin_unlock(&vcpu->kvm->arch.start_stop_lock);
2385	return;
2386}
2387
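/*
 * Move the VCPU into the STOPPED state.  Pending SIGP STOP (AND STORE
 * STATUS) orders are considered fully processed, and if exactly one started
 * VCPU remains, IBS is enabled for it.
 */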
2388void kvm_s390_vcpu_stop(struct kvm_vcpu *vcpu)
2389{
2390	int i, online_vcpus, started_vcpus = 0;
2391	struct kvm_vcpu *started_vcpu = NULL;
2392
2393	if (is_vcpu_stopped(vcpu))
2394		return;
2395
2396	trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 0);
2397	/* Only one cpu at a time may enter/leave the STOPPED state. */
2398	spin_lock(&vcpu->kvm->arch.start_stop_lock);
2399	online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);
2400
2401	/* SIGP STOP and SIGP STOP AND STORE STATUS have been fully processed */
2402	kvm_s390_clear_stop_irq(vcpu);
2403
2404	atomic_or(CPUSTAT_STOPPED, &vcpu->arch.sie_block->cpuflags);
2405	__disable_ibs_on_vcpu(vcpu);
2406
2407	for (i = 0; i < online_vcpus; i++) {
2408		if (!is_vcpu_stopped(vcpu->kvm->vcpus[i])) {
2409			started_vcpus++;
2410			started_vcpu = vcpu->kvm->vcpus[i];
2411		}
2412	}
2413
2414	if (started_vcpus == 1) {
2415		/*
2416		 * As we only have one VCPU left, we want to enable the
2417		 * IBS facility for that VCPU to speed it up.
2418		 */
2419		__enable_ibs_on_vcpu(started_vcpu);
2420	}
2421
2422	spin_unlock(&vcpu->kvm->arch.start_stop_lock);
2423	return;
2424}
2425
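/*
 * Handle KVM_ENABLE_CAP on the vcpu fd.  Only KVM_CAP_S390_CSS_SUPPORT is
 * accepted here; it indicates that userspace handles the channel subsystem,
 * so channel I/O instructions are relayed to userspace.
 */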
2426static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu,
2427				     struct kvm_enable_cap *cap)
2428{
2429	int r;
2430
2431	if (cap->flags)
2432		return -EINVAL;
2433
2434	switch (cap->cap) {
2435	case KVM_CAP_S390_CSS_SUPPORT:
2436		if (!vcpu->kvm->arch.css_support) {
2437			vcpu->kvm->arch.css_support = 1;
2438			VM_EVENT(vcpu->kvm, 3, "%s", "ENABLE: CSS support");
2439			trace_kvm_s390_enable_css(vcpu->kvm);
2440		}
2441		r = 0;
2442		break;
2443	default:
2444		r = -EINVAL;
2445		break;
2446	}
2447	return r;
2448}
2449
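/*
 * Handler for the KVM_S390_MEM_OP vcpu ioctl: read from or write to guest
 * logical memory on behalf of userspace.  KVM_S390_MEMOP_F_CHECK_ONLY only
 * checks whether the access would succeed, KVM_S390_MEMOP_F_INJECT_EXCEPTION
 * injects the resulting program exception into the guest on failure.
 *
 * Rough userspace usage (illustrative sketch only, not taken from this file):
 *
 *	struct kvm_s390_mem_op op = {
 *		.gaddr	= guest_addr,
 *		.size	= length,
 *		.op	= KVM_S390_MEMOP_LOGICAL_READ,
 *		.buf	= (__u64)(unsigned long)local_buf,
 *		.ar	= 0,
 *	};
 *	if (ioctl(vcpu_fd, KVM_S390_MEM_OP, &op) < 0)
 *		perror("KVM_S390_MEM_OP");
 */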
2450static long kvm_s390_guest_mem_op(struct kvm_vcpu *vcpu,
2451				  struct kvm_s390_mem_op *mop)
2452{
2453	void __user *uaddr = (void __user *)mop->buf;
2454	void *tmpbuf = NULL;
2455	int r, srcu_idx;
2456	const u64 supported_flags = KVM_S390_MEMOP_F_INJECT_EXCEPTION
2457				    | KVM_S390_MEMOP_F_CHECK_ONLY;
2458
2459	if (mop->flags & ~supported_flags)
2460		return -EINVAL;
2461
2462	if (mop->size > MEM_OP_MAX_SIZE)
2463		return -E2BIG;
2464
2465	if (!(mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY)) {
2466		tmpbuf = vmalloc(mop->size);
2467		if (!tmpbuf)
2468			return -ENOMEM;
2469	}
2470
2471	srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
2472
2473	switch (mop->op) {
2474	case KVM_S390_MEMOP_LOGICAL_READ:
2475		if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
2476			r = check_gva_range(vcpu, mop->gaddr, mop->ar, mop->size, false);
2477			break;
2478		}
2479		r = read_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size);
2480		if (r == 0) {
2481			if (copy_to_user(uaddr, tmpbuf, mop->size))
2482				r = -EFAULT;
2483		}
2484		break;
2485	case KVM_S390_MEMOP_LOGICAL_WRITE:
2486		if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
2487			r = check_gva_range(vcpu, mop->gaddr, mop->ar, mop->size, true);
2488			break;
2489		}
2490		if (copy_from_user(tmpbuf, uaddr, mop->size)) {
2491			r = -EFAULT;
2492			break;
2493		}
2494		r = write_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size);
2495		break;
2496	default:
2497		r = -EINVAL;
2498	}
2499
2500	srcu_read_unlock(&vcpu->kvm->srcu, srcu_idx);
2501
2502	if (r > 0 && (mop->flags & KVM_S390_MEMOP_F_INJECT_EXCEPTION) != 0)
2503		kvm_s390_inject_prog_irq(vcpu, &vcpu->arch.pgm);
2504
2505	vfree(tmpbuf);
2506	return r;
2507}
2508
2509long kvm_arch_vcpu_ioctl(struct file *filp,
2510			 unsigned int ioctl, unsigned long arg)
2511{
2512	struct kvm_vcpu *vcpu = filp->private_data;
2513	void __user *argp = (void __user *)arg;
2514	int idx;
2515	long r;
2516
2517	switch (ioctl) {
2518	case KVM_S390_IRQ: {
2519		struct kvm_s390_irq s390irq;
2520
2521		r = -EFAULT;
2522		if (copy_from_user(&s390irq, argp, sizeof(s390irq)))
2523			break;
2524		r = kvm_s390_inject_vcpu(vcpu, &s390irq);
2525		break;
2526	}
2527	case KVM_S390_INTERRUPT: {
2528		struct kvm_s390_interrupt s390int;
2529		struct kvm_s390_irq s390irq;
2530
2531		r = -EFAULT;
2532		if (copy_from_user(&s390int, argp, sizeof(s390int)))
2533			break;
2534		if (s390int_to_s390irq(&s390int, &s390irq))
2535			return -EINVAL;
2536		r = kvm_s390_inject_vcpu(vcpu, &s390irq);
2537		break;
2538	}
2539	case KVM_S390_STORE_STATUS:
2540		idx = srcu_read_lock(&vcpu->kvm->srcu);
2541		r = kvm_s390_vcpu_store_status(vcpu, arg);
2542		srcu_read_unlock(&vcpu->kvm->srcu, idx);
2543		break;
2544	case KVM_S390_SET_INITIAL_PSW: {
2545		psw_t psw;
2546
2547		r = -EFAULT;
2548		if (copy_from_user(&psw, argp, sizeof(psw)))
2549			break;
2550		r = kvm_arch_vcpu_ioctl_set_initial_psw(vcpu, psw);
2551		break;
2552	}
2553	case KVM_S390_INITIAL_RESET:
2554		r = kvm_arch_vcpu_ioctl_initial_reset(vcpu);
2555		break;
2556	case KVM_SET_ONE_REG:
2557	case KVM_GET_ONE_REG: {
2558		struct kvm_one_reg reg;
2559		r = -EFAULT;
2560		if (copy_from_user(&reg, argp, sizeof(reg)))
2561			break;
2562		if (ioctl == KVM_SET_ONE_REG)
2563			r = kvm_arch_vcpu_ioctl_set_one_reg(vcpu, &reg);
2564		else
2565			r = kvm_arch_vcpu_ioctl_get_one_reg(vcpu, &reg);
2566		break;
2567	}
2568#ifdef CONFIG_KVM_S390_UCONTROL
2569	case KVM_S390_UCAS_MAP: {
2570		struct kvm_s390_ucas_mapping ucasmap;
2571
2572		if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
2573			r = -EFAULT;
2574			break;
2575		}
2576
2577		if (!kvm_is_ucontrol(vcpu->kvm)) {
2578			r = -EINVAL;
2579			break;
2580		}
2581
2582		r = gmap_map_segment(vcpu->arch.gmap, ucasmap.user_addr,
2583				     ucasmap.vcpu_addr, ucasmap.length);
2584		break;
2585	}
2586	case KVM_S390_UCAS_UNMAP: {
2587		struct kvm_s390_ucas_mapping ucasmap;
2588
2589		if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
2590			r = -EFAULT;
2591			break;
2592		}
2593
2594		if (!kvm_is_ucontrol(vcpu->kvm)) {
2595			r = -EINVAL;
2596			break;
2597		}
2598
2599		r = gmap_unmap_segment(vcpu->arch.gmap, ucasmap.vcpu_addr,
2600			ucasmap.length);
2601		break;
2602	}
2603#endif
2604	case KVM_S390_VCPU_FAULT: {
2605		r = gmap_fault(vcpu->arch.gmap, arg, 0);
2606		break;
2607	}
2608	case KVM_ENABLE_CAP:
2609	{
2610		struct kvm_enable_cap cap;
2611		r = -EFAULT;
2612		if (copy_from_user(&cap, argp, sizeof(cap)))
2613			break;
2614		r = kvm_vcpu_ioctl_enable_cap(vcpu, &cap);
2615		break;
2616	}
2617	case KVM_S390_MEM_OP: {
2618		struct kvm_s390_mem_op mem_op;
2619
2620		if (copy_from_user(&mem_op, argp, sizeof(mem_op)) == 0)
2621			r = kvm_s390_guest_mem_op(vcpu, &mem_op);
2622		else
2623			r = -EFAULT;
2624		break;
2625	}
2626	case KVM_S390_SET_IRQ_STATE: {
2627		struct kvm_s390_irq_state irq_state;
2628
2629		r = -EFAULT;
2630		if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
2631			break;
2632		if (irq_state.len > VCPU_IRQS_MAX_BUF ||
2633		    irq_state.len == 0 ||
2634		    irq_state.len % sizeof(struct kvm_s390_irq) > 0) {
2635			r = -EINVAL;
2636			break;
2637		}
2638		r = kvm_s390_set_irq_state(vcpu,
2639					   (void __user *) irq_state.buf,
2640					   irq_state.len);
2641		break;
2642	}
2643	case KVM_S390_GET_IRQ_STATE: {
2644		struct kvm_s390_irq_state irq_state;
2645
2646		r = -EFAULT;
2647		if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
2648			break;
2649		if (irq_state.len == 0) {
2650			r = -EINVAL;
2651			break;
2652		}
2653		r = kvm_s390_get_irq_state(vcpu,
2654					   (__u8 __user *)  irq_state.buf,
2655					   irq_state.len);
2656		break;
2657	}
2658	default:
2659		r = -ENOTTY;
2660	}
2661	return r;
2662}
2663
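/*
 * Fault handler for mmap() on the vcpu fd: in ucontrol mode the SIE control
 * block is exposed to userspace at KVM_S390_SIE_PAGE_OFFSET; any other
 * access results in SIGBUS.
 */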
2664int kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf)
2665{
2666#ifdef CONFIG_KVM_S390_UCONTROL
2667	if ((vmf->pgoff == KVM_S390_SIE_PAGE_OFFSET)
2668		 && (kvm_is_ucontrol(vcpu->kvm))) {
2669		vmf->page = virt_to_page(vcpu->arch.sie_block);
2670		get_page(vmf->page);
2671		return 0;
2672	}
2673#endif
2674	return VM_FAULT_SIGBUS;
2675}
2676
2677int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot,
2678			    unsigned long npages)
2679{
2680	return 0;
2681}
2682
2683/* Section: memory related */
2684int kvm_arch_prepare_memory_region(struct kvm *kvm,
2685				   struct kvm_memory_slot *memslot,
2686				   const struct kvm_userspace_memory_region *mem,
2687				   enum kvm_mr_change change)
2688{
2689	/* A few sanity checks. Memory slots have to start and end at a
2690	   segment boundary (1 MB). The memory in userland may be fragmented
2691	   across different vmas. It is fine to mmap() and munmap() memory in
2692	   this slot at any time after this call. */
2693
2694	if (mem->userspace_addr & 0xffffful)
2695		return -EINVAL;
2696
2697	if (mem->memory_size & 0xffffful)
2698		return -EINVAL;
2699
2700	return 0;
2701}
2702
2703void kvm_arch_commit_memory_region(struct kvm *kvm,
2704				const struct kvm_userspace_memory_region *mem,
2705				const struct kvm_memory_slot *old,
2706				const struct kvm_memory_slot *new,
2707				enum kvm_mr_change change)
2708{
2709	int rc;
2710
2711	/* If the basics of the memslot do not change, we do not want
2712	 * to update the gmap. Every update causes several unnecessary
2713	 * segment translation exceptions. This is usually handled just
2714	 * fine by the normal fault handler + gmap, but it will also
2715	 * cause faults on the prefix page of running guest CPUs.
2716	 */
2717	if (old->userspace_addr == mem->userspace_addr &&
2718	    old->base_gfn * PAGE_SIZE == mem->guest_phys_addr &&
2719	    old->npages * PAGE_SIZE == mem->memory_size)
2720		return;
2721
2722	rc = gmap_map_segment(kvm->arch.gmap, mem->userspace_addr,
2723		mem->guest_phys_addr, mem->memory_size);
2724	if (rc)
2725		pr_warn("failed to commit memory region\n");
2726	return;
2727}
2728
2729static int __init kvm_s390_init(void)
2730{
2731	return kvm_init(NULL, sizeof(struct kvm_vcpu), 0, THIS_MODULE);
2732}
2733
2734static void __exit kvm_s390_exit(void)
2735{
2736	kvm_exit();
2737}
2738
2739module_init(kvm_s390_init);
2740module_exit(kvm_s390_exit);
2741
2742/*
2743 * Enable autoloading of the kvm module.
2744 * Note that we add the module alias here instead of virt/kvm/kvm_main.c
2745 * since x86 takes a different approach.
2746 */
2747#include <linux/miscdevice.h>
2748MODULE_ALIAS_MISCDEV(KVM_MINOR);
2749MODULE_ALIAS("devname:kvm");
2750