root/arch/x86/kvm/lapic.c


DEFINITIONS

This source file includes the following definitions:
  1. apic_test_vector
  2. kvm_apic_pending_eoi
  3. __apic_test_and_set_vector
  4. __apic_test_and_clear_vector
  5. apic_enabled
  6. kvm_x2apic_id
  7. kvm_can_post_timer_interrupt
  8. kvm_use_posted_timer_interrupt
  9. kvm_apic_map_get_logical_dest
  10. kvm_apic_map_free
  11. recalculate_apic_map
  12. apic_set_spiv
  13. kvm_apic_set_xapic_id
  14. kvm_apic_set_ldr
  15. kvm_apic_calc_x2apic_ldr
  16. kvm_apic_set_x2apic_id
  17. apic_lvt_enabled
  18. apic_lvt_vector
  19. apic_lvtt_oneshot
  20. apic_lvtt_period
  21. apic_lvtt_tscdeadline
  22. apic_lvt_nmi_mode
  23. kvm_apic_set_version
  24. find_highest_vector
  25. count_vectors
  26. __kvm_apic_update_irr
  27. kvm_apic_update_irr
  28. apic_search_irr
  29. apic_find_highest_irr
  30. apic_clear_irr
  31. apic_set_isr
  32. apic_find_highest_isr
  33. apic_clear_isr
  34. kvm_lapic_find_highest_irr
  35. kvm_apic_set_irq
  36. kvm_pv_send_ipi
  37. pv_eoi_put_user
  38. pv_eoi_get_user
  39. pv_eoi_enabled
  40. pv_eoi_get_pending
  41. pv_eoi_set_pending
  42. pv_eoi_clr_pending
  43. apic_has_interrupt_for_ppr
  44. __apic_update_ppr
  45. apic_update_ppr
  46. kvm_apic_update_ppr
  47. apic_set_tpr
  48. kvm_apic_broadcast
  49. kvm_apic_match_physical_addr
  50. kvm_apic_match_logical_addr
  51. kvm_apic_mda
  52. kvm_apic_match_dest
  53. kvm_vector_to_index
  54. kvm_apic_disabled_lapic_found
  55. kvm_apic_is_broadcast_dest
  56. kvm_apic_map_get_dest_lapic
  57. kvm_irq_delivery_to_apic_fast
  58. kvm_intr_is_single_vcpu_fast
  59. __apic_accept_irq
  60. kvm_apic_compare_prio
  61. kvm_ioapic_handles_vector
  62. kvm_ioapic_send_eoi
  63. apic_set_eoi
  64. kvm_apic_set_eoi_accelerated
  65. apic_send_ipi
  66. apic_get_tmcct
  67. __report_tpr_access
  68. report_tpr_access
  69. __apic_read
  70. to_lapic
  71. kvm_lapic_reg_read
  72. apic_mmio_in_range
  73. apic_mmio_read
  74. update_divide_count
  75. limit_periodic_timer_frequency
  76. apic_update_lvtt
  77. lapic_timer_int_injected
  78. __wait_lapic_expire
  79. adjust_lapic_timer_advance
  80. __kvm_wait_lapic_expire
  81. kvm_wait_lapic_expire
  82. kvm_apic_inject_pending_timer_irqs
  83. apic_timer_expired
  84. start_sw_tscdeadline
  85. update_target_expiration
  86. set_target_expiration
  87. advance_periodic_target_expiration
  88. start_sw_period
  89. kvm_lapic_hv_timer_in_use
  90. cancel_hv_timer
  91. start_hv_timer
  92. start_sw_timer
  93. restart_apic_timer
  94. kvm_lapic_expired_hv_timer
  95. kvm_lapic_switch_to_hv_timer
  96. kvm_lapic_switch_to_sw_timer
  97. kvm_lapic_restart_hv_timer
  98. start_apic_timer
  99. apic_manage_nmi_watchdog
  100. kvm_lapic_reg_write
  101. apic_mmio_write
  102. kvm_lapic_set_eoi
  103. kvm_apic_write_nodecode
  104. kvm_free_lapic
  105. kvm_get_lapic_tscdeadline_msr
  106. kvm_set_lapic_tscdeadline_msr
  107. kvm_lapic_set_tpr
  108. kvm_lapic_get_cr8
  109. kvm_lapic_set_base
  110. kvm_lapic_reset
  111. lapic_is_periodic
  112. apic_has_pending_timer
  113. kvm_apic_local_deliver
  114. kvm_apic_nmi_wd_deliver
  115. apic_timer_fn
  116. kvm_create_lapic
  117. kvm_apic_has_interrupt
  118. kvm_apic_accept_pic_intr
  119. kvm_inject_apic_timer_irqs
  120. kvm_get_apic_interrupt
  121. kvm_apic_state_fixup
  122. kvm_apic_get_state
  123. kvm_apic_set_state
  124. __kvm_migrate_apic_timer
  125. apic_sync_pv_eoi_from_guest
  126. kvm_lapic_sync_from_vapic
  127. apic_sync_pv_eoi_to_guest
  128. kvm_lapic_sync_to_vapic
  129. kvm_lapic_set_vapic_addr
  130. kvm_x2apic_msr_write
  131. kvm_x2apic_msr_read
  132. kvm_hv_vapic_msr_write
  133. kvm_hv_vapic_msr_read
  134. kvm_lapic_enable_pv_eoi
  135. kvm_apic_accept_events
  136. kvm_lapic_init
  137. kvm_lapic_exit

   1 // SPDX-License-Identifier: GPL-2.0-only
   2 
   3 /*
   4  * Local APIC virtualization
   5  *
   6  * Copyright (C) 2006 Qumranet, Inc.
   7  * Copyright (C) 2007 Novell
   8  * Copyright (C) 2007 Intel
   9  * Copyright 2009 Red Hat, Inc. and/or its affiliates.
  10  *
  11  * Authors:
  12  *   Dor Laor <dor.laor@qumranet.com>
  13  *   Gregory Haskins <ghaskins@novell.com>
  14  *   Yaozu (Eddie) Dong <eddie.dong@intel.com>
  15  *
  16  * Based on Xen 3.1 code, Copyright (c) 2004, Intel Corporation.
  17  */
  18 
  19 #include <linux/kvm_host.h>
  20 #include <linux/kvm.h>
  21 #include <linux/mm.h>
  22 #include <linux/highmem.h>
  23 #include <linux/smp.h>
  24 #include <linux/hrtimer.h>
  25 #include <linux/io.h>
  26 #include <linux/export.h>
  27 #include <linux/math64.h>
  28 #include <linux/slab.h>
  29 #include <asm/processor.h>
  30 #include <asm/msr.h>
  31 #include <asm/page.h>
  32 #include <asm/current.h>
  33 #include <asm/apicdef.h>
  34 #include <asm/delay.h>
  35 #include <linux/atomic.h>
  36 #include <linux/jump_label.h>
  37 #include "kvm_cache_regs.h"
  38 #include "irq.h"
  39 #include "trace.h"
  40 #include "x86.h"
  41 #include "cpuid.h"
  42 #include "hyperv.h"
  43 
  44 #ifndef CONFIG_X86_64
  45 #define mod_64(x, y) ((x) - (y) * div64_u64(x, y))
  46 #else
  47 #define mod_64(x, y) ((x) % (y))
  48 #endif
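/*
 * A note on mod_64(): on 32-bit builds a plain '%' on u64 operands would
 * need a libgcc helper, so the macro is built from div64_u64() using
 * x mod y == x - y * (x / y); e.g. mod_64(10, 3) == 10 - 3 * 3 == 1.
 */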
  49 
  50 #define PRId64 "d"
  51 #define PRIx64 "llx"
  52 #define PRIu64 "u"
  53 #define PRIo64 "o"
  54 
   55 /* 14 is the version for Xeon and Pentium 8.4.8 */
  56 #define APIC_VERSION                    (0x14UL | ((KVM_APIC_LVT_NUM - 1) << 16))
  57 #define LAPIC_MMIO_LENGTH               (1 << 12)
   58 /* the following defines are not in apicdef.h */
  59 #define APIC_SHORT_MASK                 0xc0000
  60 #define APIC_DEST_NOSHORT               0x0
  61 #define APIC_DEST_MASK                  0x800
  62 #define MAX_APIC_VECTOR                 256
  63 #define APIC_VECTORS_PER_REG            32
  64 
  65 #define APIC_BROADCAST                  0xFF
  66 #define X2APIC_BROADCAST                0xFFFFFFFFul
  67 
  68 static bool lapic_timer_advance_dynamic __read_mostly;
  69 #define LAPIC_TIMER_ADVANCE_ADJUST_MIN  100     /* clock cycles */
  70 #define LAPIC_TIMER_ADVANCE_ADJUST_MAX  10000   /* clock cycles */
  71 #define LAPIC_TIMER_ADVANCE_NS_INIT     1000
  72 #define LAPIC_TIMER_ADVANCE_NS_MAX     5000
  73 /* step-by-step approximation to mitigate fluctuation */
  74 #define LAPIC_TIMER_ADVANCE_ADJUST_STEP 8
  75 
  76 static inline int apic_test_vector(int vec, void *bitmap)
  77 {
  78         return test_bit(VEC_POS(vec), (bitmap) + REG_POS(vec));
  79 }
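/*
 * Layout assumed by apic_test_vector() above: IRR/ISR/TMR are arrays of
 * eight 32-bit registers spaced 0x10 bytes apart, and VEC_POS()/REG_POS()
 * (from lapic.h) split a vector accordingly.  Vector 0x31, for example,
 * maps to bit (0x31 & 0x1f) == 17 of the word at offset
 * ((0x31 >> 5) << 4) == 0x10 within the bitmap.
 */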
  80 
  81 bool kvm_apic_pending_eoi(struct kvm_vcpu *vcpu, int vector)
  82 {
  83         struct kvm_lapic *apic = vcpu->arch.apic;
  84 
  85         return apic_test_vector(vector, apic->regs + APIC_ISR) ||
  86                 apic_test_vector(vector, apic->regs + APIC_IRR);
  87 }
  88 
  89 static inline int __apic_test_and_set_vector(int vec, void *bitmap)
  90 {
  91         return __test_and_set_bit(VEC_POS(vec), (bitmap) + REG_POS(vec));
  92 }
  93 
  94 static inline int __apic_test_and_clear_vector(int vec, void *bitmap)
  95 {
  96         return __test_and_clear_bit(VEC_POS(vec), (bitmap) + REG_POS(vec));
  97 }
  98 
  99 struct static_key_deferred apic_hw_disabled __read_mostly;
 100 struct static_key_deferred apic_sw_disabled __read_mostly;
 101 
 102 static inline int apic_enabled(struct kvm_lapic *apic)
 103 {
  104         return kvm_apic_sw_enabled(apic) && kvm_apic_hw_enabled(apic);
 105 }
 106 
 107 #define LVT_MASK        \
 108         (APIC_LVT_MASKED | APIC_SEND_PENDING | APIC_VECTOR_MASK)
 109 
 110 #define LINT_MASK       \
 111         (LVT_MASK | APIC_MODE_MASK | APIC_INPUT_POLARITY | \
 112          APIC_LVT_REMOTE_IRR | APIC_LVT_LEVEL_TRIGGER)
 113 
 114 static inline u32 kvm_x2apic_id(struct kvm_lapic *apic)
 115 {
 116         return apic->vcpu->vcpu_id;
 117 }
 118 
 119 bool kvm_can_post_timer_interrupt(struct kvm_vcpu *vcpu)
 120 {
 121         return pi_inject_timer && kvm_vcpu_apicv_active(vcpu);
 122 }
 123 EXPORT_SYMBOL_GPL(kvm_can_post_timer_interrupt);
 124 
 125 static bool kvm_use_posted_timer_interrupt(struct kvm_vcpu *vcpu)
 126 {
 127         return kvm_can_post_timer_interrupt(vcpu) && vcpu->mode == IN_GUEST_MODE;
 128 }
 129 
 130 static inline bool kvm_apic_map_get_logical_dest(struct kvm_apic_map *map,
 131                 u32 dest_id, struct kvm_lapic ***cluster, u16 *mask) {
 132         switch (map->mode) {
 133         case KVM_APIC_MODE_X2APIC: {
 134                 u32 offset = (dest_id >> 16) * 16;
 135                 u32 max_apic_id = map->max_apic_id;
 136 
 137                 if (offset <= max_apic_id) {
 138                         u8 cluster_size = min(max_apic_id - offset + 1, 16U);
 139 
 140                         offset = array_index_nospec(offset, map->max_apic_id + 1);
 141                         *cluster = &map->phys_map[offset];
 142                         *mask = dest_id & (0xffff >> (16 - cluster_size));
 143                 } else {
 144                         *mask = 0;
 145                 }
 146 
 147                 return true;
 148                 }
 149         case KVM_APIC_MODE_XAPIC_FLAT:
 150                 *cluster = map->xapic_flat_map;
 151                 *mask = dest_id & 0xff;
 152                 return true;
 153         case KVM_APIC_MODE_XAPIC_CLUSTER:
 154                 *cluster = map->xapic_cluster_map[(dest_id >> 4) & 0xf];
 155                 *mask = dest_id & 0xf;
 156                 return true;
 157         default:
 158                 /* Not optimized. */
 159                 return false;
 160         }
 161 }
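/*
 * Worked example for the x2APIC case above (assuming max_apic_id is large
 * enough): a logical dest_id of 0x00020008 selects cluster 2 (bits 31:16)
 * and bit 3 of the in-cluster mask (bits 15:0), so *cluster points at
 * &phys_map[2 * 16] and the caller ends up targeting the LAPIC whose
 * x2APIC ID is 2 * 16 + 3 == 35.
 */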
 162 
 163 static void kvm_apic_map_free(struct rcu_head *rcu)
 164 {
 165         struct kvm_apic_map *map = container_of(rcu, struct kvm_apic_map, rcu);
 166 
 167         kvfree(map);
 168 }
 169 
 170 static void recalculate_apic_map(struct kvm *kvm)
 171 {
 172         struct kvm_apic_map *new, *old = NULL;
 173         struct kvm_vcpu *vcpu;
 174         int i;
 175         u32 max_id = 255; /* enough space for any xAPIC ID */
 176 
 177         mutex_lock(&kvm->arch.apic_map_lock);
 178 
 179         kvm_for_each_vcpu(i, vcpu, kvm)
 180                 if (kvm_apic_present(vcpu))
 181                         max_id = max(max_id, kvm_x2apic_id(vcpu->arch.apic));
 182 
 183         new = kvzalloc(sizeof(struct kvm_apic_map) +
 184                            sizeof(struct kvm_lapic *) * ((u64)max_id + 1),
 185                            GFP_KERNEL_ACCOUNT);
 186 
 187         if (!new)
 188                 goto out;
 189 
 190         new->max_apic_id = max_id;
 191 
 192         kvm_for_each_vcpu(i, vcpu, kvm) {
 193                 struct kvm_lapic *apic = vcpu->arch.apic;
 194                 struct kvm_lapic **cluster;
 195                 u16 mask;
 196                 u32 ldr;
 197                 u8 xapic_id;
 198                 u32 x2apic_id;
 199 
 200                 if (!kvm_apic_present(vcpu))
 201                         continue;
 202 
 203                 xapic_id = kvm_xapic_id(apic);
 204                 x2apic_id = kvm_x2apic_id(apic);
 205 
 206                 /* Hotplug hack: see kvm_apic_match_physical_addr(), ... */
 207                 if ((apic_x2apic_mode(apic) || x2apic_id > 0xff) &&
 208                                 x2apic_id <= new->max_apic_id)
 209                         new->phys_map[x2apic_id] = apic;
 210                 /*
 211                  * ... xAPIC ID of VCPUs with APIC ID > 0xff will wrap-around,
 212                  * prevent them from masking VCPUs with APIC ID <= 0xff.
 213                  */
 214                 if (!apic_x2apic_mode(apic) && !new->phys_map[xapic_id])
 215                         new->phys_map[xapic_id] = apic;
 216 
 217                 if (!kvm_apic_sw_enabled(apic))
 218                         continue;
 219 
 220                 ldr = kvm_lapic_get_reg(apic, APIC_LDR);
 221 
 222                 if (apic_x2apic_mode(apic)) {
 223                         new->mode |= KVM_APIC_MODE_X2APIC;
 224                 } else if (ldr) {
 225                         ldr = GET_APIC_LOGICAL_ID(ldr);
 226                         if (kvm_lapic_get_reg(apic, APIC_DFR) == APIC_DFR_FLAT)
 227                                 new->mode |= KVM_APIC_MODE_XAPIC_FLAT;
 228                         else
 229                                 new->mode |= KVM_APIC_MODE_XAPIC_CLUSTER;
 230                 }
 231 
 232                 if (!kvm_apic_map_get_logical_dest(new, ldr, &cluster, &mask))
 233                         continue;
 234 
 235                 if (mask)
 236                         cluster[ffs(mask) - 1] = apic;
 237         }
 238 out:
 239         old = rcu_dereference_protected(kvm->arch.apic_map,
 240                         lockdep_is_held(&kvm->arch.apic_map_lock));
 241         rcu_assign_pointer(kvm->arch.apic_map, new);
 242         mutex_unlock(&kvm->arch.apic_map_lock);
 243 
 244         if (old)
 245                 call_rcu(&old->rcu, kvm_apic_map_free);
 246 
 247         kvm_make_scan_ioapic_request(kvm);
 248 }
 249 
 250 static inline void apic_set_spiv(struct kvm_lapic *apic, u32 val)
 251 {
 252         bool enabled = val & APIC_SPIV_APIC_ENABLED;
 253 
 254         kvm_lapic_set_reg(apic, APIC_SPIV, val);
 255 
 256         if (enabled != apic->sw_enabled) {
 257                 apic->sw_enabled = enabled;
 258                 if (enabled)
 259                         static_key_slow_dec_deferred(&apic_sw_disabled);
 260                 else
 261                         static_key_slow_inc(&apic_sw_disabled.key);
 262 
 263                 recalculate_apic_map(apic->vcpu->kvm);
 264         }
 265 }
 266 
 267 static inline void kvm_apic_set_xapic_id(struct kvm_lapic *apic, u8 id)
 268 {
 269         kvm_lapic_set_reg(apic, APIC_ID, id << 24);
 270         recalculate_apic_map(apic->vcpu->kvm);
 271 }
 272 
 273 static inline void kvm_apic_set_ldr(struct kvm_lapic *apic, u32 id)
 274 {
 275         kvm_lapic_set_reg(apic, APIC_LDR, id);
 276         recalculate_apic_map(apic->vcpu->kvm);
 277 }
 278 
 279 static inline u32 kvm_apic_calc_x2apic_ldr(u32 id)
 280 {
 281         return ((id >> 4) << 16) | (1 << (id & 0xf));
 282 }
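/*
 * The LDR computed above is the x2APIC logical ID: cluster (id >> 4) in
 * bits 31:16 and a one-hot position (id & 0xf) in bits 15:0.  For example,
 * x2APIC ID 0x23 yields LDR 0x00020008 (cluster 2, bit 3).
 */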
 283 
 284 static inline void kvm_apic_set_x2apic_id(struct kvm_lapic *apic, u32 id)
 285 {
 286         u32 ldr = kvm_apic_calc_x2apic_ldr(id);
 287 
 288         WARN_ON_ONCE(id != apic->vcpu->vcpu_id);
 289 
 290         kvm_lapic_set_reg(apic, APIC_ID, id);
 291         kvm_lapic_set_reg(apic, APIC_LDR, ldr);
 292         recalculate_apic_map(apic->vcpu->kvm);
 293 }
 294 
 295 static inline int apic_lvt_enabled(struct kvm_lapic *apic, int lvt_type)
 296 {
 297         return !(kvm_lapic_get_reg(apic, lvt_type) & APIC_LVT_MASKED);
 298 }
 299 
 300 static inline int apic_lvt_vector(struct kvm_lapic *apic, int lvt_type)
 301 {
 302         return kvm_lapic_get_reg(apic, lvt_type) & APIC_VECTOR_MASK;
 303 }
 304 
 305 static inline int apic_lvtt_oneshot(struct kvm_lapic *apic)
 306 {
 307         return apic->lapic_timer.timer_mode == APIC_LVT_TIMER_ONESHOT;
 308 }
 309 
 310 static inline int apic_lvtt_period(struct kvm_lapic *apic)
 311 {
 312         return apic->lapic_timer.timer_mode == APIC_LVT_TIMER_PERIODIC;
 313 }
 314 
 315 static inline int apic_lvtt_tscdeadline(struct kvm_lapic *apic)
 316 {
 317         return apic->lapic_timer.timer_mode == APIC_LVT_TIMER_TSCDEADLINE;
 318 }
 319 
 320 static inline int apic_lvt_nmi_mode(u32 lvt_val)
 321 {
 322         return (lvt_val & (APIC_MODE_MASK | APIC_LVT_MASKED)) == APIC_DM_NMI;
 323 }
 324 
 325 void kvm_apic_set_version(struct kvm_vcpu *vcpu)
 326 {
 327         struct kvm_lapic *apic = vcpu->arch.apic;
 328         struct kvm_cpuid_entry2 *feat;
 329         u32 v = APIC_VERSION;
 330 
 331         if (!lapic_in_kernel(vcpu))
 332                 return;
 333 
  334         /*
  335          * KVM emulates the 82093AA datasheet (with the in-kernel IOAPIC
  336          * implementation), which doesn't have an EOI register.  Some buggy
  337          * OSes (e.g. Windows with the Hyper-V role) disable EOI broadcast
  338          * in the LAPIC without checking the IOAPIC version first, so
  339          * level-triggered interrupts never get EOIed in the IOAPIC.
  340          */
 341         feat = kvm_find_cpuid_entry(apic->vcpu, 0x1, 0);
 342         if (feat && (feat->ecx & (1 << (X86_FEATURE_X2APIC & 31))) &&
 343             !ioapic_in_kernel(vcpu->kvm))
 344                 v |= APIC_LVR_DIRECTED_EOI;
 345         kvm_lapic_set_reg(apic, APIC_LVR, v);
 346 }
 347 
 348 static const unsigned int apic_lvt_mask[KVM_APIC_LVT_NUM] = {
  349         LVT_MASK,       /* part LVTT mask, timer mode mask added at runtime */
 350         LVT_MASK | APIC_MODE_MASK,      /* LVTTHMR */
 351         LVT_MASK | APIC_MODE_MASK,      /* LVTPC */
 352         LINT_MASK, LINT_MASK,   /* LVT0-1 */
 353         LVT_MASK                /* LVTERR */
 354 };
 355 
 356 static int find_highest_vector(void *bitmap)
 357 {
 358         int vec;
 359         u32 *reg;
 360 
 361         for (vec = MAX_APIC_VECTOR - APIC_VECTORS_PER_REG;
 362              vec >= 0; vec -= APIC_VECTORS_PER_REG) {
 363                 reg = bitmap + REG_POS(vec);
 364                 if (*reg)
 365                         return __fls(*reg) + vec;
 366         }
 367 
 368         return -1;
 369 }
 370 
 371 static u8 count_vectors(void *bitmap)
 372 {
 373         int vec;
 374         u32 *reg;
 375         u8 count = 0;
 376 
 377         for (vec = 0; vec < MAX_APIC_VECTOR; vec += APIC_VECTORS_PER_REG) {
 378                 reg = bitmap + REG_POS(vec);
 379                 count += hweight32(*reg);
 380         }
 381 
 382         return count;
 383 }
 384 
 385 bool __kvm_apic_update_irr(u32 *pir, void *regs, int *max_irr)
 386 {
 387         u32 i, vec;
 388         u32 pir_val, irr_val, prev_irr_val;
 389         int max_updated_irr;
 390 
 391         max_updated_irr = -1;
 392         *max_irr = -1;
 393 
 394         for (i = vec = 0; i <= 7; i++, vec += 32) {
 395                 pir_val = READ_ONCE(pir[i]);
 396                 irr_val = *((u32 *)(regs + APIC_IRR + i * 0x10));
 397                 if (pir_val) {
 398                         prev_irr_val = irr_val;
 399                         irr_val |= xchg(&pir[i], 0);
 400                         *((u32 *)(regs + APIC_IRR + i * 0x10)) = irr_val;
 401                         if (prev_irr_val != irr_val) {
 402                                 max_updated_irr =
 403                                         __fls(irr_val ^ prev_irr_val) + vec;
 404                         }
 405                 }
 406                 if (irr_val)
 407                         *max_irr = __fls(irr_val) + vec;
 408         }
 409 
 410         return ((max_updated_irr != -1) &&
 411                 (max_updated_irr == *max_irr));
 412 }
 413 EXPORT_SYMBOL_GPL(__kvm_apic_update_irr);
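/*
 * In short, __kvm_apic_update_irr() atomically swaps each nonzero
 * posted-interrupt request (PIR) word to zero, ORs it into the matching
 * IRR word, and returns true only when the highest pending vector
 * (*max_irr) was newly contributed by the PIR rather than already present
 * in the IRR.
 */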
 414 
 415 bool kvm_apic_update_irr(struct kvm_vcpu *vcpu, u32 *pir, int *max_irr)
 416 {
 417         struct kvm_lapic *apic = vcpu->arch.apic;
 418 
 419         return __kvm_apic_update_irr(pir, apic->regs, max_irr);
 420 }
 421 EXPORT_SYMBOL_GPL(kvm_apic_update_irr);
 422 
 423 static inline int apic_search_irr(struct kvm_lapic *apic)
 424 {
 425         return find_highest_vector(apic->regs + APIC_IRR);
 426 }
 427 
 428 static inline int apic_find_highest_irr(struct kvm_lapic *apic)
 429 {
 430         int result;
 431 
 432         /*
  433          * Note that irr_pending is just a hint. It will always be
 434          * true with virtual interrupt delivery enabled.
 435          */
 436         if (!apic->irr_pending)
 437                 return -1;
 438 
 439         result = apic_search_irr(apic);
 440         ASSERT(result == -1 || result >= 16);
 441 
 442         return result;
 443 }
 444 
 445 static inline void apic_clear_irr(int vec, struct kvm_lapic *apic)
 446 {
 447         struct kvm_vcpu *vcpu;
 448 
 449         vcpu = apic->vcpu;
 450 
 451         if (unlikely(vcpu->arch.apicv_active)) {
 452                 /* need to update RVI */
 453                 kvm_lapic_clear_vector(vec, apic->regs + APIC_IRR);
 454                 kvm_x86_ops->hwapic_irr_update(vcpu,
 455                                 apic_find_highest_irr(apic));
 456         } else {
 457                 apic->irr_pending = false;
 458                 kvm_lapic_clear_vector(vec, apic->regs + APIC_IRR);
 459                 if (apic_search_irr(apic) != -1)
 460                         apic->irr_pending = true;
 461         }
 462 }
 463 
 464 static inline void apic_set_isr(int vec, struct kvm_lapic *apic)
 465 {
 466         struct kvm_vcpu *vcpu;
 467 
 468         if (__apic_test_and_set_vector(vec, apic->regs + APIC_ISR))
 469                 return;
 470 
 471         vcpu = apic->vcpu;
 472 
 473         /*
 474          * With APIC virtualization enabled, all caching is disabled
 475          * because the processor can modify ISR under the hood.  Instead
 476          * just set SVI.
 477          */
 478         if (unlikely(vcpu->arch.apicv_active))
 479                 kvm_x86_ops->hwapic_isr_update(vcpu, vec);
 480         else {
 481                 ++apic->isr_count;
 482                 BUG_ON(apic->isr_count > MAX_APIC_VECTOR);
 483                 /*
  484                  * An ISR (in-service register) bit is set when an interrupt is
  485                  * injected. Only the highest vector is injected, thus the most
  486                  * recently set bit matches the highest bit in ISR.
 487                  */
 488                 apic->highest_isr_cache = vec;
 489         }
 490 }
 491 
 492 static inline int apic_find_highest_isr(struct kvm_lapic *apic)
 493 {
 494         int result;
 495 
 496         /*
 497          * Note that isr_count is always 1, and highest_isr_cache
 498          * is always -1, with APIC virtualization enabled.
 499          */
 500         if (!apic->isr_count)
 501                 return -1;
 502         if (likely(apic->highest_isr_cache != -1))
 503                 return apic->highest_isr_cache;
 504 
 505         result = find_highest_vector(apic->regs + APIC_ISR);
 506         ASSERT(result == -1 || result >= 16);
 507 
 508         return result;
 509 }
 510 
 511 static inline void apic_clear_isr(int vec, struct kvm_lapic *apic)
 512 {
 513         struct kvm_vcpu *vcpu;
 514         if (!__apic_test_and_clear_vector(vec, apic->regs + APIC_ISR))
 515                 return;
 516 
 517         vcpu = apic->vcpu;
 518 
 519         /*
 520          * We do get here for APIC virtualization enabled if the guest
 521          * uses the Hyper-V APIC enlightenment.  In this case we may need
 522          * to trigger a new interrupt delivery by writing the SVI field;
 523          * on the other hand isr_count and highest_isr_cache are unused
 524          * and must be left alone.
 525          */
 526         if (unlikely(vcpu->arch.apicv_active))
 527                 kvm_x86_ops->hwapic_isr_update(vcpu,
 528                                                apic_find_highest_isr(apic));
 529         else {
 530                 --apic->isr_count;
 531                 BUG_ON(apic->isr_count < 0);
 532                 apic->highest_isr_cache = -1;
 533         }
 534 }
 535 
 536 int kvm_lapic_find_highest_irr(struct kvm_vcpu *vcpu)
 537 {
 538         /* This may race with setting of irr in __apic_accept_irq() and
  539          * the value returned may be wrong, but kvm_vcpu_kick() in __apic_accept_irq()
  540          * will cause a vmexit immediately and the value will be recalculated
 541          * on the next vmentry.
 542          */
 543         return apic_find_highest_irr(vcpu->arch.apic);
 544 }
 545 EXPORT_SYMBOL_GPL(kvm_lapic_find_highest_irr);
 546 
 547 static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode,
 548                              int vector, int level, int trig_mode,
 549                              struct dest_map *dest_map);
 550 
 551 int kvm_apic_set_irq(struct kvm_vcpu *vcpu, struct kvm_lapic_irq *irq,
 552                      struct dest_map *dest_map)
 553 {
 554         struct kvm_lapic *apic = vcpu->arch.apic;
 555 
 556         return __apic_accept_irq(apic, irq->delivery_mode, irq->vector,
 557                         irq->level, irq->trig_mode, dest_map);
 558 }
 559 
 560 int kvm_pv_send_ipi(struct kvm *kvm, unsigned long ipi_bitmap_low,
 561                     unsigned long ipi_bitmap_high, u32 min,
 562                     unsigned long icr, int op_64_bit)
 563 {
 564         int i;
 565         struct kvm_apic_map *map;
 566         struct kvm_vcpu *vcpu;
 567         struct kvm_lapic_irq irq = {0};
 568         int cluster_size = op_64_bit ? 64 : 32;
 569         int count = 0;
 570 
 571         irq.vector = icr & APIC_VECTOR_MASK;
 572         irq.delivery_mode = icr & APIC_MODE_MASK;
 573         irq.level = (icr & APIC_INT_ASSERT) != 0;
 574         irq.trig_mode = icr & APIC_INT_LEVELTRIG;
 575 
 576         if (icr & APIC_DEST_MASK)
 577                 return -KVM_EINVAL;
 578         if (icr & APIC_SHORT_MASK)
 579                 return -KVM_EINVAL;
 580 
 581         rcu_read_lock();
 582         map = rcu_dereference(kvm->arch.apic_map);
 583 
 584         if (unlikely(!map)) {
 585                 count = -EOPNOTSUPP;
 586                 goto out;
 587         }
 588 
 589         if (min > map->max_apic_id)
 590                 goto out;
 591         /* Bits above cluster_size are masked in the caller.  */
 592         for_each_set_bit(i, &ipi_bitmap_low,
 593                 min((u32)BITS_PER_LONG, (map->max_apic_id - min + 1))) {
 594                 if (map->phys_map[min + i]) {
 595                         vcpu = map->phys_map[min + i]->vcpu;
 596                         count += kvm_apic_set_irq(vcpu, &irq, NULL);
 597                 }
 598         }
 599 
 600         min += cluster_size;
 601 
 602         if (min > map->max_apic_id)
 603                 goto out;
 604 
 605         for_each_set_bit(i, &ipi_bitmap_high,
 606                 min((u32)BITS_PER_LONG, (map->max_apic_id - min + 1))) {
 607                 if (map->phys_map[min + i]) {
 608                         vcpu = map->phys_map[min + i]->vcpu;
 609                         count += kvm_apic_set_irq(vcpu, &irq, NULL);
 610                 }
 611         }
 612 
 613 out:
 614         rcu_read_unlock();
 615         return count;
 616 }
 617 
 618 static int pv_eoi_put_user(struct kvm_vcpu *vcpu, u8 val)
 619 {
 620 
 621         return kvm_write_guest_cached(vcpu->kvm, &vcpu->arch.pv_eoi.data, &val,
 622                                       sizeof(val));
 623 }
 624 
 625 static int pv_eoi_get_user(struct kvm_vcpu *vcpu, u8 *val)
 626 {
 627 
 628         return kvm_read_guest_cached(vcpu->kvm, &vcpu->arch.pv_eoi.data, val,
 629                                       sizeof(*val));
 630 }
 631 
 632 static inline bool pv_eoi_enabled(struct kvm_vcpu *vcpu)
 633 {
 634         return vcpu->arch.pv_eoi.msr_val & KVM_MSR_ENABLED;
 635 }
 636 
 637 static bool pv_eoi_get_pending(struct kvm_vcpu *vcpu)
 638 {
 639         u8 val;
 640         if (pv_eoi_get_user(vcpu, &val) < 0) {
 641                 printk(KERN_WARNING "Can't read EOI MSR value: 0x%llx\n",
 642                            (unsigned long long)vcpu->arch.pv_eoi.msr_val);
 643                 return false;
 644         }
 645         return val & 0x1;
 646 }
 647 
 648 static void pv_eoi_set_pending(struct kvm_vcpu *vcpu)
 649 {
 650         if (pv_eoi_put_user(vcpu, KVM_PV_EOI_ENABLED) < 0) {
 651                 printk(KERN_WARNING "Can't set EOI MSR value: 0x%llx\n",
 652                            (unsigned long long)vcpu->arch.pv_eoi.msr_val);
 653                 return;
 654         }
 655         __set_bit(KVM_APIC_PV_EOI_PENDING, &vcpu->arch.apic_attention);
 656 }
 657 
 658 static void pv_eoi_clr_pending(struct kvm_vcpu *vcpu)
 659 {
 660         if (pv_eoi_put_user(vcpu, KVM_PV_EOI_DISABLED) < 0) {
 661                 printk(KERN_WARNING "Can't clear EOI MSR value: 0x%llx\n",
 662                            (unsigned long long)vcpu->arch.pv_eoi.msr_val);
 663                 return;
 664         }
 665         __clear_bit(KVM_APIC_PV_EOI_PENDING, &vcpu->arch.apic_attention);
 666 }
 667 
 668 static int apic_has_interrupt_for_ppr(struct kvm_lapic *apic, u32 ppr)
 669 {
 670         int highest_irr;
 671         if (apic->vcpu->arch.apicv_active)
 672                 highest_irr = kvm_x86_ops->sync_pir_to_irr(apic->vcpu);
 673         else
 674                 highest_irr = apic_find_highest_irr(apic);
 675         if (highest_irr == -1 || (highest_irr & 0xF0) <= ppr)
 676                 return -1;
 677         return highest_irr;
 678 }
 679 
 680 static bool __apic_update_ppr(struct kvm_lapic *apic, u32 *new_ppr)
 681 {
 682         u32 tpr, isrv, ppr, old_ppr;
 683         int isr;
 684 
 685         old_ppr = kvm_lapic_get_reg(apic, APIC_PROCPRI);
 686         tpr = kvm_lapic_get_reg(apic, APIC_TASKPRI);
 687         isr = apic_find_highest_isr(apic);
 688         isrv = (isr != -1) ? isr : 0;
 689 
 690         if ((tpr & 0xf0) >= (isrv & 0xf0))
 691                 ppr = tpr & 0xff;
 692         else
 693                 ppr = isrv & 0xf0;
 694 
 695         *new_ppr = ppr;
 696         if (old_ppr != ppr)
 697                 kvm_lapic_set_reg(apic, APIC_PROCPRI, ppr);
 698 
 699         return ppr < old_ppr;
 700 }
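/*
 * The computation above follows the architectural rule that PPR is the
 * higher of TPR and ISRV when compared on bits 7:4.  For example, with
 * TPR == 0x35 and highest in-service vector 0x41, the TPR class (0x30)
 * is below the ISR class (0x40), so PPR becomes 0x40.
 */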
 701 
 702 static void apic_update_ppr(struct kvm_lapic *apic)
 703 {
 704         u32 ppr;
 705 
 706         if (__apic_update_ppr(apic, &ppr) &&
 707             apic_has_interrupt_for_ppr(apic, ppr) != -1)
 708                 kvm_make_request(KVM_REQ_EVENT, apic->vcpu);
 709 }
 710 
 711 void kvm_apic_update_ppr(struct kvm_vcpu *vcpu)
 712 {
 713         apic_update_ppr(vcpu->arch.apic);
 714 }
 715 EXPORT_SYMBOL_GPL(kvm_apic_update_ppr);
 716 
 717 static void apic_set_tpr(struct kvm_lapic *apic, u32 tpr)
 718 {
 719         kvm_lapic_set_reg(apic, APIC_TASKPRI, tpr);
 720         apic_update_ppr(apic);
 721 }
 722 
 723 static bool kvm_apic_broadcast(struct kvm_lapic *apic, u32 mda)
 724 {
 725         return mda == (apic_x2apic_mode(apic) ?
 726                         X2APIC_BROADCAST : APIC_BROADCAST);
 727 }
 728 
 729 static bool kvm_apic_match_physical_addr(struct kvm_lapic *apic, u32 mda)
 730 {
 731         if (kvm_apic_broadcast(apic, mda))
 732                 return true;
 733 
 734         if (apic_x2apic_mode(apic))
 735                 return mda == kvm_x2apic_id(apic);
 736 
 737         /*
 738          * Hotplug hack: Make LAPIC in xAPIC mode also accept interrupts as if
 739          * it were in x2APIC mode.  Hotplugged VCPUs start in xAPIC mode and
 740          * this allows unique addressing of VCPUs with APIC ID over 0xff.
  741          * The 0xff condition is needed because the xAPIC ID is writeable.
 742          */
 743         if (kvm_x2apic_id(apic) > 0xff && mda == kvm_x2apic_id(apic))
 744                 return true;
 745 
 746         return mda == kvm_xapic_id(apic);
 747 }
 748 
 749 static bool kvm_apic_match_logical_addr(struct kvm_lapic *apic, u32 mda)
 750 {
 751         u32 logical_id;
 752 
 753         if (kvm_apic_broadcast(apic, mda))
 754                 return true;
 755 
 756         logical_id = kvm_lapic_get_reg(apic, APIC_LDR);
 757 
 758         if (apic_x2apic_mode(apic))
 759                 return ((logical_id >> 16) == (mda >> 16))
 760                        && (logical_id & mda & 0xffff) != 0;
 761 
 762         logical_id = GET_APIC_LOGICAL_ID(logical_id);
 763 
 764         switch (kvm_lapic_get_reg(apic, APIC_DFR)) {
 765         case APIC_DFR_FLAT:
 766                 return (logical_id & mda) != 0;
 767         case APIC_DFR_CLUSTER:
 768                 return ((logical_id >> 4) == (mda >> 4))
 769                        && (logical_id & mda & 0xf) != 0;
 770         default:
 771                 return false;
 772         }
 773 }
 774 
 775 /* The KVM local APIC implementation has two quirks:
 776  *
 777  *  - Real hardware delivers interrupts destined to x2APIC ID > 0xff to LAPICs
 778  *    in xAPIC mode if the "destination & 0xff" matches its xAPIC ID.
 779  *    KVM doesn't do that aliasing.
 780  *
 781  *  - in-kernel IOAPIC messages have to be delivered directly to
 782  *    x2APIC, because the kernel does not support interrupt remapping.
 783  *    In order to support broadcast without interrupt remapping, x2APIC
 784  *    rewrites the destination of non-IPI messages from APIC_BROADCAST
 785  *    to X2APIC_BROADCAST.
 786  *
 787  * The broadcast quirk can be disabled with KVM_CAP_X2APIC_API.  This is
 788  * important when userspace wants to use x2APIC-format MSIs, because
 789  * APIC_BROADCAST (0xff) is a legal route for "cluster 0, CPUs 0-7".
 790  */
 791 static u32 kvm_apic_mda(struct kvm_vcpu *vcpu, unsigned int dest_id,
 792                 struct kvm_lapic *source, struct kvm_lapic *target)
 793 {
 794         bool ipi = source != NULL;
 795 
 796         if (!vcpu->kvm->arch.x2apic_broadcast_quirk_disabled &&
 797             !ipi && dest_id == APIC_BROADCAST && apic_x2apic_mode(target))
 798                 return X2APIC_BROADCAST;
 799 
 800         return dest_id;
 801 }
 802 
 803 bool kvm_apic_match_dest(struct kvm_vcpu *vcpu, struct kvm_lapic *source,
 804                            int short_hand, unsigned int dest, int dest_mode)
 805 {
 806         struct kvm_lapic *target = vcpu->arch.apic;
 807         u32 mda = kvm_apic_mda(vcpu, dest, source, target);
 808 
 809         ASSERT(target);
 810         switch (short_hand) {
 811         case APIC_DEST_NOSHORT:
 812                 if (dest_mode == APIC_DEST_PHYSICAL)
 813                         return kvm_apic_match_physical_addr(target, mda);
 814                 else
 815                         return kvm_apic_match_logical_addr(target, mda);
 816         case APIC_DEST_SELF:
 817                 return target == source;
 818         case APIC_DEST_ALLINC:
 819                 return true;
 820         case APIC_DEST_ALLBUT:
 821                 return target != source;
 822         default:
 823                 return false;
 824         }
 825 }
 826 EXPORT_SYMBOL_GPL(kvm_apic_match_dest);
 827 
 828 int kvm_vector_to_index(u32 vector, u32 dest_vcpus,
 829                        const unsigned long *bitmap, u32 bitmap_size)
 830 {
 831         u32 mod;
 832         int i, idx = -1;
 833 
 834         mod = vector % dest_vcpus;
 835 
 836         for (i = 0; i <= mod; i++) {
 837                 idx = find_next_bit(bitmap, bitmap_size, idx + 1);
 838                 BUG_ON(idx == bitmap_size);
 839         }
 840 
 841         return idx;
 842 }
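/*
 * Example of the vector-hashing walk above: with vector 53 and three
 * candidate destinations, mod == 53 % 3 == 2, so the loop stops at the
 * third set bit in *bitmap and that destination receives the interrupt.
 */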
 843 
 844 static void kvm_apic_disabled_lapic_found(struct kvm *kvm)
 845 {
 846         if (!kvm->arch.disabled_lapic_found) {
 847                 kvm->arch.disabled_lapic_found = true;
 848                 printk(KERN_INFO
 849                        "Disabled LAPIC found during irq injection\n");
 850         }
 851 }
 852 
 853 static bool kvm_apic_is_broadcast_dest(struct kvm *kvm, struct kvm_lapic **src,
 854                 struct kvm_lapic_irq *irq, struct kvm_apic_map *map)
 855 {
 856         if (kvm->arch.x2apic_broadcast_quirk_disabled) {
 857                 if ((irq->dest_id == APIC_BROADCAST &&
 858                                 map->mode != KVM_APIC_MODE_X2APIC))
 859                         return true;
 860                 if (irq->dest_id == X2APIC_BROADCAST)
 861                         return true;
 862         } else {
 863                 bool x2apic_ipi = src && *src && apic_x2apic_mode(*src);
 864                 if (irq->dest_id == (x2apic_ipi ?
 865                                      X2APIC_BROADCAST : APIC_BROADCAST))
 866                         return true;
 867         }
 868 
 869         return false;
 870 }
 871 
 872 /* Return true if the interrupt can be handled by using *bitmap as index mask
 873  * for valid destinations in *dst array.
 874  * Return false if kvm_apic_map_get_dest_lapic did nothing useful.
 875  * Note: we may have zero kvm_lapic destinations when we return true, which
 876  * means that the interrupt should be dropped.  In this case, *bitmap would be
 877  * zero and *dst undefined.
 878  */
 879 static inline bool kvm_apic_map_get_dest_lapic(struct kvm *kvm,
 880                 struct kvm_lapic **src, struct kvm_lapic_irq *irq,
 881                 struct kvm_apic_map *map, struct kvm_lapic ***dst,
 882                 unsigned long *bitmap)
 883 {
 884         int i, lowest;
 885 
 886         if (irq->shorthand == APIC_DEST_SELF && src) {
 887                 *dst = src;
 888                 *bitmap = 1;
 889                 return true;
 890         } else if (irq->shorthand)
 891                 return false;
 892 
 893         if (!map || kvm_apic_is_broadcast_dest(kvm, src, irq, map))
 894                 return false;
 895 
 896         if (irq->dest_mode == APIC_DEST_PHYSICAL) {
 897                 if (irq->dest_id > map->max_apic_id) {
 898                         *bitmap = 0;
 899                 } else {
 900                         u32 dest_id = array_index_nospec(irq->dest_id, map->max_apic_id + 1);
 901                         *dst = &map->phys_map[dest_id];
 902                         *bitmap = 1;
 903                 }
 904                 return true;
 905         }
 906 
 907         *bitmap = 0;
 908         if (!kvm_apic_map_get_logical_dest(map, irq->dest_id, dst,
 909                                 (u16 *)bitmap))
 910                 return false;
 911 
 912         if (!kvm_lowest_prio_delivery(irq))
 913                 return true;
 914 
 915         if (!kvm_vector_hashing_enabled()) {
 916                 lowest = -1;
 917                 for_each_set_bit(i, bitmap, 16) {
 918                         if (!(*dst)[i])
 919                                 continue;
 920                         if (lowest < 0)
 921                                 lowest = i;
 922                         else if (kvm_apic_compare_prio((*dst)[i]->vcpu,
 923                                                 (*dst)[lowest]->vcpu) < 0)
 924                                 lowest = i;
 925                 }
 926         } else {
 927                 if (!*bitmap)
 928                         return true;
 929 
 930                 lowest = kvm_vector_to_index(irq->vector, hweight16(*bitmap),
 931                                 bitmap, 16);
 932 
 933                 if (!(*dst)[lowest]) {
 934                         kvm_apic_disabled_lapic_found(kvm);
 935                         *bitmap = 0;
 936                         return true;
 937                 }
 938         }
 939 
 940         *bitmap = (lowest >= 0) ? 1 << lowest : 0;
 941 
 942         return true;
 943 }
 944 
 945 bool kvm_irq_delivery_to_apic_fast(struct kvm *kvm, struct kvm_lapic *src,
 946                 struct kvm_lapic_irq *irq, int *r, struct dest_map *dest_map)
 947 {
 948         struct kvm_apic_map *map;
 949         unsigned long bitmap;
 950         struct kvm_lapic **dst = NULL;
 951         int i;
 952         bool ret;
 953 
 954         *r = -1;
 955 
 956         if (irq->shorthand == APIC_DEST_SELF) {
 957                 *r = kvm_apic_set_irq(src->vcpu, irq, dest_map);
 958                 return true;
 959         }
 960 
 961         rcu_read_lock();
 962         map = rcu_dereference(kvm->arch.apic_map);
 963 
 964         ret = kvm_apic_map_get_dest_lapic(kvm, &src, irq, map, &dst, &bitmap);
 965         if (ret) {
 966                 *r = 0;
 967                 for_each_set_bit(i, &bitmap, 16) {
 968                         if (!dst[i])
 969                                 continue;
 970                         *r += kvm_apic_set_irq(dst[i]->vcpu, irq, dest_map);
 971                 }
 972         }
 973 
 974         rcu_read_unlock();
 975         return ret;
 976 }
 977 
 978 /*
  979  * This routine tries to handle interrupts in posted mode; here is how
 980  * it deals with different cases:
 981  * - For single-destination interrupts, handle it in posted mode
 982  * - Else if vector hashing is enabled and it is a lowest-priority
 983  *   interrupt, handle it in posted mode and use the following mechanism
  984  *   to find the destination vCPU.
 985  *      1. For lowest-priority interrupts, store all the possible
 986  *         destination vCPUs in an array.
 987  *      2. Use "guest vector % max number of destination vCPUs" to find
 988  *         the right destination vCPU in the array for the lowest-priority
 989  *         interrupt.
 990  * - Otherwise, use remapped mode to inject the interrupt.
 991  */
 992 bool kvm_intr_is_single_vcpu_fast(struct kvm *kvm, struct kvm_lapic_irq *irq,
 993                         struct kvm_vcpu **dest_vcpu)
 994 {
 995         struct kvm_apic_map *map;
 996         unsigned long bitmap;
 997         struct kvm_lapic **dst = NULL;
 998         bool ret = false;
 999 
1000         if (irq->shorthand)
1001                 return false;
1002 
1003         rcu_read_lock();
1004         map = rcu_dereference(kvm->arch.apic_map);
1005 
1006         if (kvm_apic_map_get_dest_lapic(kvm, NULL, irq, map, &dst, &bitmap) &&
1007                         hweight16(bitmap) == 1) {
1008                 unsigned long i = find_first_bit(&bitmap, 16);
1009 
1010                 if (dst[i]) {
1011                         *dest_vcpu = dst[i]->vcpu;
1012                         ret = true;
1013                 }
1014         }
1015 
1016         rcu_read_unlock();
1017         return ret;
1018 }
1019 
1020 /*
1021  * Add a pending IRQ into lapic.
1022  * Return 1 if successfully added and 0 if discarded.
1023  */
1024 static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode,
1025                              int vector, int level, int trig_mode,
1026                              struct dest_map *dest_map)
1027 {
1028         int result = 0;
1029         struct kvm_vcpu *vcpu = apic->vcpu;
1030 
1031         trace_kvm_apic_accept_irq(vcpu->vcpu_id, delivery_mode,
1032                                   trig_mode, vector);
1033         switch (delivery_mode) {
1034         case APIC_DM_LOWEST:
1035                 vcpu->arch.apic_arb_prio++;
1036                 /* fall through */
1037         case APIC_DM_FIXED:
1038                 if (unlikely(trig_mode && !level))
1039                         break;
1040 
1041                 /* FIXME add logic for vcpu on reset */
1042                 if (unlikely(!apic_enabled(apic)))
1043                         break;
1044 
1045                 result = 1;
1046 
1047                 if (dest_map) {
1048                         __set_bit(vcpu->vcpu_id, dest_map->map);
1049                         dest_map->vectors[vcpu->vcpu_id] = vector;
1050                 }
1051 
1052                 if (apic_test_vector(vector, apic->regs + APIC_TMR) != !!trig_mode) {
1053                         if (trig_mode)
1054                                 kvm_lapic_set_vector(vector,
1055                                                      apic->regs + APIC_TMR);
1056                         else
1057                                 kvm_lapic_clear_vector(vector,
1058                                                        apic->regs + APIC_TMR);
1059                 }
1060 
1061                 if (kvm_x86_ops->deliver_posted_interrupt(vcpu, vector)) {
1062                         kvm_lapic_set_irr(vector, apic);
1063                         kvm_make_request(KVM_REQ_EVENT, vcpu);
1064                         kvm_vcpu_kick(vcpu);
1065                 }
1066                 break;
1067 
1068         case APIC_DM_REMRD:
1069                 result = 1;
1070                 vcpu->arch.pv.pv_unhalted = 1;
1071                 kvm_make_request(KVM_REQ_EVENT, vcpu);
1072                 kvm_vcpu_kick(vcpu);
1073                 break;
1074 
1075         case APIC_DM_SMI:
1076                 result = 1;
1077                 kvm_make_request(KVM_REQ_SMI, vcpu);
1078                 kvm_vcpu_kick(vcpu);
1079                 break;
1080 
1081         case APIC_DM_NMI:
1082                 result = 1;
1083                 kvm_inject_nmi(vcpu);
1084                 kvm_vcpu_kick(vcpu);
1085                 break;
1086 
1087         case APIC_DM_INIT:
1088                 if (!trig_mode || level) {
1089                         result = 1;
1090                         /* assumes that there are only KVM_APIC_INIT/SIPI */
1091                         apic->pending_events = (1UL << KVM_APIC_INIT);
1092                         /* make sure pending_events is visible before sending
1093                          * the request */
1094                         smp_wmb();
1095                         kvm_make_request(KVM_REQ_EVENT, vcpu);
1096                         kvm_vcpu_kick(vcpu);
1097                 }
1098                 break;
1099 
1100         case APIC_DM_STARTUP:
1101                 result = 1;
1102                 apic->sipi_vector = vector;
1103                 /* make sure sipi_vector is visible for the receiver */
1104                 smp_wmb();
1105                 set_bit(KVM_APIC_SIPI, &apic->pending_events);
1106                 kvm_make_request(KVM_REQ_EVENT, vcpu);
1107                 kvm_vcpu_kick(vcpu);
1108                 break;
1109 
1110         case APIC_DM_EXTINT:
1111                 /*
1112                  * Should only be called by kvm_apic_local_deliver() with LVT0,
1113                  * before NMI watchdog was enabled. Already handled by
1114                  * kvm_apic_accept_pic_intr().
1115                  */
1116                 break;
1117 
1118         default:
1119                 printk(KERN_ERR "TODO: unsupported delivery mode %x\n",
1120                        delivery_mode);
1121                 break;
1122         }
1123         return result;
1124 }
1125 
1126 int kvm_apic_compare_prio(struct kvm_vcpu *vcpu1, struct kvm_vcpu *vcpu2)
1127 {
1128         return vcpu1->arch.apic_arb_prio - vcpu2->arch.apic_arb_prio;
1129 }
1130 
1131 static bool kvm_ioapic_handles_vector(struct kvm_lapic *apic, int vector)
1132 {
1133         return test_bit(vector, apic->vcpu->arch.ioapic_handled_vectors);
1134 }
1135 
1136 static void kvm_ioapic_send_eoi(struct kvm_lapic *apic, int vector)
1137 {
1138         int trigger_mode;
1139 
 1140         /* Forward the EOI to the IOAPIC only if it handles this vector. */
1141         if (!kvm_ioapic_handles_vector(apic, vector))
1142                 return;
1143 
1144         /* Request a KVM exit to inform the userspace IOAPIC. */
1145         if (irqchip_split(apic->vcpu->kvm)) {
1146                 apic->vcpu->arch.pending_ioapic_eoi = vector;
1147                 kvm_make_request(KVM_REQ_IOAPIC_EOI_EXIT, apic->vcpu);
1148                 return;
1149         }
1150 
1151         if (apic_test_vector(vector, apic->regs + APIC_TMR))
1152                 trigger_mode = IOAPIC_LEVEL_TRIG;
1153         else
1154                 trigger_mode = IOAPIC_EDGE_TRIG;
1155 
1156         kvm_ioapic_update_eoi(apic->vcpu, vector, trigger_mode);
1157 }
1158 
1159 static int apic_set_eoi(struct kvm_lapic *apic)
1160 {
1161         int vector = apic_find_highest_isr(apic);
1162 
1163         trace_kvm_eoi(apic, vector);
1164 
1165         /*
 1166          * Not every EOI write will have a corresponding ISR bit set;
 1167          * one example is when the kernel checks the timer in setup_IO_APIC.
1168          */
1169         if (vector == -1)
1170                 return vector;
1171 
1172         apic_clear_isr(vector, apic);
1173         apic_update_ppr(apic);
1174 
1175         if (test_bit(vector, vcpu_to_synic(apic->vcpu)->vec_bitmap))
1176                 kvm_hv_synic_send_eoi(apic->vcpu, vector);
1177 
1178         kvm_ioapic_send_eoi(apic, vector);
1179         kvm_make_request(KVM_REQ_EVENT, apic->vcpu);
1180         return vector;
1181 }
1182 
1183 /*
1184  * this interface assumes a trap-like exit, which has already finished
 1185  * the desired side effects, including the vISR and vPPR updates.
1186  */
1187 void kvm_apic_set_eoi_accelerated(struct kvm_vcpu *vcpu, int vector)
1188 {
1189         struct kvm_lapic *apic = vcpu->arch.apic;
1190 
1191         trace_kvm_eoi(apic, vector);
1192 
1193         kvm_ioapic_send_eoi(apic, vector);
1194         kvm_make_request(KVM_REQ_EVENT, apic->vcpu);
1195 }
1196 EXPORT_SYMBOL_GPL(kvm_apic_set_eoi_accelerated);
1197 
1198 static void apic_send_ipi(struct kvm_lapic *apic, u32 icr_low, u32 icr_high)
1199 {
1200         struct kvm_lapic_irq irq;
1201 
1202         irq.vector = icr_low & APIC_VECTOR_MASK;
1203         irq.delivery_mode = icr_low & APIC_MODE_MASK;
1204         irq.dest_mode = icr_low & APIC_DEST_MASK;
1205         irq.level = (icr_low & APIC_INT_ASSERT) != 0;
1206         irq.trig_mode = icr_low & APIC_INT_LEVELTRIG;
1207         irq.shorthand = icr_low & APIC_SHORT_MASK;
1208         irq.msi_redir_hint = false;
1209         if (apic_x2apic_mode(apic))
1210                 irq.dest_id = icr_high;
1211         else
1212                 irq.dest_id = GET_APIC_DEST_FIELD(icr_high);
1213 
1214         trace_kvm_apic_ipi(icr_low, irq.dest_id);
1215 
1216         kvm_irq_delivery_to_apic(apic->vcpu->kvm, apic, &irq, NULL);
1217 }
1218 
1219 static u32 apic_get_tmcct(struct kvm_lapic *apic)
1220 {
1221         ktime_t remaining, now;
1222         s64 ns;
1223         u32 tmcct;
1224 
1225         ASSERT(apic != NULL);
1226 
1227         /* if initial count is 0, current count should also be 0 */
1228         if (kvm_lapic_get_reg(apic, APIC_TMICT) == 0 ||
1229                 apic->lapic_timer.period == 0)
1230                 return 0;
1231 
1232         now = ktime_get();
1233         remaining = ktime_sub(apic->lapic_timer.target_expiration, now);
1234         if (ktime_to_ns(remaining) < 0)
1235                 remaining = 0;
1236 
1237         ns = mod_64(ktime_to_ns(remaining), apic->lapic_timer.period);
1238         tmcct = div64_u64(ns,
1239                          (APIC_BUS_CYCLE_NS * apic->divide_count));
1240 
1241         return tmcct;
1242 }
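/*
 * In other words, TMCCT is the time remaining until target_expiration,
 * taken modulo the programmed period (so a periodic timer wraps
 * correctly) and divided by APIC_BUS_CYCLE_NS * divide_count.
 */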
1243 
1244 static void __report_tpr_access(struct kvm_lapic *apic, bool write)
1245 {
1246         struct kvm_vcpu *vcpu = apic->vcpu;
1247         struct kvm_run *run = vcpu->run;
1248 
1249         kvm_make_request(KVM_REQ_REPORT_TPR_ACCESS, vcpu);
1250         run->tpr_access.rip = kvm_rip_read(vcpu);
1251         run->tpr_access.is_write = write;
1252 }
1253 
1254 static inline void report_tpr_access(struct kvm_lapic *apic, bool write)
1255 {
1256         if (apic->vcpu->arch.tpr_access_reporting)
1257                 __report_tpr_access(apic, write);
1258 }
1259 
1260 static u32 __apic_read(struct kvm_lapic *apic, unsigned int offset)
1261 {
1262         u32 val = 0;
1263 
1264         if (offset >= LAPIC_MMIO_LENGTH)
1265                 return 0;
1266 
1267         switch (offset) {
1268         case APIC_ARBPRI:
1269                 break;
1270 
1271         case APIC_TMCCT:        /* Timer CCR */
1272                 if (apic_lvtt_tscdeadline(apic))
1273                         return 0;
1274 
1275                 val = apic_get_tmcct(apic);
1276                 break;
1277         case APIC_PROCPRI:
1278                 apic_update_ppr(apic);
1279                 val = kvm_lapic_get_reg(apic, offset);
1280                 break;
1281         case APIC_TASKPRI:
1282                 report_tpr_access(apic, false);
1283                 /* fall thru */
1284         default:
1285                 val = kvm_lapic_get_reg(apic, offset);
1286                 break;
1287         }
1288 
1289         return val;
1290 }
1291 
1292 static inline struct kvm_lapic *to_lapic(struct kvm_io_device *dev)
1293 {
1294         return container_of(dev, struct kvm_lapic, dev);
1295 }
1296 
1297 #define APIC_REG_MASK(reg)      (1ull << ((reg) >> 4))
1298 #define APIC_REGS_MASK(first, count) \
1299         (APIC_REG_MASK(first) * ((1ull << (count)) - 1))
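/*
 * Each APIC register occupies a 16-byte slot, so (reg >> 4) is a unique
 * bit index: e.g. APIC_REG_MASK(APIC_SPIV), with APIC_SPIV == 0xF0, is
 * 1ull << 15, and APIC_REGS_MASK(APIC_IRR, 8) covers the eight IRR slots
 * starting at bit (APIC_IRR >> 4).
 */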
1300 
1301 int kvm_lapic_reg_read(struct kvm_lapic *apic, u32 offset, int len,
1302                 void *data)
1303 {
1304         unsigned char alignment = offset & 0xf;
1305         u32 result;
1306         /* this bitmask has a bit cleared for each reserved register */
1307         u64 valid_reg_mask =
1308                 APIC_REG_MASK(APIC_ID) |
1309                 APIC_REG_MASK(APIC_LVR) |
1310                 APIC_REG_MASK(APIC_TASKPRI) |
1311                 APIC_REG_MASK(APIC_PROCPRI) |
1312                 APIC_REG_MASK(APIC_LDR) |
1313                 APIC_REG_MASK(APIC_DFR) |
1314                 APIC_REG_MASK(APIC_SPIV) |
1315                 APIC_REGS_MASK(APIC_ISR, APIC_ISR_NR) |
1316                 APIC_REGS_MASK(APIC_TMR, APIC_ISR_NR) |
1317                 APIC_REGS_MASK(APIC_IRR, APIC_ISR_NR) |
1318                 APIC_REG_MASK(APIC_ESR) |
1319                 APIC_REG_MASK(APIC_ICR) |
1320                 APIC_REG_MASK(APIC_ICR2) |
1321                 APIC_REG_MASK(APIC_LVTT) |
1322                 APIC_REG_MASK(APIC_LVTTHMR) |
1323                 APIC_REG_MASK(APIC_LVTPC) |
1324                 APIC_REG_MASK(APIC_LVT0) |
1325                 APIC_REG_MASK(APIC_LVT1) |
1326                 APIC_REG_MASK(APIC_LVTERR) |
1327                 APIC_REG_MASK(APIC_TMICT) |
1328                 APIC_REG_MASK(APIC_TMCCT) |
1329                 APIC_REG_MASK(APIC_TDCR);
1330 
1331         /* ARBPRI is not valid on x2APIC */
1332         if (!apic_x2apic_mode(apic))
1333                 valid_reg_mask |= APIC_REG_MASK(APIC_ARBPRI);
1334 
1335         if (offset > 0x3f0 || !(valid_reg_mask & APIC_REG_MASK(offset)))
1336                 return 1;
1337 
1338         result = __apic_read(apic, offset & ~0xf);
1339 
1340         trace_kvm_apic_read(offset, result);
1341 
1342         switch (len) {
1343         case 1:
1344         case 2:
1345         case 4:
1346                 memcpy(data, (char *)&result + alignment, len);
1347                 break;
1348         default:
1349                 printk(KERN_ERR "Local APIC read with len = %x, "
 1350                        "should be 1, 2, or 4 instead\n", len);
1351                 break;
1352         }
1353         return 0;
1354 }
1355 EXPORT_SYMBOL_GPL(kvm_lapic_reg_read);
1356 
1357 static int apic_mmio_in_range(struct kvm_lapic *apic, gpa_t addr)
1358 {
1359         return addr >= apic->base_address &&
1360                 addr < apic->base_address + LAPIC_MMIO_LENGTH;
1361 }
1362 
1363 static int apic_mmio_read(struct kvm_vcpu *vcpu, struct kvm_io_device *this,
1364                            gpa_t address, int len, void *data)
1365 {
1366         struct kvm_lapic *apic = to_lapic(this);
1367         u32 offset = address - apic->base_address;
1368 
1369         if (!apic_mmio_in_range(apic, address))
1370                 return -EOPNOTSUPP;
1371 
1372         if (!kvm_apic_hw_enabled(apic) || apic_x2apic_mode(apic)) {
1373                 if (!kvm_check_has_quirk(vcpu->kvm,
1374                                          KVM_X86_QUIRK_LAPIC_MMIO_HOLE))
1375                         return -EOPNOTSUPP;
1376 
1377                 memset(data, 0xff, len);
1378                 return 0;
1379         }
1380 
1381         kvm_lapic_reg_read(apic, offset, len, data);
1382 
1383         return 0;
1384 }
1385 
1386 static void update_divide_count(struct kvm_lapic *apic)
1387 {
1388         u32 tmp1, tmp2, tdcr;
1389 
1390         tdcr = kvm_lapic_get_reg(apic, APIC_TDCR);
1391         tmp1 = tdcr & 0xf;
1392         tmp2 = ((tmp1 & 0x3) | ((tmp1 & 0x8) >> 1)) + 1;
1393         apic->divide_count = 0x1 << (tmp2 & 0x7);
1394 }
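/*
 * The TDCR encodes the divider in bits 0, 1 and 3; the shuffle above packs
 * them into a 3-bit value v such that divide_count == 1 << ((v + 1) & 7).
 * For example TDCR == 0xb (bits 3, 1, 0 set) gives divide_count 1, while
 * TDCR == 0x0 gives divide_count 2.
 */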
1395 
1396 static void limit_periodic_timer_frequency(struct kvm_lapic *apic)
1397 {
1398         /*
 1399          * Do not allow the guest to program periodic timers with a small
1400          * interval, since the hrtimers are not throttled by the host
1401          * scheduler.
1402          */
1403         if (apic_lvtt_period(apic) && apic->lapic_timer.period) {
1404                 s64 min_period = min_timer_period_us * 1000LL;
1405 
1406                 if (apic->lapic_timer.period < min_period) {
1407                         pr_info_ratelimited(
1408                             "kvm: vcpu %i: requested %lld ns "
1409                             "lapic timer period limited to %lld ns\n",
1410                             apic->vcpu->vcpu_id,
1411                             apic->lapic_timer.period, min_period);
1412                         apic->lapic_timer.period = min_period;
1413                 }
1414         }
1415 }
1416 
1417 static void apic_update_lvtt(struct kvm_lapic *apic)
1418 {
1419         u32 timer_mode = kvm_lapic_get_reg(apic, APIC_LVTT) &
1420                         apic->lapic_timer.timer_mode_mask;
1421 
1422         if (apic->lapic_timer.timer_mode != timer_mode) {
1423                 if (apic_lvtt_tscdeadline(apic) != (timer_mode ==
1424                                 APIC_LVT_TIMER_TSCDEADLINE)) {
1425                         hrtimer_cancel(&apic->lapic_timer.timer);
1426                         kvm_lapic_set_reg(apic, APIC_TMICT, 0);
1427                         apic->lapic_timer.period = 0;
1428                         apic->lapic_timer.tscdeadline = 0;
1429                 }
1430                 apic->lapic_timer.timer_mode = timer_mode;
1431                 limit_periodic_timer_frequency(apic);
1432         }
1433 }
1434 
1435 /*
1436  * On APICv, this test will cause a busy wait
1437  * during a higher-priority task.
1438  */
1439 
1440 static bool lapic_timer_int_injected(struct kvm_vcpu *vcpu)
1441 {
1442         struct kvm_lapic *apic = vcpu->arch.apic;
1443         u32 reg = kvm_lapic_get_reg(apic, APIC_LVTT);
1444 
1445         if (kvm_apic_hw_enabled(apic)) {
1446                 int vec = reg & APIC_VECTOR_MASK;
1447                 void *bitmap = apic->regs + APIC_ISR;
1448 
1449                 if (vcpu->arch.apicv_active)
1450                         bitmap = apic->regs + APIC_IRR;
1451 
1452                 if (apic_test_vector(vec, bitmap))
1453                         return true;
1454         }
1455         return false;
1456 }
1457 
1458 static inline void __wait_lapic_expire(struct kvm_vcpu *vcpu, u64 guest_cycles)
1459 {
1460         u64 timer_advance_ns = vcpu->arch.apic->lapic_timer.timer_advance_ns;
1461 
1462         /*
1463          * If the guest TSC is running at a different ratio than the host, then
1464          * convert the delay to nanoseconds to achieve an accurate delay.  Note
1465          * that __delay() uses delay_tsc whenever the hardware has TSC, thus
1466          * always for VMX enabled hardware.
1467          */
1468         if (vcpu->arch.tsc_scaling_ratio == kvm_default_tsc_scaling_ratio) {
1469                 __delay(min(guest_cycles,
1470                         nsec_to_cycles(vcpu, timer_advance_ns)));
1471         } else {
1472                 u64 delay_ns = guest_cycles * 1000000ULL;
1473                 do_div(delay_ns, vcpu->arch.virtual_tsc_khz);
1474                 ndelay(min_t(u32, delay_ns, timer_advance_ns));
1475         }
1476 }
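/*
 * Illustration (standalone sketch, not part of lapic.c): the fallback
 * branch above converts a guest-TSC cycle count to nanoseconds as
 * ns = cycles * 1000000 / virtual_tsc_khz.  With a 3 GHz guest TSC
 * (virtual_tsc_khz = 3000000), 3000 cycles correspond to 1000 ns.
 */
static unsigned long long example_guest_cycles_to_ns(unsigned long long cycles,
                                                     unsigned long long tsc_khz)
{
        return cycles * 1000000ULL / tsc_khz;
}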
1477 
1478 static inline void adjust_lapic_timer_advance(struct kvm_vcpu *vcpu,
1479                                               s64 advance_expire_delta)
1480 {
1481         struct kvm_lapic *apic = vcpu->arch.apic;
1482         u32 timer_advance_ns = apic->lapic_timer.timer_advance_ns;
1483         u64 ns;
1484 
1485         /* Do not adjust for tiny fluctuations or large random spikes. */
1486         if (abs(advance_expire_delta) > LAPIC_TIMER_ADVANCE_ADJUST_MAX ||
1487             abs(advance_expire_delta) < LAPIC_TIMER_ADVANCE_ADJUST_MIN)
1488                 return;
1489 
1490         /* too early */
1491         if (advance_expire_delta < 0) {
1492                 ns = -advance_expire_delta * 1000000ULL;
1493                 do_div(ns, vcpu->arch.virtual_tsc_khz);
1494                 timer_advance_ns -= ns/LAPIC_TIMER_ADVANCE_ADJUST_STEP;
1495         } else {
1496         /* too late */
1497                 ns = advance_expire_delta * 1000000ULL;
1498                 do_div(ns, vcpu->arch.virtual_tsc_khz);
1499                 timer_advance_ns += ns/LAPIC_TIMER_ADVANCE_ADJUST_STEP;
1500         }
1501 
1502         if (unlikely(timer_advance_ns > LAPIC_TIMER_ADVANCE_NS_MAX))
1503                 timer_advance_ns = LAPIC_TIMER_ADVANCE_NS_INIT;
1504         apic->lapic_timer.timer_advance_ns = timer_advance_ns;
1505 }
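/*
 * Illustration (standalone sketch, not part of lapic.c): the advance is
 * nudged by the observed error converted to nanoseconds and divided by
 * the adjust step, assumed here to be 8 as LAPIC_TIMER_ADVANCE_ADJUST_STEP
 * is defined in contemporary kernels.  E.g. with a 2 GHz guest TSC,
 * firing 4000 cycles early (delta = -4000) is a 2000 ns error, so the
 * advance shrinks by 2000 / 8 = 250 ns.
 */
static long long example_new_advance_ns(long long advance_ns,
                                        long long delta_cycles,
                                        unsigned long long tsc_khz)
{
        long long err_ns = (delta_cycles < 0 ? -delta_cycles : delta_cycles)
                           * 1000000LL / (long long)tsc_khz;

        return delta_cycles < 0 ? advance_ns - err_ns / 8
                                : advance_ns + err_ns / 8;
}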
1506 
1507 static void __kvm_wait_lapic_expire(struct kvm_vcpu *vcpu)
1508 {
1509         struct kvm_lapic *apic = vcpu->arch.apic;
1510         u64 guest_tsc, tsc_deadline;
1511 
1512         if (apic->lapic_timer.expired_tscdeadline == 0)
1513                 return;
1514 
1515         tsc_deadline = apic->lapic_timer.expired_tscdeadline;
1516         apic->lapic_timer.expired_tscdeadline = 0;
1517         guest_tsc = kvm_read_l1_tsc(vcpu, rdtsc());
1518         apic->lapic_timer.advance_expire_delta = guest_tsc - tsc_deadline;
1519 
1520         if (guest_tsc < tsc_deadline)
1521                 __wait_lapic_expire(vcpu, tsc_deadline - guest_tsc);
1522 
1523         if (lapic_timer_advance_dynamic)
1524                 adjust_lapic_timer_advance(vcpu, apic->lapic_timer.advance_expire_delta);
1525 }
1526 
1527 void kvm_wait_lapic_expire(struct kvm_vcpu *vcpu)
1528 {
1529         if (lapic_timer_int_injected(vcpu))
1530                 __kvm_wait_lapic_expire(vcpu);
1531 }
1532 EXPORT_SYMBOL_GPL(kvm_wait_lapic_expire);
1533 
1534 static void kvm_apic_inject_pending_timer_irqs(struct kvm_lapic *apic)
1535 {
1536         struct kvm_timer *ktimer = &apic->lapic_timer;
1537 
1538         kvm_apic_local_deliver(apic, APIC_LVTT);
1539         if (apic_lvtt_tscdeadline(apic))
1540                 ktimer->tscdeadline = 0;
1541         if (apic_lvtt_oneshot(apic)) {
1542                 ktimer->tscdeadline = 0;
1543                 ktimer->target_expiration = 0;
1544         }
1545 }
1546 
1547 static void apic_timer_expired(struct kvm_lapic *apic)
1548 {
1549         struct kvm_vcpu *vcpu = apic->vcpu;
1550         struct kvm_timer *ktimer = &apic->lapic_timer;
1551 
1552         if (atomic_read(&apic->lapic_timer.pending))
1553                 return;
1554 
1555         if (apic_lvtt_tscdeadline(apic) || ktimer->hv_timer_in_use)
1556                 ktimer->expired_tscdeadline = ktimer->tscdeadline;
1557 
1558         if (kvm_use_posted_timer_interrupt(apic->vcpu)) {
1559                 if (apic->lapic_timer.timer_advance_ns)
1560                         __kvm_wait_lapic_expire(vcpu);
1561                 kvm_apic_inject_pending_timer_irqs(apic);
1562                 return;
1563         }
1564 
1565         atomic_inc(&apic->lapic_timer.pending);
1566         kvm_set_pending_timer(vcpu);
1567 }
1568 
1569 static void start_sw_tscdeadline(struct kvm_lapic *apic)
1570 {
1571         struct kvm_timer *ktimer = &apic->lapic_timer;
1572         u64 guest_tsc, tscdeadline = ktimer->tscdeadline;
1573         u64 ns = 0;
1574         ktime_t expire;
1575         struct kvm_vcpu *vcpu = apic->vcpu;
1576         unsigned long this_tsc_khz = vcpu->arch.virtual_tsc_khz;
1577         unsigned long flags;
1578         ktime_t now;
1579 
1580         if (unlikely(!tscdeadline || !this_tsc_khz))
1581                 return;
1582 
1583         local_irq_save(flags);
1584 
1585         now = ktime_get();
1586         guest_tsc = kvm_read_l1_tsc(vcpu, rdtsc());
1587 
1588         ns = (tscdeadline - guest_tsc) * 1000000ULL;
1589         do_div(ns, this_tsc_khz);
1590 
1591         if (likely(tscdeadline > guest_tsc) &&
1592             likely(ns > apic->lapic_timer.timer_advance_ns)) {
1593                 expire = ktime_add_ns(now, ns);
1594                 expire = ktime_sub_ns(expire, ktimer->timer_advance_ns);
1595                 hrtimer_start(&ktimer->timer, expire, HRTIMER_MODE_ABS_HARD);
1596         } else
1597                 apic_timer_expired(apic);
1598 
1599         local_irq_restore(flags);
1600 }
1601 
1602 static void update_target_expiration(struct kvm_lapic *apic, uint32_t old_divisor)
1603 {
1604         ktime_t now, remaining;
1605         u64 ns_remaining_old, ns_remaining_new;
1606 
1607         apic->lapic_timer.period = (u64)kvm_lapic_get_reg(apic, APIC_TMICT)
1608                 * APIC_BUS_CYCLE_NS * apic->divide_count;
1609         limit_periodic_timer_frequency(apic);
1610 
1611         now = ktime_get();
1612         remaining = ktime_sub(apic->lapic_timer.target_expiration, now);
1613         if (ktime_to_ns(remaining) < 0)
1614                 remaining = 0;
1615 
1616         ns_remaining_old = ktime_to_ns(remaining);
1617         ns_remaining_new = mul_u64_u32_div(ns_remaining_old,
1618                                            apic->divide_count, old_divisor);
1619 
1620         apic->lapic_timer.tscdeadline +=
1621                 nsec_to_cycles(apic->vcpu, ns_remaining_new) -
1622                 nsec_to_cycles(apic->vcpu, ns_remaining_old);
1623         apic->lapic_timer.target_expiration = ktime_add_ns(now, ns_remaining_new);
1624 }
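/*
 * Illustration (standalone sketch, not part of lapic.c): when the divide
 * configuration changes mid-countdown, the remaining time is rescaled as
 * ns_new = ns_old * new_divisor / old_divisor, exactly the
 * mul_u64_u32_div() above.  E.g. 1000000 ns left at divide-by-2 becomes
 * 8000000 ns after switching to divide-by-16.
 */
static unsigned long long example_rescale_remaining(unsigned long long ns_old,
                                                    unsigned int new_div,
                                                    unsigned int old_div)
{
        return ns_old * new_div / old_div;
}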
1625 
1626 static bool set_target_expiration(struct kvm_lapic *apic)
1627 {
1628         ktime_t now;
1629         u64 tscl = rdtsc();
1630 
1631         now = ktime_get();
1632         apic->lapic_timer.period = (u64)kvm_lapic_get_reg(apic, APIC_TMICT)
1633                 * APIC_BUS_CYCLE_NS * apic->divide_count;
1634 
1635         if (!apic->lapic_timer.period) {
1636                 apic->lapic_timer.tscdeadline = 0;
1637                 return false;
1638         }
1639 
1640         limit_periodic_timer_frequency(apic);
1641 
1642         apic->lapic_timer.tscdeadline = kvm_read_l1_tsc(apic->vcpu, tscl) +
1643                 nsec_to_cycles(apic->vcpu, apic->lapic_timer.period);
1644         apic->lapic_timer.target_expiration = ktime_add_ns(now, apic->lapic_timer.period);
1645 
1646         return true;
1647 }
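/*
 * Illustration (standalone sketch, not part of lapic.c): the programmed
 * period is TMICT * APIC_BUS_CYCLE_NS * divide_count, with
 * APIC_BUS_CYCLE_NS assumed to be 1 here (its definition in lapic.h).
 * E.g. TMICT = 100000 with divide-by-16 gives a 1600000 ns (1.6 ms)
 * period.
 */
static unsigned long long example_timer_period_ns(unsigned int tmict,
                                                  unsigned int divide_count)
{
        return (unsigned long long)tmict * 1 /* APIC_BUS_CYCLE_NS */ * divide_count;
}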
1648 
1649 static void advance_periodic_target_expiration(struct kvm_lapic *apic)
1650 {
1651         ktime_t now = ktime_get();
1652         u64 tscl = rdtsc();
1653         ktime_t delta;
1654 
1655         /*
1656          * Synchronize both deadlines to the same time source or
1657          * differences in the periods (caused by differences in the
1658          * underlying clocks or numerical approximation errors) will
1659          * cause the two to drift apart over time as the errors
1660          * accumulate.
1661          */
1662         apic->lapic_timer.target_expiration =
1663                 ktime_add_ns(apic->lapic_timer.target_expiration,
1664                                 apic->lapic_timer.period);
1665         delta = ktime_sub(apic->lapic_timer.target_expiration, now);
1666         apic->lapic_timer.tscdeadline = kvm_read_l1_tsc(apic->vcpu, tscl) +
1667                 nsec_to_cycles(apic->vcpu, delta);
1668 }
1669 
1670 static void start_sw_period(struct kvm_lapic *apic)
1671 {
1672         if (!apic->lapic_timer.period)
1673                 return;
1674 
1675         if (ktime_after(ktime_get(),
1676                         apic->lapic_timer.target_expiration)) {
1677                 apic_timer_expired(apic);
1678 
1679                 if (apic_lvtt_oneshot(apic))
1680                         return;
1681 
1682                 advance_periodic_target_expiration(apic);
1683         }
1684 
1685         hrtimer_start(&apic->lapic_timer.timer,
1686                 apic->lapic_timer.target_expiration,
1687                 HRTIMER_MODE_ABS);
1688 }
1689 
1690 bool kvm_lapic_hv_timer_in_use(struct kvm_vcpu *vcpu)
1691 {
1692         if (!lapic_in_kernel(vcpu))
1693                 return false;
1694 
1695         return vcpu->arch.apic->lapic_timer.hv_timer_in_use;
1696 }
1697 EXPORT_SYMBOL_GPL(kvm_lapic_hv_timer_in_use);
1698 
1699 static void cancel_hv_timer(struct kvm_lapic *apic)
1700 {
1701         WARN_ON(preemptible());
1702         WARN_ON(!apic->lapic_timer.hv_timer_in_use);
1703         kvm_x86_ops->cancel_hv_timer(apic->vcpu);
1704         apic->lapic_timer.hv_timer_in_use = false;
1705 }
1706 
1707 static bool start_hv_timer(struct kvm_lapic *apic)
1708 {
1709         struct kvm_timer *ktimer = &apic->lapic_timer;
1710         struct kvm_vcpu *vcpu = apic->vcpu;
1711         bool expired;
1712 
1713         WARN_ON(preemptible());
1714         if (!kvm_x86_ops->set_hv_timer)
1715                 return false;
1716 
1717         if (!ktimer->tscdeadline)
1718                 return false;
1719 
1720         if (kvm_x86_ops->set_hv_timer(vcpu, ktimer->tscdeadline, &expired))
1721                 return false;
1722 
1723         ktimer->hv_timer_in_use = true;
1724         hrtimer_cancel(&ktimer->timer);
1725 
1726         /*
1727          * To simplify handling the periodic timer, leave the hv timer running
1728          * even if the deadline timer has expired, i.e. rely on the resulting
1729          * VM-Exit to recompute the periodic timer's target expiration.
1730          */
1731         if (!apic_lvtt_period(apic)) {
1732                 /*
1733                  * Cancel the hv timer if the sw timer fired while the hv timer
1734                  * was being programmed, or if the hv timer itself expired.
1735                  */
1736                 if (atomic_read(&ktimer->pending)) {
1737                         cancel_hv_timer(apic);
1738                 } else if (expired) {
1739                         apic_timer_expired(apic);
1740                         cancel_hv_timer(apic);
1741                 }
1742         }
1743 
1744         trace_kvm_hv_timer_state(vcpu->vcpu_id, ktimer->hv_timer_in_use);
1745 
1746         return true;
1747 }
1748 
1749 static void start_sw_timer(struct kvm_lapic *apic)
1750 {
1751         struct kvm_timer *ktimer = &apic->lapic_timer;
1752 
1753         WARN_ON(preemptible());
1754         if (apic->lapic_timer.hv_timer_in_use)
1755                 cancel_hv_timer(apic);
1756         if (!apic_lvtt_period(apic) && atomic_read(&ktimer->pending))
1757                 return;
1758 
1759         if (apic_lvtt_period(apic) || apic_lvtt_oneshot(apic))
1760                 start_sw_period(apic);
1761         else if (apic_lvtt_tscdeadline(apic))
1762                 start_sw_tscdeadline(apic);
1763         trace_kvm_hv_timer_state(apic->vcpu->vcpu_id, false);
1764 }
1765 
1766 static void restart_apic_timer(struct kvm_lapic *apic)
1767 {
1768         preempt_disable();
1769 
1770         if (!apic_lvtt_period(apic) && atomic_read(&apic->lapic_timer.pending))
1771                 goto out;
1772 
1773         if (!start_hv_timer(apic))
1774                 start_sw_timer(apic);
1775 out:
1776         preempt_enable();
1777 }
1778 
1779 void kvm_lapic_expired_hv_timer(struct kvm_vcpu *vcpu)
1780 {
1781         struct kvm_lapic *apic = vcpu->arch.apic;
1782 
1783         preempt_disable();
1784         /* If the preempt notifier has already run, it also called apic_timer_expired */
1785         if (!apic->lapic_timer.hv_timer_in_use)
1786                 goto out;
1787         WARN_ON(swait_active(&vcpu->wq));
1788         cancel_hv_timer(apic);
1789         apic_timer_expired(apic);
1790 
1791         if (apic_lvtt_period(apic) && apic->lapic_timer.period) {
1792                 advance_periodic_target_expiration(apic);
1793                 restart_apic_timer(apic);
1794         }
1795 out:
1796         preempt_enable();
1797 }
1798 EXPORT_SYMBOL_GPL(kvm_lapic_expired_hv_timer);
1799 
1800 void kvm_lapic_switch_to_hv_timer(struct kvm_vcpu *vcpu)
1801 {
1802         restart_apic_timer(vcpu->arch.apic);
1803 }
1804 EXPORT_SYMBOL_GPL(kvm_lapic_switch_to_hv_timer);
1805 
1806 void kvm_lapic_switch_to_sw_timer(struct kvm_vcpu *vcpu)
1807 {
1808         struct kvm_lapic *apic = vcpu->arch.apic;
1809 
1810         preempt_disable();
1811         /* Possibly the TSC deadline timer is not enabled yet */
1812         if (apic->lapic_timer.hv_timer_in_use)
1813                 start_sw_timer(apic);
1814         preempt_enable();
1815 }
1816 EXPORT_SYMBOL_GPL(kvm_lapic_switch_to_sw_timer);
1817 
1818 void kvm_lapic_restart_hv_timer(struct kvm_vcpu *vcpu)
1819 {
1820         struct kvm_lapic *apic = vcpu->arch.apic;
1821 
1822         WARN_ON(!apic->lapic_timer.hv_timer_in_use);
1823         restart_apic_timer(apic);
1824 }
1825 
1826 static void start_apic_timer(struct kvm_lapic *apic)
1827 {
1828         atomic_set(&apic->lapic_timer.pending, 0);
1829 
1830         if ((apic_lvtt_period(apic) || apic_lvtt_oneshot(apic))
1831             && !set_target_expiration(apic))
1832                 return;
1833 
1834         restart_apic_timer(apic);
1835 }
1836 
1837 static void apic_manage_nmi_watchdog(struct kvm_lapic *apic, u32 lvt0_val)
1838 {
1839         bool lvt0_in_nmi_mode = apic_lvt_nmi_mode(lvt0_val);
1840 
1841         if (apic->lvt0_in_nmi_mode != lvt0_in_nmi_mode) {
1842                 apic->lvt0_in_nmi_mode = lvt0_in_nmi_mode;
1843                 if (lvt0_in_nmi_mode) {
1844                         atomic_inc(&apic->vcpu->kvm->arch.vapics_in_nmi_mode);
1845                 } else
1846                         atomic_dec(&apic->vcpu->kvm->arch.vapics_in_nmi_mode);
1847         }
1848 }
1849 
1850 int kvm_lapic_reg_write(struct kvm_lapic *apic, u32 reg, u32 val)
1851 {
1852         int ret = 0;
1853 
1854         trace_kvm_apic_write(reg, val);
1855 
1856         switch (reg) {
1857         case APIC_ID:           /* Local APIC ID */
1858                 if (!apic_x2apic_mode(apic))
1859                         kvm_apic_set_xapic_id(apic, val >> 24);
1860                 else
1861                         ret = 1;
1862                 break;
1863 
1864         case APIC_TASKPRI:
1865                 report_tpr_access(apic, true);
1866                 apic_set_tpr(apic, val & 0xff);
1867                 break;
1868 
1869         case APIC_EOI:
1870                 apic_set_eoi(apic);
1871                 break;
1872 
1873         case APIC_LDR:
1874                 if (!apic_x2apic_mode(apic))
1875                         kvm_apic_set_ldr(apic, val & APIC_LDR_MASK);
1876                 else
1877                         ret = 1;
1878                 break;
1879 
1880         case APIC_DFR:
1881                 if (!apic_x2apic_mode(apic)) {
1882                         kvm_lapic_set_reg(apic, APIC_DFR, val | 0x0FFFFFFF);
1883                         recalculate_apic_map(apic->vcpu->kvm);
1884                 } else
1885                         ret = 1;
1886                 break;
1887 
1888         case APIC_SPIV: {
1889                 u32 mask = 0x3ff;
1890                 if (kvm_lapic_get_reg(apic, APIC_LVR) & APIC_LVR_DIRECTED_EOI)
1891                         mask |= APIC_SPIV_DIRECTED_EOI;
1892                 apic_set_spiv(apic, val & mask);
1893                 if (!(val & APIC_SPIV_APIC_ENABLED)) {
1894                         int i;
1895                         u32 lvt_val;
1896 
1897                         for (i = 0; i < KVM_APIC_LVT_NUM; i++) {
1898                                 lvt_val = kvm_lapic_get_reg(apic,
1899                                                        APIC_LVTT + 0x10 * i);
1900                                 kvm_lapic_set_reg(apic, APIC_LVTT + 0x10 * i,
1901                                              lvt_val | APIC_LVT_MASKED);
1902                         }
1903                         apic_update_lvtt(apic);
1904                         atomic_set(&apic->lapic_timer.pending, 0);
1905 
1906                 }
1907                 break;
1908         }
1909         case APIC_ICR:
1910                 /* No delay here, so we always clear the pending bit */
1911                 val &= ~(1 << 12);
1912                 apic_send_ipi(apic, val, kvm_lapic_get_reg(apic, APIC_ICR2));
1913                 kvm_lapic_set_reg(apic, APIC_ICR, val);
1914                 break;
1915 
1916         case APIC_ICR2:
1917                 if (!apic_x2apic_mode(apic))
1918                         val &= 0xff000000;
1919                 kvm_lapic_set_reg(apic, APIC_ICR2, val);
1920                 break;
1921 
1922         case APIC_LVT0:
1923                 apic_manage_nmi_watchdog(apic, val);
1924                 /* fall through */
1925         case APIC_LVTTHMR:
1926         case APIC_LVTPC:
1927         case APIC_LVT1:
1928         case APIC_LVTERR: {
1929                 /* TODO: Check vector */
1930                 size_t size;
1931                 u32 index;
1932 
1933                 if (!kvm_apic_sw_enabled(apic))
1934                         val |= APIC_LVT_MASKED;
1935                 size = ARRAY_SIZE(apic_lvt_mask);
1936                 index = array_index_nospec(
1937                                 (reg - APIC_LVTT) >> 4, size);
1938                 val &= apic_lvt_mask[index];
1939                 kvm_lapic_set_reg(apic, reg, val);
1940                 break;
1941         }
1942 
1943         case APIC_LVTT:
1944                 if (!kvm_apic_sw_enabled(apic))
1945                         val |= APIC_LVT_MASKED;
1946                 val &= (apic_lvt_mask[0] | apic->lapic_timer.timer_mode_mask);
1947                 kvm_lapic_set_reg(apic, APIC_LVTT, val);
1948                 apic_update_lvtt(apic);
1949                 break;
1950 
1951         case APIC_TMICT:
1952                 if (apic_lvtt_tscdeadline(apic))
1953                         break;
1954 
1955                 hrtimer_cancel(&apic->lapic_timer.timer);
1956                 kvm_lapic_set_reg(apic, APIC_TMICT, val);
1957                 start_apic_timer(apic);
1958                 break;
1959 
1960         case APIC_TDCR: {
1961                 uint32_t old_divisor = apic->divide_count;
1962 
1963                 kvm_lapic_set_reg(apic, APIC_TDCR, val);
1964                 update_divide_count(apic);
1965                 if (apic->divide_count != old_divisor &&
1966                                 apic->lapic_timer.period) {
1967                         hrtimer_cancel(&apic->lapic_timer.timer);
1968                         update_target_expiration(apic, old_divisor);
1969                         restart_apic_timer(apic);
1970                 }
1971                 break;
1972         }
1973         case APIC_ESR:
1974                 if (apic_x2apic_mode(apic) && val != 0)
1975                         ret = 1;
1976                 break;
1977 
1978         case APIC_SELF_IPI:
1979                 if (apic_x2apic_mode(apic)) {
1980                         kvm_lapic_reg_write(apic, APIC_ICR, 0x40000 | (val & 0xff));
1981                 } else
1982                         ret = 1;
1983                 break;
1984         default:
1985                 ret = 1;
1986                 break;
1987         }
1988 
1989         return ret;
1990 }
1991 EXPORT_SYMBOL_GPL(kvm_lapic_reg_write);
1992 
1993 static int apic_mmio_write(struct kvm_vcpu *vcpu, struct kvm_io_device *this,
1994                             gpa_t address, int len, const void *data)
1995 {
1996         struct kvm_lapic *apic = to_lapic(this);
1997         unsigned int offset = address - apic->base_address;
1998         u32 val;
1999 
2000         if (!apic_mmio_in_range(apic, address))
2001                 return -EOPNOTSUPP;
2002 
2003         if (!kvm_apic_hw_enabled(apic) || apic_x2apic_mode(apic)) {
2004                 if (!kvm_check_has_quirk(vcpu->kvm,
2005                                          KVM_X86_QUIRK_LAPIC_MMIO_HOLE))
2006                         return -EOPNOTSUPP;
2007 
2008                 return 0;
2009         }
2010 
2011         /*
2012          * APIC registers must be aligned on a 128-bit boundary.
2013          * 32/64/128-bit registers must be accessed through 32-bit accesses.
2014          * Refer to SDM 8.4.1.
2015          */
2016         if (len != 4 || (offset & 0xf))
2017                 return 0;
2018 
2019         val = *(u32*)data;
2020 
2021         kvm_lapic_reg_write(apic, offset & 0xff0, val);
2022 
2023         return 0;
2024 }
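/*
 * Illustration (standalone sketch, not part of lapic.c): only aligned
 * 32-bit MMIO writes reach a register, and the register is selected by
 * bits 11:4 of the offset.  E.g. a 4-byte write at base + 0x380 lands on
 * the initial-count register (offset 0x380, APIC_TMICT).
 */
static int example_mmio_write_reg(unsigned int offset, int len,
                                  unsigned int *reg)
{
        if (len != 4 || (offset & 0xf))
                return 0;               /* silently ignored, as above */
        *reg = offset & 0xff0;
        return 1;
}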
2025 
2026 void kvm_lapic_set_eoi(struct kvm_vcpu *vcpu)
2027 {
2028         kvm_lapic_reg_write(vcpu->arch.apic, APIC_EOI, 0);
2029 }
2030 EXPORT_SYMBOL_GPL(kvm_lapic_set_eoi);
2031 
2032 /* emulate APIC access in a trap manner */
2033 void kvm_apic_write_nodecode(struct kvm_vcpu *vcpu, u32 offset)
2034 {
2035         u32 val = 0;
2036 
2037         /* hw has done the conditional check and inst decode */
2038         offset &= 0xff0;
2039 
2040         kvm_lapic_reg_read(vcpu->arch.apic, offset, 4, &val);
2041 
2042         /* TODO: optimize to just emulate side effect w/o one more write */
2043         kvm_lapic_reg_write(vcpu->arch.apic, offset, val);
2044 }
2045 EXPORT_SYMBOL_GPL(kvm_apic_write_nodecode);
2046 
2047 void kvm_free_lapic(struct kvm_vcpu *vcpu)
2048 {
2049         struct kvm_lapic *apic = vcpu->arch.apic;
2050 
2051         if (!vcpu->arch.apic)
2052                 return;
2053 
2054         hrtimer_cancel(&apic->lapic_timer.timer);
2055 
2056         if (!(vcpu->arch.apic_base & MSR_IA32_APICBASE_ENABLE))
2057                 static_key_slow_dec_deferred(&apic_hw_disabled);
2058 
2059         if (!apic->sw_enabled)
2060                 static_key_slow_dec_deferred(&apic_sw_disabled);
2061 
2062         if (apic->regs)
2063                 free_page((unsigned long)apic->regs);
2064 
2065         kfree(apic);
2066 }
2067 
2068 /*
2069  *----------------------------------------------------------------------
2070  * LAPIC interface
2071  *----------------------------------------------------------------------
2072  */
2073 u64 kvm_get_lapic_tscdeadline_msr(struct kvm_vcpu *vcpu)
2074 {
2075         struct kvm_lapic *apic = vcpu->arch.apic;
2076 
2077         if (!lapic_in_kernel(vcpu) ||
2078                 !apic_lvtt_tscdeadline(apic))
2079                 return 0;
2080 
2081         return apic->lapic_timer.tscdeadline;
2082 }
2083 
2084 void kvm_set_lapic_tscdeadline_msr(struct kvm_vcpu *vcpu, u64 data)
2085 {
2086         struct kvm_lapic *apic = vcpu->arch.apic;
2087 
2088         if (!lapic_in_kernel(vcpu) || apic_lvtt_oneshot(apic) ||
2089                         apic_lvtt_period(apic))
2090                 return;
2091 
2092         hrtimer_cancel(&apic->lapic_timer.timer);
2093         apic->lapic_timer.tscdeadline = data;
2094         start_apic_timer(apic);
2095 }
2096 
2097 void kvm_lapic_set_tpr(struct kvm_vcpu *vcpu, unsigned long cr8)
2098 {
2099         struct kvm_lapic *apic = vcpu->arch.apic;
2100 
2101         apic_set_tpr(apic, ((cr8 & 0x0f) << 4)
2102                      | (kvm_lapic_get_reg(apic, APIC_TASKPRI) & 4));
2103 }
2104 
2105 u64 kvm_lapic_get_cr8(struct kvm_vcpu *vcpu)
2106 {
2107         u64 tpr;
2108 
2109         tpr = (u64) kvm_lapic_get_reg(vcpu->arch.apic, APIC_TASKPRI);
2110 
2111         return (tpr & 0xf0) >> 4;
2112 }
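/*
 * Illustration (standalone sketch, not part of lapic.c): CR8 exposes the
 * task-priority class, i.e. TPR bits 7:4; the helpers above shift
 * between the two encodings.  E.g. cr8 = 0x9 corresponds to TPR 0x90,
 * and TPR 0xa5 reads back as cr8 = 0xa.
 */
static unsigned int example_cr8_to_tpr(unsigned int cr8)
{
        return (cr8 & 0xf) << 4;
}

static unsigned int example_tpr_to_cr8(unsigned int tpr)
{
        return (tpr & 0xf0) >> 4;
}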
2113 
2114 void kvm_lapic_set_base(struct kvm_vcpu *vcpu, u64 value)
2115 {
2116         u64 old_value = vcpu->arch.apic_base;
2117         struct kvm_lapic *apic = vcpu->arch.apic;
2118 
2119         if (!apic)
2120                 value |= MSR_IA32_APICBASE_BSP;
2121 
2122         vcpu->arch.apic_base = value;
2123 
2124         if ((old_value ^ value) & MSR_IA32_APICBASE_ENABLE)
2125                 kvm_update_cpuid(vcpu);
2126 
2127         if (!apic)
2128                 return;
2129 
2130         /* update jump label if enable bit changes */
2131         if ((old_value ^ value) & MSR_IA32_APICBASE_ENABLE) {
2132                 if (value & MSR_IA32_APICBASE_ENABLE) {
2133                         kvm_apic_set_xapic_id(apic, vcpu->vcpu_id);
2134                         static_key_slow_dec_deferred(&apic_hw_disabled);
2135                 } else {
2136                         static_key_slow_inc(&apic_hw_disabled.key);
2137                         recalculate_apic_map(vcpu->kvm);
2138                 }
2139         }
2140 
2141         if (((old_value ^ value) & X2APIC_ENABLE) && (value & X2APIC_ENABLE))
2142                 kvm_apic_set_x2apic_id(apic, vcpu->vcpu_id);
2143 
2144         if ((old_value ^ value) & (MSR_IA32_APICBASE_ENABLE | X2APIC_ENABLE))
2145                 kvm_x86_ops->set_virtual_apic_mode(vcpu);
2146 
2147         apic->base_address = apic->vcpu->arch.apic_base &
2148                              MSR_IA32_APICBASE_BASE;
2149 
2150         if ((value & MSR_IA32_APICBASE_ENABLE) &&
2151              apic->base_address != APIC_DEFAULT_PHYS_BASE)
2152                 pr_warn_once("APIC base relocation is unsupported by KVM");
2153 }
2154 
2155 void kvm_lapic_reset(struct kvm_vcpu *vcpu, bool init_event)
2156 {
2157         struct kvm_lapic *apic = vcpu->arch.apic;
2158         int i;
2159 
2160         if (!apic)
2161                 return;
2162 
2163         /* Stop the timer in case it's a reset to an active apic */
2164         hrtimer_cancel(&apic->lapic_timer.timer);
2165 
2166         if (!init_event) {
2167                 kvm_lapic_set_base(vcpu, APIC_DEFAULT_PHYS_BASE |
2168                                          MSR_IA32_APICBASE_ENABLE);
2169                 kvm_apic_set_xapic_id(apic, vcpu->vcpu_id);
2170         }
2171         kvm_apic_set_version(apic->vcpu);
2172 
2173         for (i = 0; i < KVM_APIC_LVT_NUM; i++)
2174                 kvm_lapic_set_reg(apic, APIC_LVTT + 0x10 * i, APIC_LVT_MASKED);
2175         apic_update_lvtt(apic);
2176         if (kvm_vcpu_is_reset_bsp(vcpu) &&
2177             kvm_check_has_quirk(vcpu->kvm, KVM_X86_QUIRK_LINT0_REENABLED))
2178                 kvm_lapic_set_reg(apic, APIC_LVT0,
2179                              SET_APIC_DELIVERY_MODE(0, APIC_MODE_EXTINT));
2180         apic_manage_nmi_watchdog(apic, kvm_lapic_get_reg(apic, APIC_LVT0));
2181 
2182         kvm_lapic_set_reg(apic, APIC_DFR, 0xffffffffU);
2183         apic_set_spiv(apic, 0xff);
2184         kvm_lapic_set_reg(apic, APIC_TASKPRI, 0);
2185         if (!apic_x2apic_mode(apic))
2186                 kvm_apic_set_ldr(apic, 0);
2187         kvm_lapic_set_reg(apic, APIC_ESR, 0);
2188         kvm_lapic_set_reg(apic, APIC_ICR, 0);
2189         kvm_lapic_set_reg(apic, APIC_ICR2, 0);
2190         kvm_lapic_set_reg(apic, APIC_TDCR, 0);
2191         kvm_lapic_set_reg(apic, APIC_TMICT, 0);
2192         for (i = 0; i < 8; i++) {
2193                 kvm_lapic_set_reg(apic, APIC_IRR + 0x10 * i, 0);
2194                 kvm_lapic_set_reg(apic, APIC_ISR + 0x10 * i, 0);
2195                 kvm_lapic_set_reg(apic, APIC_TMR + 0x10 * i, 0);
2196         }
2197         apic->irr_pending = vcpu->arch.apicv_active;
2198         apic->isr_count = vcpu->arch.apicv_active ? 1 : 0;
2199         apic->highest_isr_cache = -1;
2200         update_divide_count(apic);
2201         atomic_set(&apic->lapic_timer.pending, 0);
2202         if (kvm_vcpu_is_bsp(vcpu))
2203                 kvm_lapic_set_base(vcpu,
2204                                 vcpu->arch.apic_base | MSR_IA32_APICBASE_BSP);
2205         vcpu->arch.pv_eoi.msr_val = 0;
2206         apic_update_ppr(apic);
2207         if (vcpu->arch.apicv_active) {
2208                 kvm_x86_ops->apicv_post_state_restore(vcpu);
2209                 kvm_x86_ops->hwapic_irr_update(vcpu, -1);
2210                 kvm_x86_ops->hwapic_isr_update(vcpu, -1);
2211         }
2212 
2213         vcpu->arch.apic_arb_prio = 0;
2214         vcpu->arch.apic_attention = 0;
2215 }
2216 
2217 /*
2218  *----------------------------------------------------------------------
2219  * timer interface
2220  *----------------------------------------------------------------------
2221  */
2222 
2223 static bool lapic_is_periodic(struct kvm_lapic *apic)
2224 {
2225         return apic_lvtt_period(apic);
2226 }
2227 
2228 int apic_has_pending_timer(struct kvm_vcpu *vcpu)
2229 {
2230         struct kvm_lapic *apic = vcpu->arch.apic;
2231 
2232         if (apic_enabled(apic) && apic_lvt_enabled(apic, APIC_LVTT))
2233                 return atomic_read(&apic->lapic_timer.pending);
2234 
2235         return 0;
2236 }
2237 
2238 int kvm_apic_local_deliver(struct kvm_lapic *apic, int lvt_type)
2239 {
2240         u32 reg = kvm_lapic_get_reg(apic, lvt_type);
2241         int vector, mode, trig_mode;
2242 
2243         if (kvm_apic_hw_enabled(apic) && !(reg & APIC_LVT_MASKED)) {
2244                 vector = reg & APIC_VECTOR_MASK;
2245                 mode = reg & APIC_MODE_MASK;
2246                 trig_mode = reg & APIC_LVT_LEVEL_TRIGGER;
2247                 return __apic_accept_irq(apic, mode, vector, 1, trig_mode,
2248                                         NULL);
2249         }
2250         return 0;
2251 }
2252 
2253 void kvm_apic_nmi_wd_deliver(struct kvm_vcpu *vcpu)
2254 {
2255         struct kvm_lapic *apic = vcpu->arch.apic;
2256 
2257         if (apic)
2258                 kvm_apic_local_deliver(apic, APIC_LVT0);
2259 }
2260 
2261 static const struct kvm_io_device_ops apic_mmio_ops = {
2262         .read     = apic_mmio_read,
2263         .write    = apic_mmio_write,
2264 };
2265 
2266 static enum hrtimer_restart apic_timer_fn(struct hrtimer *data)
2267 {
2268         struct kvm_timer *ktimer = container_of(data, struct kvm_timer, timer);
2269         struct kvm_lapic *apic = container_of(ktimer, struct kvm_lapic, lapic_timer);
2270 
2271         apic_timer_expired(apic);
2272 
2273         if (lapic_is_periodic(apic)) {
2274                 advance_periodic_target_expiration(apic);
2275                 hrtimer_add_expires_ns(&ktimer->timer, ktimer->period);
2276                 return HRTIMER_RESTART;
2277         } else
2278                 return HRTIMER_NORESTART;
2279 }
2280 
2281 int kvm_create_lapic(struct kvm_vcpu *vcpu, int timer_advance_ns)
2282 {
2283         struct kvm_lapic *apic;
2284 
2285         ASSERT(vcpu != NULL);
2286 
2287         apic = kzalloc(sizeof(*apic), GFP_KERNEL_ACCOUNT);
2288         if (!apic)
2289                 goto nomem;
2290 
2291         vcpu->arch.apic = apic;
2292 
2293         apic->regs = (void *)get_zeroed_page(GFP_KERNEL_ACCOUNT);
2294         if (!apic->regs) {
2295                 printk(KERN_ERR "malloc apic regs error for vcpu %x\n",
2296                        vcpu->vcpu_id);
2297                 goto nomem_free_apic;
2298         }
2299         apic->vcpu = vcpu;
2300 
2301         hrtimer_init(&apic->lapic_timer.timer, CLOCK_MONOTONIC,
2302                      HRTIMER_MODE_ABS_HARD);
2303         apic->lapic_timer.timer.function = apic_timer_fn;
2304         if (timer_advance_ns == -1) {
2305                 apic->lapic_timer.timer_advance_ns = LAPIC_TIMER_ADVANCE_NS_INIT;
2306                 lapic_timer_advance_dynamic = true;
2307         } else {
2308                 apic->lapic_timer.timer_advance_ns = timer_advance_ns;
2309                 lapic_timer_advance_dynamic = false;
2310         }
2311 
2312         /*
2313          * APIC is created enabled. This will prevent kvm_lapic_set_base from
2314          * thinking that APIC state has changed.
2315          */
2316         vcpu->arch.apic_base = MSR_IA32_APICBASE_ENABLE;
2317         static_key_slow_inc(&apic_sw_disabled.key); /* sw disabled at reset */
2318         kvm_iodevice_init(&apic->dev, &apic_mmio_ops);
2319 
2320         return 0;
2321 nomem_free_apic:
2322         kfree(apic);
2323         vcpu->arch.apic = NULL;
2324 nomem:
2325         return -ENOMEM;
2326 }
2327 
2328 int kvm_apic_has_interrupt(struct kvm_vcpu *vcpu)
2329 {
2330         struct kvm_lapic *apic = vcpu->arch.apic;
2331         u32 ppr;
2332 
2333         if (!kvm_apic_hw_enabled(apic))
2334                 return -1;
2335 
2336         __apic_update_ppr(apic, &ppr);
2337         return apic_has_interrupt_for_ppr(apic, ppr);
2338 }
2339 
2340 int kvm_apic_accept_pic_intr(struct kvm_vcpu *vcpu)
2341 {
2342         u32 lvt0 = kvm_lapic_get_reg(vcpu->arch.apic, APIC_LVT0);
2343         int r = 0;
2344 
2345         if (!kvm_apic_hw_enabled(vcpu->arch.apic))
2346                 r = 1;
2347         if ((lvt0 & APIC_LVT_MASKED) == 0 &&
2348             GET_APIC_DELIVERY_MODE(lvt0) == APIC_MODE_EXTINT)
2349                 r = 1;
2350         return r;
2351 }
2352 
2353 void kvm_inject_apic_timer_irqs(struct kvm_vcpu *vcpu)
2354 {
2355         struct kvm_lapic *apic = vcpu->arch.apic;
2356 
2357         if (atomic_read(&apic->lapic_timer.pending) > 0) {
2358                 kvm_apic_inject_pending_timer_irqs(apic);
2359                 atomic_set(&apic->lapic_timer.pending, 0);
2360         }
2361 }
2362 
2363 int kvm_get_apic_interrupt(struct kvm_vcpu *vcpu)
2364 {
2365         int vector = kvm_apic_has_interrupt(vcpu);
2366         struct kvm_lapic *apic = vcpu->arch.apic;
2367         u32 ppr;
2368 
2369         if (vector == -1)
2370                 return -1;
2371 
2372         /*
2373          * We get here even with APIC virtualization enabled, if doing
2374          * nested virtualization and L1 runs with the "acknowledge interrupt
2375          * on exit" mode.  Then we cannot inject the interrupt via RVI,
2376          * because the process would deliver it through the IDT.
2377          */
2378 
2379         apic_clear_irr(vector, apic);
2380         if (test_bit(vector, vcpu_to_synic(vcpu)->auto_eoi_bitmap)) {
2381                 /*
2382                  * For auto-EOI interrupts, there might be another pending
2383                  * interrupt above PPR, so check whether to raise another
2384                  * KVM_REQ_EVENT.
2385                  */
2386                 apic_update_ppr(apic);
2387         } else {
2388                 /*
2389                  * For normal interrupts, PPR has been raised and there cannot
2390                  * be a higher-priority pending interrupt---except if there was
2391                  * a concurrent interrupt injection, but that would have
2392                  * triggered KVM_REQ_EVENT already.
2393                  */
2394                 apic_set_isr(vector, apic);
2395                 __apic_update_ppr(apic, &ppr);
2396         }
2397 
2398         return vector;
2399 }
2400 
2401 static int kvm_apic_state_fixup(struct kvm_vcpu *vcpu,
2402                 struct kvm_lapic_state *s, bool set)
2403 {
2404         if (apic_x2apic_mode(vcpu->arch.apic)) {
2405                 u32 *id = (u32 *)(s->regs + APIC_ID);
2406                 u32 *ldr = (u32 *)(s->regs + APIC_LDR);
2407 
2408                 if (vcpu->kvm->arch.x2apic_format) {
2409                         if (*id != vcpu->vcpu_id)
2410                                 return -EINVAL;
2411                 } else {
2412                         if (set)
2413                                 *id >>= 24;
2414                         else
2415                                 *id <<= 24;
2416                 }
2417 
2418                 /* In x2APIC mode, the LDR is fixed and based on the id */
2419                 if (set)
2420                         *ldr = kvm_apic_calc_x2apic_ldr(*id);
2421         }
2422 
2423         return 0;
2424 }
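/*
 * Illustration (standalone sketch, not part of lapic.c): userspace saves
 * the ID in xAPIC layout (bits 31:24), so x2APIC state is shifted on
 * load/store as above and the LDR is recomputed from the 32-bit ID the
 * same way kvm_apic_calc_x2apic_ldr() does: cluster = id >> 4 in bits
 * 31:16, logical bit = 1 << (id & 0xf).  E.g. id 5 -> LDR 0x20,
 * id 35 -> LDR 0x20008.
 */
static unsigned int example_x2apic_ldr(unsigned int id)
{
        return ((id >> 4) << 16) | (1u << (id & 0xf));
}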
2425 
2426 int kvm_apic_get_state(struct kvm_vcpu *vcpu, struct kvm_lapic_state *s)
2427 {
2428         memcpy(s->regs, vcpu->arch.apic->regs, sizeof(*s));
2429         return kvm_apic_state_fixup(vcpu, s, false);
2430 }
2431 
2432 int kvm_apic_set_state(struct kvm_vcpu *vcpu, struct kvm_lapic_state *s)
2433 {
2434         struct kvm_lapic *apic = vcpu->arch.apic;
2435         int r;
2436 
2437 
2438         kvm_lapic_set_base(vcpu, vcpu->arch.apic_base);
2439         /* set SPIV separately to get count of SW disabled APICs right */
2440         apic_set_spiv(apic, *((u32 *)(s->regs + APIC_SPIV)));
2441 
2442         r = kvm_apic_state_fixup(vcpu, s, true);
2443         if (r)
2444                 return r;
2445         memcpy(vcpu->arch.apic->regs, s->regs, sizeof(*s));
2446 
2447         recalculate_apic_map(vcpu->kvm);
2448         kvm_apic_set_version(vcpu);
2449 
2450         apic_update_ppr(apic);
2451         hrtimer_cancel(&apic->lapic_timer.timer);
2452         apic_update_lvtt(apic);
2453         apic_manage_nmi_watchdog(apic, kvm_lapic_get_reg(apic, APIC_LVT0));
2454         update_divide_count(apic);
2455         start_apic_timer(apic);
2456         apic->irr_pending = true;
2457         apic->isr_count = vcpu->arch.apicv_active ?
2458                                 1 : count_vectors(apic->regs + APIC_ISR);
2459         apic->highest_isr_cache = -1;
2460         if (vcpu->arch.apicv_active) {
2461                 kvm_x86_ops->apicv_post_state_restore(vcpu);
2462                 kvm_x86_ops->hwapic_irr_update(vcpu,
2463                                 apic_find_highest_irr(apic));
2464                 kvm_x86_ops->hwapic_isr_update(vcpu,
2465                                 apic_find_highest_isr(apic));
2466         }
2467         kvm_make_request(KVM_REQ_EVENT, vcpu);
2468         if (ioapic_in_kernel(vcpu->kvm))
2469                 kvm_rtc_eoi_tracking_restore_one(vcpu);
2470 
2471         vcpu->arch.apic_arb_prio = 0;
2472 
2473         return 0;
2474 }
2475 
2476 void __kvm_migrate_apic_timer(struct kvm_vcpu *vcpu)
2477 {
2478         struct hrtimer *timer;
2479 
2480         if (!lapic_in_kernel(vcpu) ||
2481                 kvm_can_post_timer_interrupt(vcpu))
2482                 return;
2483 
2484         timer = &vcpu->arch.apic->lapic_timer.timer;
2485         if (hrtimer_cancel(timer))
2486                 hrtimer_start_expires(timer, HRTIMER_MODE_ABS_HARD);
2487 }
2488 
2489 /*
2490  * apic_sync_pv_eoi_from_guest - called on vmexit or cancel interrupt
2491  *
2492  * Detect whether guest triggered PV EOI since the
2493  * last entry. If yes, set EOI on the guest's behalf.
2494  * Clear PV EOI in guest memory in any case.
2495  */
2496 static void apic_sync_pv_eoi_from_guest(struct kvm_vcpu *vcpu,
2497                                         struct kvm_lapic *apic)
2498 {
2499         bool pending;
2500         int vector;
2501         /*
2502          * PV EOI state is derived from KVM_APIC_PV_EOI_PENDING in host
2503          * and KVM_PV_EOI_ENABLED in guest memory as follows:
2504          *
2505          * KVM_APIC_PV_EOI_PENDING is unset:
2506          *      -> host disabled PV EOI.
2507          * KVM_APIC_PV_EOI_PENDING is set, KVM_PV_EOI_ENABLED is set:
2508          *      -> host enabled PV EOI, guest did not execute EOI yet.
2509          * KVM_APIC_PV_EOI_PENDING is set, KVM_PV_EOI_ENABLED is unset:
2510          *      -> host enabled PV EOI, guest executed EOI.
2511          */
2512         BUG_ON(!pv_eoi_enabled(vcpu));
2513         pending = pv_eoi_get_pending(vcpu);
2514         /*
2515          * Clear pending bit in any case: it will be set again on vmentry.
2516          * While this might not be ideal from a performance point of view,
2517          * this makes sure pv eoi is only enabled when we know it's safe.
2518          */
2519         pv_eoi_clr_pending(vcpu);
2520         if (pending)
2521                 return;
2522         vector = apic_set_eoi(apic);
2523         trace_kvm_pv_eoi(apic, vector);
2524 }
2525 
2526 void kvm_lapic_sync_from_vapic(struct kvm_vcpu *vcpu)
2527 {
2528         u32 data;
2529 
2530         if (test_bit(KVM_APIC_PV_EOI_PENDING, &vcpu->arch.apic_attention))
2531                 apic_sync_pv_eoi_from_guest(vcpu, vcpu->arch.apic);
2532 
2533         if (!test_bit(KVM_APIC_CHECK_VAPIC, &vcpu->arch.apic_attention))
2534                 return;
2535 
2536         if (kvm_read_guest_cached(vcpu->kvm, &vcpu->arch.apic->vapic_cache, &data,
2537                                   sizeof(u32)))
2538                 return;
2539 
2540         apic_set_tpr(vcpu->arch.apic, data & 0xff);
2541 }
2542 
2543 /*
2544  * apic_sync_pv_eoi_to_guest - called before vmentry
2545  *
2546  * Detect whether it's safe to enable PV EOI and
2547  * if yes do so.
2548  */
2549 static void apic_sync_pv_eoi_to_guest(struct kvm_vcpu *vcpu,
2550                                         struct kvm_lapic *apic)
2551 {
2552         if (!pv_eoi_enabled(vcpu) ||
2553             /* IRR set or many bits in ISR: could be nested. */
2554             apic->irr_pending ||
2555             /* Cache not set: could be safe but we don't bother. */
2556             apic->highest_isr_cache == -1 ||
2557             /* Need EOI to update ioapic. */
2558             kvm_ioapic_handles_vector(apic, apic->highest_isr_cache)) {
2559                 /*
2560                  * PV EOI was disabled by apic_sync_pv_eoi_from_guest
2561                  * so we need not do anything here.
2562                  */
2563                 return;
2564         }
2565 
2566         pv_eoi_set_pending(apic->vcpu);
2567 }
2568 
2569 void kvm_lapic_sync_to_vapic(struct kvm_vcpu *vcpu)
2570 {
2571         u32 data, tpr;
2572         int max_irr, max_isr;
2573         struct kvm_lapic *apic = vcpu->arch.apic;
2574 
2575         apic_sync_pv_eoi_to_guest(vcpu, apic);
2576 
2577         if (!test_bit(KVM_APIC_CHECK_VAPIC, &vcpu->arch.apic_attention))
2578                 return;
2579 
2580         tpr = kvm_lapic_get_reg(apic, APIC_TASKPRI) & 0xff;
2581         max_irr = apic_find_highest_irr(apic);
2582         if (max_irr < 0)
2583                 max_irr = 0;
2584         max_isr = apic_find_highest_isr(apic);
2585         if (max_isr < 0)
2586                 max_isr = 0;
2587         data = (tpr & 0xff) | ((max_isr & 0xf0) << 8) | (max_irr << 24);
2588 
2589         kvm_write_guest_cached(vcpu->kvm, &vcpu->arch.apic->vapic_cache, &data,
2590                                 sizeof(u32));
2591 }
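/*
 * Illustration (standalone sketch, not part of lapic.c): the 32-bit word
 * pushed to the vAPIC page packs the TPR in byte 0, the priority class of
 * the highest in-service vector in byte 1 and the highest pending vector
 * in byte 3, exactly as computed above.  E.g. tpr 0x20, max_isr 0x51,
 * max_irr 0xec -> 0xec005020.
 */
static unsigned int example_vapic_word(unsigned int tpr, int max_isr, int max_irr)
{
        if (max_isr < 0)
                max_isr = 0;
        if (max_irr < 0)
                max_irr = 0;
        return (tpr & 0xff) | ((max_isr & 0xf0) << 8) |
               ((unsigned int)max_irr << 24);
}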
2592 
2593 int kvm_lapic_set_vapic_addr(struct kvm_vcpu *vcpu, gpa_t vapic_addr)
2594 {
2595         if (vapic_addr) {
2596                 if (kvm_gfn_to_hva_cache_init(vcpu->kvm,
2597                                         &vcpu->arch.apic->vapic_cache,
2598                                         vapic_addr, sizeof(u32)))
2599                         return -EINVAL;
2600                 __set_bit(KVM_APIC_CHECK_VAPIC, &vcpu->arch.apic_attention);
2601         } else {
2602                 __clear_bit(KVM_APIC_CHECK_VAPIC, &vcpu->arch.apic_attention);
2603         }
2604 
2605         vcpu->arch.apic->vapic_addr = vapic_addr;
2606         return 0;
2607 }
2608 
2609 int kvm_x2apic_msr_write(struct kvm_vcpu *vcpu, u32 msr, u64 data)
2610 {
2611         struct kvm_lapic *apic = vcpu->arch.apic;
2612         u32 reg = (msr - APIC_BASE_MSR) << 4;
2613 
2614         if (!lapic_in_kernel(vcpu) || !apic_x2apic_mode(apic))
2615                 return 1;
2616 
2617         if (reg == APIC_ICR2)
2618                 return 1;
2619 
2620         /* if this is ICR write vector before command */
2621         if (reg == APIC_ICR)
2622                 kvm_lapic_reg_write(apic, APIC_ICR2, (u32)(data >> 32));
2623         return kvm_lapic_reg_write(apic, reg, (u32)data);
2624 }
2625 
2626 int kvm_x2apic_msr_read(struct kvm_vcpu *vcpu, u32 msr, u64 *data)
2627 {
2628         struct kvm_lapic *apic = vcpu->arch.apic;
2629         u32 reg = (msr - APIC_BASE_MSR) << 4, low, high = 0;
2630 
2631         if (!lapic_in_kernel(vcpu) || !apic_x2apic_mode(apic))
2632                 return 1;
2633 
2634         if (reg == APIC_DFR || reg == APIC_ICR2)
2635                 return 1;
2636 
2637         if (kvm_lapic_reg_read(apic, reg, 4, &low))
2638                 return 1;
2639         if (reg == APIC_ICR)
2640                 kvm_lapic_reg_read(apic, APIC_ICR2, 4, &high);
2641 
2642         *data = (((u64)high) << 32) | low;
2643 
2644         return 0;
2645 }
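/*
 * Illustration (standalone sketch, not part of lapic.c): x2APIC MSRs
 * 0x800..0x8ff map to MMIO-style register offsets by shifting the MSR
 * index left by 4 (APIC_BASE_MSR is 0x800).  E.g. MSR 0x838 -> offset
 * 0x380 (APIC_TMICT), MSR 0x830 -> offset 0x300 (APIC_ICR), whose upper
 * 32 bits travel via APIC_ICR2 as in the two helpers above.
 */
static unsigned int example_x2apic_msr_to_reg(unsigned int msr)
{
        return (msr - 0x800) << 4;
}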
2646 
2647 int kvm_hv_vapic_msr_write(struct kvm_vcpu *vcpu, u32 reg, u64 data)
2648 {
2649         struct kvm_lapic *apic = vcpu->arch.apic;
2650 
2651         if (!lapic_in_kernel(vcpu))
2652                 return 1;
2653 
2654         /* if this is ICR write vector before command */
2655         if (reg == APIC_ICR)
2656                 kvm_lapic_reg_write(apic, APIC_ICR2, (u32)(data >> 32));
2657         return kvm_lapic_reg_write(apic, reg, (u32)data);
2658 }
2659 
2660 int kvm_hv_vapic_msr_read(struct kvm_vcpu *vcpu, u32 reg, u64 *data)
2661 {
2662         struct kvm_lapic *apic = vcpu->arch.apic;
2663         u32 low, high = 0;
2664 
2665         if (!lapic_in_kernel(vcpu))
2666                 return 1;
2667 
2668         if (kvm_lapic_reg_read(apic, reg, 4, &low))
2669                 return 1;
2670         if (reg == APIC_ICR)
2671                 kvm_lapic_reg_read(apic, APIC_ICR2, 4, &high);
2672 
2673         *data = (((u64)high) << 32) | low;
2674 
2675         return 0;
2676 }
2677 
2678 int kvm_lapic_enable_pv_eoi(struct kvm_vcpu *vcpu, u64 data, unsigned long len)
2679 {
2680         u64 addr = data & ~KVM_MSR_ENABLED;
2681         struct gfn_to_hva_cache *ghc = &vcpu->arch.pv_eoi.data;
2682         unsigned long new_len;
2683 
2684         if (!IS_ALIGNED(addr, 4))
2685                 return 1;
2686 
2687         vcpu->arch.pv_eoi.msr_val = data;
2688         if (!pv_eoi_enabled(vcpu))
2689                 return 0;
2690 
2691         if (addr == ghc->gpa && len <= ghc->len)
2692                 new_len = ghc->len;
2693         else
2694                 new_len = len;
2695 
2696         return kvm_gfn_to_hva_cache_init(vcpu->kvm, ghc, addr, new_len);
2697 }
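/*
 * Illustration (standalone sketch, not part of lapic.c): the PV-EOI MSR
 * packs a 4-byte-aligned guest physical address together with an enable
 * bit (KVM_MSR_ENABLED, bit 0), which is why the address is recovered by
 * masking that bit off above.  E.g. data 0x12345671 enables PV EOI at
 * guest physical address 0x12345670.
 */
static int example_pv_eoi_parse(unsigned long long data,
                                unsigned long long *addr, int *enabled)
{
        *addr = data & ~1ULL;           /* clear KVM_MSR_ENABLED */
        *enabled = data & 1;
        return (*addr & 3) == 0;        /* address must be 4-byte aligned */
}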
2698 
2699 void kvm_apic_accept_events(struct kvm_vcpu *vcpu)
2700 {
2701         struct kvm_lapic *apic = vcpu->arch.apic;
2702         u8 sipi_vector;
2703         unsigned long pe;
2704 
2705         if (!lapic_in_kernel(vcpu) || !apic->pending_events)
2706                 return;
2707 
2708         /*
2709          * INITs are latched while CPU is in specific states
2710          * (SMM, VMX non-root mode, SVM with GIF=0).
2711          * Because a CPU cannot be in these states immediately
2712          * after it has processed an INIT signal (and thus in
2713          * KVM_MP_STATE_INIT_RECEIVED state), just eat SIPIs
2714          * and leave the INIT pending.
2715          */
2716         if (is_smm(vcpu) || kvm_x86_ops->apic_init_signal_blocked(vcpu)) {
2717                 WARN_ON_ONCE(vcpu->arch.mp_state == KVM_MP_STATE_INIT_RECEIVED);
2718                 if (test_bit(KVM_APIC_SIPI, &apic->pending_events))
2719                         clear_bit(KVM_APIC_SIPI, &apic->pending_events);
2720                 return;
2721         }
2722 
2723         pe = xchg(&apic->pending_events, 0);
2724         if (test_bit(KVM_APIC_INIT, &pe)) {
2725                 kvm_vcpu_reset(vcpu, true);
2726                 if (kvm_vcpu_is_bsp(apic->vcpu))
2727                         vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
2728                 else
2729                         vcpu->arch.mp_state = KVM_MP_STATE_INIT_RECEIVED;
2730         }
2731         if (test_bit(KVM_APIC_SIPI, &pe) &&
2732             vcpu->arch.mp_state == KVM_MP_STATE_INIT_RECEIVED) {
2733                 /* evaluate pending_events before reading the vector */
2734                 smp_rmb();
2735                 sipi_vector = apic->sipi_vector;
2736                 kvm_vcpu_deliver_sipi_vector(vcpu, sipi_vector);
2737                 vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
2738         }
2739 }
2740 
2741 void kvm_lapic_init(void)
2742 {
2743         /* do not patch jump label more than once per second */
2744         jump_label_rate_limit(&apic_hw_disabled, HZ);
2745         jump_label_rate_limit(&apic_sw_disabled, HZ);
2746 }
2747 
2748 void kvm_lapic_exit(void)
2749 {
2750         static_key_deferred_flush(&apic_hw_disabled);
2751         static_key_deferred_flush(&apic_sw_disabled);
2752 }
