/*
 * Copyright (C) 2012 - Virtual Open Systems and Columbia University
 * Author: Christoffer Dall <c.dall@virtualopensystems.com>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License, version 2, as
 * published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
 */

#include <linux/mman.h>
#include <linux/kvm_host.h>
#include <linux/io.h>
#include <linux/hugetlb.h>
#include <trace/events/kvm.h>
#include <asm/pgalloc.h>
#include <asm/cacheflush.h>
#include <asm/kvm_arm.h>
#include <asm/kvm_mmu.h>
#include <asm/kvm_mmio.h>
#include <asm/kvm_asm.h>
#include <asm/kvm_emulate.h>

#include "trace.h"

extern char __hyp_idmap_text_start[], __hyp_idmap_text_end[];

static pgd_t *boot_hyp_pgd;
static pgd_t *hyp_pgd;
static pgd_t *merged_hyp_pgd;
static DEFINE_MUTEX(kvm_hyp_pgd_mutex);

static unsigned long hyp_idmap_start;
static unsigned long hyp_idmap_end;
static phys_addr_t hyp_idmap_vector;

#define hyp_pgd_order get_order(PTRS_PER_PGD * sizeof(pgd_t))

#define kvm_pmd_huge(_x)	(pmd_huge(_x) || pmd_trans_huge(_x))
#define kvm_pud_huge(_x)	pud_huge(_x)

#define KVM_S2PTE_FLAG_IS_IOMAP		(1UL << 0)
#define KVM_S2_FLAG_LOGGING_ACTIVE	(1UL << 1)

static bool memslot_is_logging(struct kvm_memory_slot *memslot)
{
	return memslot->dirty_bitmap && !(memslot->flags & KVM_MEM_READONLY);
}

/**
 * kvm_flush_remote_tlbs() - flush all VM TLB entries for v7/8
 * @kvm:	pointer to kvm structure.
 *
 * Interface to HYP function to flush all VM TLB entries
 */
void kvm_flush_remote_tlbs(struct kvm *kvm)
{
	kvm_call_hyp(__kvm_tlb_flush_vmid, kvm);
}

static void kvm_tlb_flush_vmid_ipa(struct kvm *kvm, phys_addr_t ipa)
{
	/*
	 * This function also gets called when dealing with HYP page
	 * tables. As HYP doesn't have an associated struct kvm (and
	 * the HYP page tables are fairly static), we don't do
	 * anything there.
	 */
	if (kvm)
		kvm_call_hyp(__kvm_tlb_flush_vmid_ipa, kvm, ipa);
}

/*
 * D-Cache management functions. They take the page table entries by
 * value, as they are flushing the cache using the kernel mapping (or
 * kmap on 32bit).
 */
static void kvm_flush_dcache_pte(pte_t pte)
{
	__kvm_flush_dcache_pte(pte);
}

static void kvm_flush_dcache_pmd(pmd_t pmd)
{
	__kvm_flush_dcache_pmd(pmd);
}

static void kvm_flush_dcache_pud(pud_t pud)
{
	__kvm_flush_dcache_pud(pud);
}

static bool kvm_is_device_pfn(unsigned long pfn)
{
	return !pfn_valid(pfn);
}

/**
 * stage2_dissolve_pmd() - clear and flush huge PMD entry
 * @kvm:	pointer to kvm structure.
 * @addr:	IPA
 * @pmd:	pmd pointer for IPA
 *
 * Function clears a PMD entry, flushes addr 1st and 2nd stage TLBs. Marks all
 * pages in the range dirty.
 */
static void stage2_dissolve_pmd(struct kvm *kvm, phys_addr_t addr, pmd_t *pmd)
{
	if (!kvm_pmd_huge(*pmd))
		return;

	pmd_clear(pmd);
	kvm_tlb_flush_vmid_ipa(kvm, addr);
	put_page(virt_to_page(pmd));
}
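
/*
 * Top up the page-table object cache so that the mapping code below can
 * allocate table pages without sleeping while the mmu_lock is held.
 */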
static int mmu_topup_memory_cache(struct kvm_mmu_memory_cache *cache,
				  int min, int max)
{
	void *page;

	BUG_ON(max > KVM_NR_MEM_OBJS);
	if (cache->nobjs >= min)
		return 0;
	while (cache->nobjs < max) {
		page = (void *)__get_free_page(PGALLOC_GFP);
		if (!page)
			return -ENOMEM;
		cache->objects[cache->nobjs++] = page;
	}
	return 0;
}

static void mmu_free_memory_cache(struct kvm_mmu_memory_cache *mc)
{
	while (mc->nobjs)
		free_page((unsigned long)mc->objects[--mc->nobjs]);
}

static void *mmu_memory_cache_alloc(struct kvm_mmu_memory_cache *mc)
{
	void *p;

	BUG_ON(!mc || !mc->nobjs);
	p = mc->objects[--mc->nobjs];
	return p;
}

static void clear_pgd_entry(struct kvm *kvm, pgd_t *pgd, phys_addr_t addr)
{
	pud_t *pud_table __maybe_unused = pud_offset(pgd, 0);
	pgd_clear(pgd);
	kvm_tlb_flush_vmid_ipa(kvm, addr);
	pud_free(NULL, pud_table);
	put_page(virt_to_page(pgd));
}

static void clear_pud_entry(struct kvm *kvm, pud_t *pud, phys_addr_t addr)
{
	pmd_t *pmd_table = pmd_offset(pud, 0);
	VM_BUG_ON(pud_huge(*pud));
	pud_clear(pud);
	kvm_tlb_flush_vmid_ipa(kvm, addr);
	pmd_free(NULL, pmd_table);
	put_page(virt_to_page(pud));
}

static void clear_pmd_entry(struct kvm *kvm, pmd_t *pmd, phys_addr_t addr)
{
	pte_t *pte_table = pte_offset_kernel(pmd, 0);
	VM_BUG_ON(kvm_pmd_huge(*pmd));
	pmd_clear(pmd);
	kvm_tlb_flush_vmid_ipa(kvm, addr);
	pte_free_kernel(NULL, pte_table);
	put_page(virt_to_page(pmd));
}

/*
 * Unmapping vs dcache management:
 *
 * If a guest maps certain memory pages as uncached, all writes will
 * bypass the data cache and go directly to RAM. However, the CPUs
 * can still speculate reads (not writes) and fill cache lines with
 * data.
 *
 * Those cache lines will be *clean* cache lines though, so a
 * clean+invalidate operation is equivalent to an invalidate
 * operation, because no cache lines are marked dirty.
 *
 * Those clean cache lines could be filled prior to an uncached write
 * by the guest, and the cache coherent IO subsystem would therefore
 * end up writing old data to disk.
 *
 * This is why right after unmapping a page/section and invalidating
 * the corresponding TLBs, we call kvm_flush_dcache_p*() to make sure
 * the IO subsystem will never hit in the cache.
 */
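
/*
 * Clear the PTEs covering [addr, end) under @pmd, flushing the TLB and
 * D-cache for each removed mapping and freeing the PTE table once it
 * becomes empty.
 */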
static void unmap_ptes(struct kvm *kvm, pmd_t *pmd,
		       phys_addr_t addr, phys_addr_t end)
{
	phys_addr_t start_addr = addr;
	pte_t *pte, *start_pte;

	start_pte = pte = pte_offset_kernel(pmd, addr);
	do {
		if (!pte_none(*pte)) {
			pte_t old_pte = *pte;

			kvm_set_pte(pte, __pte(0));
			kvm_tlb_flush_vmid_ipa(kvm, addr);

			/* No need to invalidate the cache for device mappings */
			if (!kvm_is_device_pfn(pte_pfn(old_pte)))
				kvm_flush_dcache_pte(old_pte);

			put_page(virt_to_page(pte));
		}
	} while (pte++, addr += PAGE_SIZE, addr != end);

	if (kvm_pte_table_empty(kvm, start_pte))
		clear_pmd_entry(kvm, pmd, start_addr);
}

static void unmap_pmds(struct kvm *kvm, pud_t *pud,
		       phys_addr_t addr, phys_addr_t end)
{
	phys_addr_t next, start_addr = addr;
	pmd_t *pmd, *start_pmd;

	start_pmd = pmd = pmd_offset(pud, addr);
	do {
		next = kvm_pmd_addr_end(addr, end);
		if (!pmd_none(*pmd)) {
			if (kvm_pmd_huge(*pmd)) {
				pmd_t old_pmd = *pmd;

				pmd_clear(pmd);
				kvm_tlb_flush_vmid_ipa(kvm, addr);

				kvm_flush_dcache_pmd(old_pmd);

				put_page(virt_to_page(pmd));
			} else {
				unmap_ptes(kvm, pmd, addr, next);
			}
		}
	} while (pmd++, addr = next, addr != end);

	if (kvm_pmd_table_empty(kvm, start_pmd))
		clear_pud_entry(kvm, pud, start_addr);
}

static void unmap_puds(struct kvm *kvm, pgd_t *pgd,
		       phys_addr_t addr, phys_addr_t end)
{
	phys_addr_t next, start_addr = addr;
	pud_t *pud, *start_pud;

	start_pud = pud = pud_offset(pgd, addr);
	do {
		next = kvm_pud_addr_end(addr, end);
		if (!pud_none(*pud)) {
			if (pud_huge(*pud)) {
				pud_t old_pud = *pud;

				pud_clear(pud);
				kvm_tlb_flush_vmid_ipa(kvm, addr);

				kvm_flush_dcache_pud(old_pud);

				put_page(virt_to_page(pud));
			} else {
				unmap_pmds(kvm, pud, addr, next);
			}
		}
	} while (pud++, addr = next, addr != end);

	if (kvm_pud_table_empty(kvm, start_pud))
		clear_pgd_entry(kvm, pgd, start_addr);
}
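
/*
 * Walk the page tables rooted at @pgdp and remove all mappings in
 * [start, start + size), freeing any intermediate tables that become
 * empty. Used both for HYP tables (kvm == NULL) and stage-2 tables.
 */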
static void unmap_range(struct kvm *kvm, pgd_t *pgdp,
			phys_addr_t start, u64 size)
{
	pgd_t *pgd;
	phys_addr_t addr = start, end = start + size;
	phys_addr_t next;

	pgd = pgdp + kvm_pgd_index(addr);
	do {
		next = kvm_pgd_addr_end(addr, end);
		if (!pgd_none(*pgd))
			unmap_puds(kvm, pgd, addr, next);
	} while (pgd++, addr = next, addr != end);
}

static void stage2_flush_ptes(struct kvm *kvm, pmd_t *pmd,
			      phys_addr_t addr, phys_addr_t end)
{
	pte_t *pte;

	pte = pte_offset_kernel(pmd, addr);
	do {
		if (!pte_none(*pte) && !kvm_is_device_pfn(pte_pfn(*pte)))
			kvm_flush_dcache_pte(*pte);
	} while (pte++, addr += PAGE_SIZE, addr != end);
}

static void stage2_flush_pmds(struct kvm *kvm, pud_t *pud,
			      phys_addr_t addr, phys_addr_t end)
{
	pmd_t *pmd;
	phys_addr_t next;

	pmd = pmd_offset(pud, addr);
	do {
		next = kvm_pmd_addr_end(addr, end);
		if (!pmd_none(*pmd)) {
			if (kvm_pmd_huge(*pmd))
				kvm_flush_dcache_pmd(*pmd);
			else
				stage2_flush_ptes(kvm, pmd, addr, next);
		}
	} while (pmd++, addr = next, addr != end);
}

static void stage2_flush_puds(struct kvm *kvm, pgd_t *pgd,
			      phys_addr_t addr, phys_addr_t end)
{
	pud_t *pud;
	phys_addr_t next;

	pud = pud_offset(pgd, addr);
	do {
		next = kvm_pud_addr_end(addr, end);
		if (!pud_none(*pud)) {
			if (pud_huge(*pud))
				kvm_flush_dcache_pud(*pud);
			else
				stage2_flush_pmds(kvm, pud, addr, next);
		}
	} while (pud++, addr = next, addr != end);
}
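
/*
 * Walk the stage-2 tables covering @memslot and flush the data cache
 * for every page that is already mapped.
 */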
static void stage2_flush_memslot(struct kvm *kvm,
				 struct kvm_memory_slot *memslot)
{
	phys_addr_t addr = memslot->base_gfn << PAGE_SHIFT;
	phys_addr_t end = addr + PAGE_SIZE * memslot->npages;
	phys_addr_t next;
	pgd_t *pgd;

	pgd = kvm->arch.pgd + kvm_pgd_index(addr);
	do {
		next = kvm_pgd_addr_end(addr, end);
		stage2_flush_puds(kvm, pgd, addr, next);
	} while (pgd++, addr = next, addr != end);
}

/**
 * stage2_flush_vm - Invalidate cache for pages mapped in stage 2
 * @kvm: The struct kvm pointer
 *
 * Go through the stage 2 page tables and invalidate any cache lines
 * backing memory already mapped to the VM.
 */
static void stage2_flush_vm(struct kvm *kvm)
{
	struct kvm_memslots *slots;
	struct kvm_memory_slot *memslot;
	int idx;

	idx = srcu_read_lock(&kvm->srcu);
	spin_lock(&kvm->mmu_lock);

	slots = kvm_memslots(kvm);
	kvm_for_each_memslot(memslot, slots)
		stage2_flush_memslot(kvm, memslot);

	spin_unlock(&kvm->mmu_lock);
	srcu_read_unlock(&kvm->srcu, idx);
}

/**
 * free_boot_hyp_pgd - free HYP boot page tables
 *
 * Free the HYP boot page tables. The bounce page is also freed.
 */
void free_boot_hyp_pgd(void)
{
	mutex_lock(&kvm_hyp_pgd_mutex);

	if (boot_hyp_pgd) {
		unmap_range(NULL, boot_hyp_pgd, hyp_idmap_start, PAGE_SIZE);
		unmap_range(NULL, boot_hyp_pgd, TRAMPOLINE_VA, PAGE_SIZE);
		free_pages((unsigned long)boot_hyp_pgd, hyp_pgd_order);
		boot_hyp_pgd = NULL;
	}

	if (hyp_pgd)
		unmap_range(NULL, hyp_pgd, TRAMPOLINE_VA, PAGE_SIZE);

	mutex_unlock(&kvm_hyp_pgd_mutex);
}

/**
 * free_hyp_pgds - free Hyp-mode page tables
 *
 * Assumes hyp_pgd is a page table used strictly in Hyp-mode and
 * therefore contains either mappings in the kernel memory area (above
 * PAGE_OFFSET), or device mappings in the vmalloc range (from
 * VMALLOC_START to VMALLOC_END).
 *
 * boot_hyp_pgd should only map two pages for the init code.
 */
void free_hyp_pgds(void)
{
	unsigned long addr;

	free_boot_hyp_pgd();

	mutex_lock(&kvm_hyp_pgd_mutex);

	if (hyp_pgd) {
		for (addr = PAGE_OFFSET; virt_addr_valid(addr); addr += PGDIR_SIZE)
			unmap_range(NULL, hyp_pgd, KERN_TO_HYP(addr), PGDIR_SIZE);
		for (addr = VMALLOC_START; is_vmalloc_addr((void*)addr); addr += PGDIR_SIZE)
			unmap_range(NULL, hyp_pgd, KERN_TO_HYP(addr), PGDIR_SIZE);

		free_pages((unsigned long)hyp_pgd, hyp_pgd_order);
		hyp_pgd = NULL;
	}
	if (merged_hyp_pgd) {
		clear_page(merged_hyp_pgd);
		free_page((unsigned long)merged_hyp_pgd);
		merged_hyp_pgd = NULL;
	}

	mutex_unlock(&kvm_hyp_pgd_mutex);
}
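
/*
 * Install PTEs in the HYP tables mapping [start, end) to consecutive
 * physical pages starting at @pfn, using protection @prot.
 */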
static void create_hyp_pte_mappings(pmd_t *pmd, unsigned long start,
				    unsigned long end, unsigned long pfn,
				    pgprot_t prot)
{
	pte_t *pte;
	unsigned long addr;

	addr = start;
	do {
		pte = pte_offset_kernel(pmd, addr);
		kvm_set_pte(pte, pfn_pte(pfn, prot));
		get_page(virt_to_page(pte));
		kvm_flush_dcache_to_poc(pte, sizeof(*pte));
		pfn++;
	} while (addr += PAGE_SIZE, addr != end);
}

static int create_hyp_pmd_mappings(pud_t *pud, unsigned long start,
				   unsigned long end, unsigned long pfn,
				   pgprot_t prot)
{
	pmd_t *pmd;
	pte_t *pte;
	unsigned long addr, next;

	addr = start;
	do {
		pmd = pmd_offset(pud, addr);

		BUG_ON(pmd_sect(*pmd));

		if (pmd_none(*pmd)) {
			pte = pte_alloc_one_kernel(NULL, addr);
			if (!pte) {
				kvm_err("Cannot allocate Hyp pte\n");
				return -ENOMEM;
			}
			pmd_populate_kernel(NULL, pmd, pte);
			get_page(virt_to_page(pmd));
			kvm_flush_dcache_to_poc(pmd, sizeof(*pmd));
		}

		next = pmd_addr_end(addr, end);

		create_hyp_pte_mappings(pmd, addr, next, pfn, prot);
		pfn += (next - addr) >> PAGE_SHIFT;
	} while (addr = next, addr != end);

	return 0;
}

static int create_hyp_pud_mappings(pgd_t *pgd, unsigned long start,
				   unsigned long end, unsigned long pfn,
				   pgprot_t prot)
{
	pud_t *pud;
	pmd_t *pmd;
	unsigned long addr, next;
	int ret;

	addr = start;
	do {
		pud = pud_offset(pgd, addr);

		if (pud_none_or_clear_bad(pud)) {
			pmd = pmd_alloc_one(NULL, addr);
			if (!pmd) {
				kvm_err("Cannot allocate Hyp pmd\n");
				return -ENOMEM;
			}
			pud_populate(NULL, pud, pmd);
			get_page(virt_to_page(pud));
			kvm_flush_dcache_to_poc(pud, sizeof(*pud));
		}

		next = pud_addr_end(addr, end);
		ret = create_hyp_pmd_mappings(pud, addr, next, pfn, prot);
		if (ret)
			return ret;
		pfn += (next - addr) >> PAGE_SHIFT;
	} while (addr = next, addr != end);

	return 0;
}

static int __create_hyp_mappings(pgd_t *pgdp,
				 unsigned long start, unsigned long end,
				 unsigned long pfn, pgprot_t prot)
{
	pgd_t *pgd;
	pud_t *pud;
	unsigned long addr, next;
	int err = 0;

	mutex_lock(&kvm_hyp_pgd_mutex);
	addr = start & PAGE_MASK;
	end = PAGE_ALIGN(end);
	do {
		pgd = pgdp + pgd_index(addr);

		if (pgd_none(*pgd)) {
			pud = pud_alloc_one(NULL, addr);
			if (!pud) {
				kvm_err("Cannot allocate Hyp pud\n");
				err = -ENOMEM;
				goto out;
			}
			pgd_populate(NULL, pgd, pud);
			get_page(virt_to_page(pgd));
			kvm_flush_dcache_to_poc(pgd, sizeof(*pgd));
		}

		next = pgd_addr_end(addr, end);
		err = create_hyp_pud_mappings(pgd, addr, next, pfn, prot);
		if (err)
			goto out;
		pfn += (next - addr) >> PAGE_SHIFT;
	} while (addr = next, addr != end);
out:
	mutex_unlock(&kvm_hyp_pgd_mutex);
	return err;
}
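
/*
 * Translate a kernel virtual address, whether in the linear map or in
 * the vmalloc area, to its physical address.
 */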
static phys_addr_t kvm_kaddr_to_phys(void *kaddr)
{
	if (!is_vmalloc_addr(kaddr)) {
		BUG_ON(!virt_addr_valid(kaddr));
		return __pa(kaddr);
	} else {
		return page_to_phys(vmalloc_to_page(kaddr)) +
		       offset_in_page(kaddr);
	}
}

/**
 * create_hyp_mappings - duplicate a kernel virtual address range in Hyp mode
 * @from:	The virtual kernel start address of the range
 * @to:		The virtual kernel end address of the range (exclusive)
 *
 * The same virtual address as the kernel virtual address is also used
 * in Hyp-mode mapping (modulo HYP_PAGE_OFFSET) to the same underlying
 * physical pages.
 */
int create_hyp_mappings(void *from, void *to)
{
	phys_addr_t phys_addr;
	unsigned long virt_addr;
	unsigned long start = KERN_TO_HYP((unsigned long)from);
	unsigned long end = KERN_TO_HYP((unsigned long)to);

	start = start & PAGE_MASK;
	end = PAGE_ALIGN(end);

	for (virt_addr = start; virt_addr < end; virt_addr += PAGE_SIZE) {
		int err;

		phys_addr = kvm_kaddr_to_phys(from + virt_addr - start);
		err = __create_hyp_mappings(hyp_pgd, virt_addr,
					    virt_addr + PAGE_SIZE,
					    __phys_to_pfn(phys_addr),
					    PAGE_HYP);
		if (err)
			return err;
	}

	return 0;
}

/**
 * create_hyp_io_mappings - duplicate a kernel IO mapping into Hyp mode
 * @from:	The kernel start VA of the range
 * @to:		The kernel end VA of the range (exclusive)
 * @phys_addr:	The physical start address which gets mapped
 *
 * The resulting HYP VA is the same as the kernel VA, modulo
 * HYP_PAGE_OFFSET.
 */
int create_hyp_io_mappings(void *from, void *to, phys_addr_t phys_addr)
{
	unsigned long start = KERN_TO_HYP((unsigned long)from);
	unsigned long end = KERN_TO_HYP((unsigned long)to);

	/* Check for a valid kernel IO mapping */
	if (!is_vmalloc_addr(from) || !is_vmalloc_addr(to - 1))
		return -EINVAL;

	return __create_hyp_mappings(hyp_pgd, start, end,
				     __phys_to_pfn(phys_addr), PAGE_HYP_DEVICE);
}

/* Free the HW pgd, one page at a time */
static void kvm_free_hwpgd(void *hwpgd)
{
	free_pages_exact(hwpgd, kvm_get_hwpgd_size());
}

/* Allocate the HW PGD, making sure that each page gets its own refcount */
static void *kvm_alloc_hwpgd(void)
{
	unsigned int size = kvm_get_hwpgd_size();

	return alloc_pages_exact(size, GFP_KERNEL | __GFP_ZERO);
}

/**
 * kvm_alloc_stage2_pgd - allocate level-1 table for stage-2 translation.
 * @kvm:	The KVM struct pointer for the VM.
 *
 * Allocates the 1st level table only of size defined by S2_PGD_ORDER (can
 * support either full 40-bit input addresses or limited to 32-bit input
 * addresses). Clears the allocated pages.
 *
 * Note we don't need locking here as this is only called when the VM is
 * created, which can only be done once.
 */
int kvm_alloc_stage2_pgd(struct kvm *kvm)
{
	pgd_t *pgd;
	void *hwpgd;

	if (kvm->arch.pgd != NULL) {
		kvm_err("kvm_arch already initialized?\n");
		return -EINVAL;
	}

	hwpgd = kvm_alloc_hwpgd();
	if (!hwpgd)
		return -ENOMEM;

	/* When the kernel uses more levels of page tables than the
	 * guest, we allocate a fake PGD and pre-populate it to point
	 * to the next-level page table, which will be the real
	 * initial page table pointed to by the VTTBR.
	 *
	 * When KVM_PREALLOC_LEVEL==2, we allocate a single page for
	 * the PMD and the kernel will use folded pud.
	 * When KVM_PREALLOC_LEVEL==1, we allocate 2 consecutive PUD
	 * pages.
	 */
	if (KVM_PREALLOC_LEVEL > 0) {
		int i;

		/*
		 * Allocate fake pgd for the page table manipulation macros to
		 * work. This is not used by the hardware and we have no
		 * alignment requirement for this allocation.
		 */
		pgd = kmalloc(PTRS_PER_S2_PGD * sizeof(pgd_t),
			      GFP_KERNEL | __GFP_ZERO);

		if (!pgd) {
			kvm_free_hwpgd(hwpgd);
			return -ENOMEM;
		}

		/* Plug the HW PGD into the fake one. */
		for (i = 0; i < PTRS_PER_S2_PGD; i++) {
			if (KVM_PREALLOC_LEVEL == 1)
				pgd_populate(NULL, pgd + i,
					     (pud_t *)hwpgd + i * PTRS_PER_PUD);
			else if (KVM_PREALLOC_LEVEL == 2)
				pud_populate(NULL, pud_offset(pgd, 0) + i,
					     (pmd_t *)hwpgd + i * PTRS_PER_PMD);
		}
	} else {
		/*
		 * Allocate actual first-level Stage-2 page table used by the
		 * hardware for Stage-2 page table walks.
		 */
		pgd = (pgd_t *)hwpgd;
	}

	kvm_clean_pgd(pgd);
	kvm->arch.pgd = pgd;
	return 0;
}

/**
 * unmap_stage2_range -- Clear stage2 page table entries to unmap a range
 * @kvm:	The VM pointer
 * @start:	The intermediate physical base address of the range to unmap
 * @size:	The size of the area to unmap
 *
 * Clear a range of stage-2 mappings, lowering the various ref-counts. Must
 * be called while holding mmu_lock (unless for freeing the stage2 pgd before
 * destroying the VM), otherwise another faulting VCPU may come in and mess
 * with things behind our backs.
 */
static void unmap_stage2_range(struct kvm *kvm, phys_addr_t start, u64 size)
{
	unmap_range(kvm, kvm->arch.pgd, start, size);
}
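
/*
 * Unmap the stage-2 range backing @memslot, iterating over the VMAs it
 * covers and skipping PFNMAP (device) VMAs, whose mappings are left in
 * place.
 */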
static void stage2_unmap_memslot(struct kvm *kvm,
				 struct kvm_memory_slot *memslot)
{
	hva_t hva = memslot->userspace_addr;
	phys_addr_t addr = memslot->base_gfn << PAGE_SHIFT;
	phys_addr_t size = PAGE_SIZE * memslot->npages;
	hva_t reg_end = hva + size;

	/*
	 * A memory region could potentially cover multiple VMAs, and any holes
	 * between them, so iterate over all of them to find out if we should
	 * unmap any of them.
	 *
	 *     +--------------------------------------------+
	 * +---------------+----------------+   +----------------+
	 * |   : VMA 1     |      VMA 2     |   |    VMA 3  :    |
	 * +---------------+----------------+   +----------------+
	 *     |               memory region                |
	 *     +--------------------------------------------+
	 */
	do {
		struct vm_area_struct *vma = find_vma(current->mm, hva);
		hva_t vm_start, vm_end;

		if (!vma || vma->vm_start >= reg_end)
			break;

		/*
		 * Take the intersection of this VMA with the memory region
		 */
		vm_start = max(hva, vma->vm_start);
		vm_end = min(reg_end, vma->vm_end);

		if (!(vma->vm_flags & VM_PFNMAP)) {
			gpa_t gpa = addr + (vm_start - memslot->userspace_addr);
			unmap_stage2_range(kvm, gpa, vm_end - vm_start);
		}
		hva = vm_end;
	} while (hva < reg_end);
}

/**
 * stage2_unmap_vm - Unmap Stage-2 RAM mappings
 * @kvm: The struct kvm pointer
 *
 * Go through the memregions and unmap any regular RAM
 * backing memory already mapped to the VM.
 */
void stage2_unmap_vm(struct kvm *kvm)
{
	struct kvm_memslots *slots;
	struct kvm_memory_slot *memslot;
	int idx;

	idx = srcu_read_lock(&kvm->srcu);
	spin_lock(&kvm->mmu_lock);

	slots = kvm_memslots(kvm);
	kvm_for_each_memslot(memslot, slots)
		stage2_unmap_memslot(kvm, memslot);

	spin_unlock(&kvm->mmu_lock);
	srcu_read_unlock(&kvm->srcu, idx);
}

/**
 * kvm_free_stage2_pgd - free all stage-2 tables
 * @kvm:	The KVM struct pointer for the VM.
 *
 * Walks the level-1 page table pointed to by kvm->arch.pgd and frees all
 * underlying level-2 and level-3 tables before freeing the actual level-1 table
 * and setting the struct pointer to NULL.
 *
 * Note we don't need locking here as this is only called when the VM is
 * destroyed, which can only be done once.
 */
void kvm_free_stage2_pgd(struct kvm *kvm)
{
	if (kvm->arch.pgd == NULL)
		return;

	unmap_stage2_range(kvm, 0, KVM_PHYS_SIZE);
	kvm_free_hwpgd(kvm_get_hwpgd(kvm));
	if (KVM_PREALLOC_LEVEL > 0)
		kfree(kvm->arch.pgd);

	kvm->arch.pgd = NULL;
}
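
/*
 * Walk the stage-2 tables to the PUD entry for @addr, allocating a
 * missing table from @cache. Returns NULL if the table is missing and
 * no cache was supplied.
 */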
static pud_t *stage2_get_pud(struct kvm *kvm, struct kvm_mmu_memory_cache *cache,
			     phys_addr_t addr)
{
	pgd_t *pgd;
	pud_t *pud;

	pgd = kvm->arch.pgd + kvm_pgd_index(addr);
	if (WARN_ON(pgd_none(*pgd))) {
		if (!cache)
			return NULL;
		pud = mmu_memory_cache_alloc(cache);
		pgd_populate(NULL, pgd, pud);
		get_page(virt_to_page(pgd));
	}

	return pud_offset(pgd, addr);
}

static pmd_t *stage2_get_pmd(struct kvm *kvm, struct kvm_mmu_memory_cache *cache,
			     phys_addr_t addr)
{
	pud_t *pud;
	pmd_t *pmd;

	pud = stage2_get_pud(kvm, cache, addr);
	if (pud_none(*pud)) {
		if (!cache)
			return NULL;
		pmd = mmu_memory_cache_alloc(cache);
		pud_populate(NULL, pud, pmd);
		get_page(virt_to_page(pud));
	}

	return pmd_offset(pud, addr);
}

static int stage2_set_pmd_huge(struct kvm *kvm, struct kvm_mmu_memory_cache
			       *cache, phys_addr_t addr, const pmd_t *new_pmd)
{
	pmd_t *pmd, old_pmd;

	pmd = stage2_get_pmd(kvm, cache, addr);
	VM_BUG_ON(!pmd);

	/*
	 * Mapping in huge pages should only happen through a fault. If a
	 * page is merged into a transparent huge page, the individual
	 * subpages of that huge page should be unmapped through MMU
	 * notifiers before we get here.
	 *
	 * Merging of CompoundPages is not supported; they should be split
	 * first, unmapped, merged, and mapped back in on demand.
	 */
	VM_BUG_ON(pmd_present(*pmd) && pmd_pfn(*pmd) != pmd_pfn(*new_pmd));

	old_pmd = *pmd;
	if (pmd_present(old_pmd)) {
		pmd_clear(pmd);
		kvm_tlb_flush_vmid_ipa(kvm, addr);
	} else {
		get_page(virt_to_page(pmd));
	}

	kvm_set_pmd(pmd, *new_pmd);
	return 0;
}

static int stage2_set_pte(struct kvm *kvm, struct kvm_mmu_memory_cache *cache,
			  phys_addr_t addr, const pte_t *new_pte,
			  unsigned long flags)
{
	pmd_t *pmd;
	pte_t *pte, old_pte;
	bool iomap = flags & KVM_S2PTE_FLAG_IS_IOMAP;
	bool logging_active = flags & KVM_S2_FLAG_LOGGING_ACTIVE;

	VM_BUG_ON(logging_active && !cache);

	/* Create stage-2 page table mapping - Levels 0 and 1 */
	pmd = stage2_get_pmd(kvm, cache, addr);
	if (!pmd) {
		/*
		 * Ignore calls from kvm_set_spte_hva for unallocated
		 * address ranges.
		 */
		return 0;
	}

	/*
	 * While dirty page logging - dissolve huge PMD, then continue on to
	 * allocate page.
	 */
	if (logging_active)
		stage2_dissolve_pmd(kvm, addr, pmd);

	/* Create stage-2 page mappings - Level 2 */
	if (pmd_none(*pmd)) {
		if (!cache)
			return 0; /* ignore calls from kvm_set_spte_hva */
		pte = mmu_memory_cache_alloc(cache);
		kvm_clean_pte(pte);
		pmd_populate_kernel(NULL, pmd, pte);
		get_page(virt_to_page(pmd));
	}

	pte = pte_offset_kernel(pmd, addr);

	if (iomap && pte_present(*pte))
		return -EFAULT;

	/* Create 2nd stage page table mapping - Level 3 */
	old_pte = *pte;
	if (pte_present(old_pte)) {
		kvm_set_pte(pte, __pte(0));
		kvm_tlb_flush_vmid_ipa(kvm, addr);
	} else {
		get_page(virt_to_page(pte));
	}

	kvm_set_pte(pte, *new_pte);
	return 0;
}

/**
 * kvm_phys_addr_ioremap - map a device range to guest IPA
 *
 * @kvm:	The KVM pointer
 * @guest_ipa:	The IPA at which to insert the mapping
 * @pa:		The physical address of the device
 * @size:	The size of the mapping
 */
int kvm_phys_addr_ioremap(struct kvm *kvm, phys_addr_t guest_ipa,
			  phys_addr_t pa, unsigned long size, bool writable)
{
	phys_addr_t addr, end;
	int ret = 0;
	unsigned long pfn;
	struct kvm_mmu_memory_cache cache = { 0, };

	end = (guest_ipa + size + PAGE_SIZE - 1) & PAGE_MASK;
	pfn = __phys_to_pfn(pa);

	for (addr = guest_ipa; addr < end; addr += PAGE_SIZE) {
		pte_t pte = pfn_pte(pfn, PAGE_S2_DEVICE);

		if (writable)
			kvm_set_s2pte_writable(&pte);

		ret = mmu_topup_memory_cache(&cache, KVM_MMU_CACHE_MIN_PAGES,
					     KVM_NR_MEM_OBJS);
		if (ret)
			goto out;
		spin_lock(&kvm->mmu_lock);
		ret = stage2_set_pte(kvm, &cache, addr, &pte,
				     KVM_S2PTE_FLAG_IS_IOMAP);
		spin_unlock(&kvm->mmu_lock);
		if (ret)
			goto out;

		pfn++;
	}

out:
	mmu_free_memory_cache(&cache);
	return ret;
}
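
/*
 * If the page at *pfnp is part of a transparent huge page and the
 * IPA/PFN alignment allows it, adjust *ipap and *pfnp to the head page
 * so the fault can be mapped with a stage-2 PMD block entry. Returns
 * true when a PMD mapping should be used.
 */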
static bool transparent_hugepage_adjust(pfn_t *pfnp, phys_addr_t *ipap)
{
	pfn_t pfn = *pfnp;
	gfn_t gfn = *ipap >> PAGE_SHIFT;

	if (PageTransCompound(pfn_to_page(pfn))) {
		unsigned long mask;
		/*
		 * The address we faulted on is backed by a transparent huge
		 * page. However, because we map the compound huge page and
		 * not the individual tail page, we need to transfer the
		 * refcount to the head page. We have to be careful that the
		 * THP doesn't start to split while we are adjusting the
		 * refcounts.
		 *
		 * We are sure this doesn't happen, because mmu_notifier_retry
		 * was successful and we are holding the mmu_lock, so if this
		 * THP is trying to split, it will be blocked in the mmu
		 * notifier before touching any of the pages, specifically
		 * before being able to call __split_huge_page_refcount().
		 *
		 * We can therefore safely transfer the refcount from PG_tail
		 * to PG_head and switch the pfn from a tail page to the head
		 * page accordingly.
		 */
		mask = PTRS_PER_PMD - 1;
		VM_BUG_ON((gfn & mask) != (pfn & mask));
		if (pfn & mask) {
			*ipap &= PMD_MASK;
			kvm_release_pfn_clean(pfn);
			pfn &= ~mask;
			kvm_get_pfn(pfn);
			*pfnp = pfn;
		}

		return true;
	}

	return false;
}
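
/*
 * An instruction abort is never a write fault; for data aborts, report
 * whether the access that faulted was a write.
 */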
static bool kvm_is_write_fault(struct kvm_vcpu *vcpu)
{
	if (kvm_vcpu_trap_is_iabt(vcpu))
		return false;

	return kvm_vcpu_dabt_iswrite(vcpu);
}

/**
 * stage2_wp_ptes - write protect PMD range
 * @pmd:	pointer to pmd entry
 * @addr:	range start address
 * @end:	range end address
 */
static void stage2_wp_ptes(pmd_t *pmd, phys_addr_t addr, phys_addr_t end)
{
	pte_t *pte;

	pte = pte_offset_kernel(pmd, addr);
	do {
		if (!pte_none(*pte)) {
			if (!kvm_s2pte_readonly(pte))
				kvm_set_s2pte_readonly(pte);
		}
	} while (pte++, addr += PAGE_SIZE, addr != end);
}

/**
 * stage2_wp_pmds - write protect PUD range
 * @pud:	pointer to pud entry
 * @addr:	range start address
 * @end:	range end address
 */
static void stage2_wp_pmds(pud_t *pud, phys_addr_t addr, phys_addr_t end)
{
	pmd_t *pmd;
	phys_addr_t next;

	pmd = pmd_offset(pud, addr);

	do {
		next = kvm_pmd_addr_end(addr, end);
		if (!pmd_none(*pmd)) {
			if (kvm_pmd_huge(*pmd)) {
				if (!kvm_s2pmd_readonly(pmd))
					kvm_set_s2pmd_readonly(pmd);
			} else {
				stage2_wp_ptes(pmd, addr, next);
			}
		}
	} while (pmd++, addr = next, addr != end);
}

/**
 * stage2_wp_puds - write protect PGD range
 * @pgd:	pointer to pgd entry
 * @addr:	range start address
 * @end:	range end address
 *
 * Process PUD entries, for a huge PUD we cause a panic.
 */
static void stage2_wp_puds(pgd_t *pgd, phys_addr_t addr, phys_addr_t end)
{
	pud_t *pud;
	phys_addr_t next;

	pud = pud_offset(pgd, addr);
	do {
		next = kvm_pud_addr_end(addr, end);
		if (!pud_none(*pud)) {
			/* TODO: PUD not supported, revisit later if supported */
			BUG_ON(kvm_pud_huge(*pud));
			stage2_wp_pmds(pud, addr, next);
		}
	} while (pud++, addr = next, addr != end);
}

/**
 * stage2_wp_range() - write protect stage2 memory region range
 * @kvm:	The KVM pointer
 * @addr:	Start address of range
 * @end:	End address of range
 */
static void stage2_wp_range(struct kvm *kvm, phys_addr_t addr, phys_addr_t end)
{
	pgd_t *pgd;
	phys_addr_t next;

	pgd = kvm->arch.pgd + kvm_pgd_index(addr);
	do {
		/*
		 * Release kvm_mmu_lock periodically if the memory region is
		 * large. Otherwise, we may see kernel panics with
		 * CONFIG_DETECT_HUNG_TASK, CONFIG_LOCKUP_DETECTOR,
		 * CONFIG_LOCKDEP. Additionally, holding the lock too long
		 * will also starve other vCPUs.
		 */
		if (need_resched() || spin_needbreak(&kvm->mmu_lock))
			cond_resched_lock(&kvm->mmu_lock);

		next = kvm_pgd_addr_end(addr, end);
		if (pgd_present(*pgd))
			stage2_wp_puds(pgd, addr, next);
	} while (pgd++, addr = next, addr != end);
}

/**
 * kvm_mmu_wp_memory_region() - write protect stage 2 entries for memory slot
 * @kvm:	The KVM pointer
 * @slot:	The memory slot to write protect
 *
 * Called to start logging dirty pages after memory region
 * KVM_MEM_LOG_DIRTY_PAGES operation is called. After this function returns,
 * all present PMDs and PTEs in the memory region are write protected.
 * Afterwards the dirty page log can be read.
 *
 * Acquires kvm_mmu_lock. Called with kvm->slots_lock mutex acquired,
 * serializing operations for VM memory regions.
 */
void kvm_mmu_wp_memory_region(struct kvm *kvm, int slot)
{
	struct kvm_memslots *slots = kvm_memslots(kvm);
	struct kvm_memory_slot *memslot = id_to_memslot(slots, slot);
	phys_addr_t start = memslot->base_gfn << PAGE_SHIFT;
	phys_addr_t end = (memslot->base_gfn + memslot->npages) << PAGE_SHIFT;

	spin_lock(&kvm->mmu_lock);
	stage2_wp_range(kvm, start, end);
	spin_unlock(&kvm->mmu_lock);
	kvm_flush_remote_tlbs(kvm);
}

/**
 * kvm_mmu_write_protect_pt_masked() - write protect dirty pages
 * @kvm:	The KVM pointer
 * @slot:	The memory slot associated with mask
 * @gfn_offset:	The gfn offset in memory slot
 * @mask:	The mask of dirty pages at offset 'gfn_offset' in this memory
 *		slot to be write protected
 *
 * Walks the bits set in @mask and write protects the associated PTEs. The
 * caller must hold kvm_mmu_lock.
 */
static void kvm_mmu_write_protect_pt_masked(struct kvm *kvm,
		struct kvm_memory_slot *slot,
		gfn_t gfn_offset, unsigned long mask)
{
	phys_addr_t base_gfn = slot->base_gfn + gfn_offset;
	phys_addr_t start = (base_gfn + __ffs(mask)) << PAGE_SHIFT;
	phys_addr_t end = (base_gfn + __fls(mask) + 1) << PAGE_SHIFT;

	stage2_wp_range(kvm, start, end);
}

/*
 * kvm_arch_mmu_enable_log_dirty_pt_masked - enable dirty logging for selected
 * dirty pages.
 *
 * It calls kvm_mmu_write_protect_pt_masked to write protect selected pages to
 * enable dirty logging for them.
 */
void kvm_arch_mmu_enable_log_dirty_pt_masked(struct kvm *kvm,
		struct kvm_memory_slot *slot,
		gfn_t gfn_offset, unsigned long mask)
{
	kvm_mmu_write_protect_pt_masked(kvm, slot, gfn_offset, mask);
}
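
/*
 * Perform the cache maintenance needed to make the guest page(s) at
 * @pfn coherent before they are mapped at stage 2.
 */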
static void coherent_cache_guest_page(struct kvm_vcpu *vcpu, pfn_t pfn,
				      unsigned long size, bool uncached)
{
	__coherent_cache_guest_page(vcpu, pfn, size, uncached);
}
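
/*
 * Handle a stage-2 translation or permission fault on memory backed by
 * a memslot: resolve the host page, decide between a huge (PMD) and a
 * regular (PTE) mapping, perform the required cache maintenance and
 * install the new stage-2 entry.
 */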
static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
			  struct kvm_memory_slot *memslot, unsigned long hva,
			  unsigned long fault_status)
{
	int ret;
	bool write_fault, writable, hugetlb = false, force_pte = false;
	unsigned long mmu_seq;
	gfn_t gfn = fault_ipa >> PAGE_SHIFT;
	struct kvm *kvm = vcpu->kvm;
	struct kvm_mmu_memory_cache *memcache = &vcpu->arch.mmu_page_cache;
	struct vm_area_struct *vma;
	pfn_t pfn;
	pgprot_t mem_type = PAGE_S2;
	bool fault_ipa_uncached;
	bool logging_active = memslot_is_logging(memslot);
	unsigned long flags = 0;

	write_fault = kvm_is_write_fault(vcpu);
	if (fault_status == FSC_PERM && !write_fault) {
		kvm_err("Unexpected L2 read permission error\n");
		return -EFAULT;
	}

	/* Let's check if we will get back a huge page backed by hugetlbfs */
	down_read(&current->mm->mmap_sem);
	vma = find_vma_intersection(current->mm, hva, hva + 1);
	if (unlikely(!vma)) {
		kvm_err("Failed to find VMA for hva 0x%lx\n", hva);
		up_read(&current->mm->mmap_sem);
		return -EFAULT;
	}

	if (is_vm_hugetlb_page(vma) && !logging_active) {
		hugetlb = true;
		gfn = (fault_ipa & PMD_MASK) >> PAGE_SHIFT;
	} else {
		/*
		 * Pages belonging to memslots that don't have the same
		 * alignment for userspace and IPA cannot be mapped using
		 * block descriptors even if the pages belong to a THP for
		 * the process, because the stage-2 block descriptor will
		 * cover more than a single THP and we lose atomicity for
		 * unmapping, updates, and splits of the THP or other pages
		 * in the stage-2 block range.
		 */
		if ((memslot->userspace_addr & ~PMD_MASK) !=
		    ((memslot->base_gfn << PAGE_SHIFT) & ~PMD_MASK))
			force_pte = true;
	}
	up_read(&current->mm->mmap_sem);

	/* We need minimum second+third level pages */
	ret = mmu_topup_memory_cache(memcache, KVM_MMU_CACHE_MIN_PAGES,
				     KVM_NR_MEM_OBJS);
	if (ret)
		return ret;

	mmu_seq = vcpu->kvm->mmu_notifier_seq;
	/*
	 * Ensure the read of mmu_notifier_seq happens before we call
	 * gfn_to_pfn_prot (which calls get_user_pages), so that we don't risk
	 * the page we just got a reference to gets unmapped before we have a
	 * chance to grab the mmu_lock, which ensures that if the page gets
	 * unmapped afterwards, the call to kvm_unmap_hva will take it away
	 * from us again properly. This smp_rmb() interacts with the smp_wmb()
	 * in kvm_mmu_notifier_invalidate_<page|range_end>.
	 */
	smp_rmb();

	pfn = gfn_to_pfn_prot(kvm, gfn, write_fault, &writable);
	if (is_error_pfn(pfn))
		return -EFAULT;

	if (kvm_is_device_pfn(pfn)) {
		mem_type = PAGE_S2_DEVICE;
		flags |= KVM_S2PTE_FLAG_IS_IOMAP;
	} else if (logging_active) {
		/*
		 * Faults on pages in a memslot with logging enabled
		 * should not be mapped with huge pages (it introduces churn
		 * and performance degradation), so force a pte mapping.
		 */
		force_pte = true;
		flags |= KVM_S2_FLAG_LOGGING_ACTIVE;

		/*
		 * Only actually map the page as writable if this was a write
		 * fault.
		 */
		if (!write_fault)
			writable = false;
	}

	spin_lock(&kvm->mmu_lock);
	if (mmu_notifier_retry(kvm, mmu_seq))
		goto out_unlock;

	if (!hugetlb && !force_pte)
		hugetlb = transparent_hugepage_adjust(&pfn, &fault_ipa);

	fault_ipa_uncached = memslot->flags & KVM_MEMSLOT_INCOHERENT;

	if (hugetlb) {
		pmd_t new_pmd = pfn_pmd(pfn, mem_type);
		new_pmd = pmd_mkhuge(new_pmd);
		if (writable) {
			kvm_set_s2pmd_writable(&new_pmd);
			kvm_set_pfn_dirty(pfn);
		}
		coherent_cache_guest_page(vcpu, pfn, PMD_SIZE, fault_ipa_uncached);
		ret = stage2_set_pmd_huge(kvm, memcache, fault_ipa, &new_pmd);
	} else {
		pte_t new_pte = pfn_pte(pfn, mem_type);

		if (writable) {
			kvm_set_s2pte_writable(&new_pte);
			kvm_set_pfn_dirty(pfn);
			mark_page_dirty(kvm, gfn);
		}
		coherent_cache_guest_page(vcpu, pfn, PAGE_SIZE, fault_ipa_uncached);
		ret = stage2_set_pte(kvm, memcache, fault_ipa, &new_pte, flags);
	}

out_unlock:
	spin_unlock(&kvm->mmu_lock);
	kvm_set_pfn_accessed(pfn);
	kvm_release_pfn_clean(pfn);
	return ret;
}
/*
 * Resolve the access fault by making the page young again.
 * Note that because the faulting entry is guaranteed not to be
 * cached in the TLB, we don't need to invalidate anything.
 */
static void handle_access_fault(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa)
{
	pmd_t *pmd;
	pte_t *pte;
	pfn_t pfn;
	bool pfn_valid = false;

	trace_kvm_access_fault(fault_ipa);

	spin_lock(&vcpu->kvm->mmu_lock);

	pmd = stage2_get_pmd(vcpu->kvm, NULL, fault_ipa);
	if (!pmd || pmd_none(*pmd))	/* Nothing there */
		goto out;

	if (kvm_pmd_huge(*pmd)) {	/* THP, HugeTLB */
		*pmd = pmd_mkyoung(*pmd);
		pfn = pmd_pfn(*pmd);
		pfn_valid = true;
		goto out;
	}

	pte = pte_offset_kernel(pmd, fault_ipa);
	if (pte_none(*pte))		/* Nothing there either */
		goto out;

	*pte = pte_mkyoung(*pte);	/* Just a page... */
	pfn = pte_pfn(*pte);
	pfn_valid = true;
out:
	spin_unlock(&vcpu->kvm->mmu_lock);
	if (pfn_valid)
		kvm_set_pfn_accessed(pfn);
}

/**
 * kvm_handle_guest_abort - handles all 2nd stage aborts
 * @vcpu:	the VCPU pointer
 * @run:	the kvm_run structure
 *
 * Any abort that gets to the host is almost guaranteed to be caused by a
 * missing second stage translation table entry, which can mean that either the
 * guest simply needs more memory and we must allocate an appropriate page or it
 * can mean that the guest tried to access I/O memory, which is emulated by user
 * space. The distinction is based on the IPA causing the fault and whether this
 * memory region has been registered as standard RAM by user space.
 */
int kvm_handle_guest_abort(struct kvm_vcpu *vcpu, struct kvm_run *run)
{
	unsigned long fault_status;
	phys_addr_t fault_ipa;
	struct kvm_memory_slot *memslot;
	unsigned long hva;
	bool is_iabt, write_fault, writable;
	gfn_t gfn;
	int ret, idx;

	is_iabt = kvm_vcpu_trap_is_iabt(vcpu);
	fault_ipa = kvm_vcpu_get_fault_ipa(vcpu);

	trace_kvm_guest_fault(*vcpu_pc(vcpu), kvm_vcpu_get_hsr(vcpu),
			      kvm_vcpu_get_hfar(vcpu), fault_ipa);

	/* Check the stage-2 fault is trans. fault or write fault */
	fault_status = kvm_vcpu_trap_get_fault_type(vcpu);
	if (fault_status != FSC_FAULT && fault_status != FSC_PERM &&
	    fault_status != FSC_ACCESS) {
		kvm_err("Unsupported FSC: EC=%#x xFSC=%#lx ESR_EL2=%#lx\n",
			kvm_vcpu_trap_get_class(vcpu),
			(unsigned long)kvm_vcpu_trap_get_fault(vcpu),
			(unsigned long)kvm_vcpu_get_hsr(vcpu));
		return -EFAULT;
	}

	idx = srcu_read_lock(&vcpu->kvm->srcu);

	gfn = fault_ipa >> PAGE_SHIFT;
	memslot = gfn_to_memslot(vcpu->kvm, gfn);
	hva = gfn_to_hva_memslot_prot(memslot, gfn, &writable);
	write_fault = kvm_is_write_fault(vcpu);
	if (kvm_is_error_hva(hva) || (write_fault && !writable)) {
		if (is_iabt) {
			/* Prefetch Abort on I/O address */
			kvm_inject_pabt(vcpu, kvm_vcpu_get_hfar(vcpu));
			ret = 1;
			goto out_unlock;
		}

		/*
		 * The IPA is reported as [MAX:12], so we need to
		 * complement it with the bottom 12 bits from the
		 * faulting VA. This is always 12 bits, irrespective
		 * of the page size.
		 */
		fault_ipa |= kvm_vcpu_get_hfar(vcpu) & ((1 << 12) - 1);
		ret = io_mem_abort(vcpu, run, fault_ipa);
		goto out_unlock;
	}

	/* Userspace should not be able to register out-of-bounds IPAs */
	VM_BUG_ON(fault_ipa >= KVM_PHYS_SIZE);

	if (fault_status == FSC_ACCESS) {
		handle_access_fault(vcpu, fault_ipa);
		ret = 1;
		goto out_unlock;
	}

	ret = user_mem_abort(vcpu, fault_ipa, memslot, hva, fault_status);
	if (ret == 0)
		ret = 1;
out_unlock:
	srcu_read_unlock(&vcpu->kvm->srcu, idx);
	return ret;
}
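
/*
 * Iterate over every memslot intersecting [start, end) and invoke
 * @handler on each guest physical page in the intersection. Returns the
 * bitwise OR of the handler's return values.
 */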
static int handle_hva_to_gpa(struct kvm *kvm,
			     unsigned long start,
			     unsigned long end,
			     int (*handler)(struct kvm *kvm,
					    gpa_t gpa, void *data),
			     void *data)
{
	struct kvm_memslots *slots;
	struct kvm_memory_slot *memslot;
	int ret = 0;

	slots = kvm_memslots(kvm);

	/* we only care about the pages that the guest sees */
	kvm_for_each_memslot(memslot, slots) {
		unsigned long hva_start, hva_end;
		gfn_t gfn, gfn_end;

		hva_start = max(start, memslot->userspace_addr);
		hva_end = min(end, memslot->userspace_addr +
					(memslot->npages << PAGE_SHIFT));
		if (hva_start >= hva_end)
			continue;

		/*
		 * {gfn(page) | page intersects with [hva_start, hva_end)} =
		 * {gfn_start, gfn_start+1, ..., gfn_end-1}.
		 */
		gfn = hva_to_gfn_memslot(hva_start, memslot);
		gfn_end = hva_to_gfn_memslot(hva_end + PAGE_SIZE - 1, memslot);

		for (; gfn < gfn_end; ++gfn) {
			gpa_t gpa = gfn << PAGE_SHIFT;
			ret |= handler(kvm, gpa, data);
		}
	}

	return ret;
}

static int kvm_unmap_hva_handler(struct kvm *kvm, gpa_t gpa, void *data)
{
	unmap_stage2_range(kvm, gpa, PAGE_SIZE);
	return 0;
}

int kvm_unmap_hva(struct kvm *kvm, unsigned long hva)
{
	unsigned long end = hva + PAGE_SIZE;

	if (!kvm->arch.pgd)
		return 0;

	trace_kvm_unmap_hva(hva);
	handle_hva_to_gpa(kvm, hva, end, &kvm_unmap_hva_handler, NULL);
	return 0;
}

int kvm_unmap_hva_range(struct kvm *kvm,
			unsigned long start, unsigned long end)
{
	if (!kvm->arch.pgd)
		return 0;

	trace_kvm_unmap_hva_range(start, end);
	handle_hva_to_gpa(kvm, start, end, &kvm_unmap_hva_handler, NULL);
	return 0;
}

static int kvm_set_spte_handler(struct kvm *kvm, gpa_t gpa, void *data)
{
	pte_t *pte = (pte_t *)data;

	/*
	 * We can always call stage2_set_pte with the KVM_S2_FLAG_LOGGING_ACTIVE
	 * flag clear because MMU notifiers will have unmapped a huge PMD before
	 * calling ->change_pte() (which in turn calls kvm_set_spte_hva()) and
	 * therefore stage2_set_pte() never needs to clear out a huge PMD
	 * through this calling path.
	 */
	stage2_set_pte(kvm, NULL, gpa, pte, 0);
	return 0;
}


void kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte)
{
	unsigned long end = hva + PAGE_SIZE;
	pte_t stage2_pte;

	if (!kvm->arch.pgd)
		return;

	trace_kvm_set_spte_hva(hva);
	stage2_pte = pfn_pte(pte_pfn(pte), PAGE_S2);
	handle_hva_to_gpa(kvm, hva, end, &kvm_set_spte_handler, &stage2_pte);
}
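
/*
 * MMU notifier aging: clear the accessed (young) state of the stage-2
 * entry mapping @gpa and return 1 if it was young, 0 otherwise.
 */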
static int kvm_age_hva_handler(struct kvm *kvm, gpa_t gpa, void *data)
{
	pmd_t *pmd;
	pte_t *pte;

	pmd = stage2_get_pmd(kvm, NULL, gpa);
	if (!pmd || pmd_none(*pmd))	/* Nothing there */
		return 0;

	if (kvm_pmd_huge(*pmd)) {	/* THP, HugeTLB */
		if (pmd_young(*pmd)) {
			*pmd = pmd_mkold(*pmd);
			return 1;
		}

		return 0;
	}

	pte = pte_offset_kernel(pmd, gpa);
	if (pte_none(*pte))
		return 0;

	if (pte_young(*pte)) {
		*pte = pte_mkold(*pte);	/* Just a page... */
		return 1;
	}

	return 0;
}

static int kvm_test_age_hva_handler(struct kvm *kvm, gpa_t gpa, void *data)
{
	pmd_t *pmd;
	pte_t *pte;

	pmd = stage2_get_pmd(kvm, NULL, gpa);
	if (!pmd || pmd_none(*pmd))	/* Nothing there */
		return 0;

	if (kvm_pmd_huge(*pmd))		/* THP, HugeTLB */
		return pmd_young(*pmd);

	pte = pte_offset_kernel(pmd, gpa);
	if (!pte_none(*pte))		/* Just a page... */
		return pte_young(*pte);

	return 0;
}

int kvm_age_hva(struct kvm *kvm, unsigned long start, unsigned long end)
{
	trace_kvm_age_hva(start, end);
	return handle_hva_to_gpa(kvm, start, end, kvm_age_hva_handler, NULL);
}

int kvm_test_age_hva(struct kvm *kvm, unsigned long hva)
{
	trace_kvm_test_age_hva(hva);
	return handle_hva_to_gpa(kvm, hva, hva, kvm_test_age_hva_handler, NULL);
}

void kvm_mmu_free_memory_caches(struct kvm_vcpu *vcpu)
{
	mmu_free_memory_cache(&vcpu->arch.mmu_page_cache);
}

phys_addr_t kvm_mmu_get_httbr(void)
{
	if (__kvm_cpu_uses_extended_idmap())
		return virt_to_phys(merged_hyp_pgd);
	else
		return virt_to_phys(hyp_pgd);
}

phys_addr_t kvm_mmu_get_boot_httbr(void)
{
	if (__kvm_cpu_uses_extended_idmap())
		return virt_to_phys(merged_hyp_pgd);
	else
		return virt_to_phys(boot_hyp_pgd);
}

phys_addr_t kvm_get_idmap_vector(void)
{
	return hyp_idmap_vector;
}
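
/*
 * Set up the HYP page tables: allocate the boot and runtime PGDs,
 * identity-map the HYP init code and map the trampoline page, or merge
 * everything into a single table when the CPU uses an extended idmap.
 */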
int kvm_mmu_init(void)
{
	int err;

	hyp_idmap_start = kvm_virt_to_phys(__hyp_idmap_text_start);
	hyp_idmap_end = kvm_virt_to_phys(__hyp_idmap_text_end);
	hyp_idmap_vector = kvm_virt_to_phys(__kvm_hyp_init);

	/*
	 * We rely on the linker script to ensure at build time that the HYP
	 * init code does not cross a page boundary.
	 */
	BUG_ON((hyp_idmap_start ^ (hyp_idmap_end - 1)) & PAGE_MASK);

	hyp_pgd = (pgd_t *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, hyp_pgd_order);
	boot_hyp_pgd = (pgd_t *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, hyp_pgd_order);

	if (!hyp_pgd || !boot_hyp_pgd) {
		kvm_err("Hyp mode PGD not allocated\n");
		err = -ENOMEM;
		goto out;
	}

	/* Create the idmap in the boot page tables */
	err = __create_hyp_mappings(boot_hyp_pgd,
				    hyp_idmap_start, hyp_idmap_end,
				    __phys_to_pfn(hyp_idmap_start),
				    PAGE_HYP);

	if (err) {
		kvm_err("Failed to idmap %lx-%lx\n",
			hyp_idmap_start, hyp_idmap_end);
		goto out;
	}

	if (__kvm_cpu_uses_extended_idmap()) {
		merged_hyp_pgd = (pgd_t *)__get_free_page(GFP_KERNEL | __GFP_ZERO);
		if (!merged_hyp_pgd) {
			kvm_err("Failed to allocate extra HYP pgd\n");
			err = -ENOMEM;
			goto out;
		}
		__kvm_extend_hypmap(boot_hyp_pgd, hyp_pgd, merged_hyp_pgd,
				    hyp_idmap_start);
		return 0;
	}

	/* Map the very same page at the trampoline VA */
	err = __create_hyp_mappings(boot_hyp_pgd,
				    TRAMPOLINE_VA, TRAMPOLINE_VA + PAGE_SIZE,
				    __phys_to_pfn(hyp_idmap_start),
				    PAGE_HYP);
	if (err) {
		kvm_err("Failed to map trampoline @%lx into boot HYP pgd\n",
			TRAMPOLINE_VA);
		goto out;
	}

	/* Map the same page again into the runtime page tables */
	err = __create_hyp_mappings(hyp_pgd,
				    TRAMPOLINE_VA, TRAMPOLINE_VA + PAGE_SIZE,
				    __phys_to_pfn(hyp_idmap_start),
				    PAGE_HYP);
	if (err) {
		kvm_err("Failed to map trampoline @%lx into runtime HYP pgd\n",
			TRAMPOLINE_VA);
		goto out;
	}

	return 0;
out:
	free_hyp_pgds();
	return err;
}

void kvm_arch_commit_memory_region(struct kvm *kvm,
				   const struct kvm_userspace_memory_region *mem,
				   const struct kvm_memory_slot *old,
				   const struct kvm_memory_slot *new,
				   enum kvm_mr_change change)
{
	/*
	 * At this point memslot has been committed and there is an
	 * allocated dirty_bitmap[], dirty pages will be tracked while the
	 * memory slot is write protected.
	 */
	if (change != KVM_MR_DELETE && mem->flags & KVM_MEM_LOG_DIRTY_PAGES)
		kvm_mmu_wp_memory_region(kvm, mem->slot);
}
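
/*
 * Validate a memslot create/move/flags update: reject regions that fall
 * outside the guest IPA space and eagerly map any PFNMAP (device) VMAs
 * the region covers.
 */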
int kvm_arch_prepare_memory_region(struct kvm *kvm,
				   struct kvm_memory_slot *memslot,
				   const struct kvm_userspace_memory_region *mem,
				   enum kvm_mr_change change)
{
	hva_t hva = mem->userspace_addr;
	hva_t reg_end = hva + mem->memory_size;
	bool writable = !(mem->flags & KVM_MEM_READONLY);
	int ret = 0;

	if (change != KVM_MR_CREATE && change != KVM_MR_MOVE &&
	    change != KVM_MR_FLAGS_ONLY)
		return 0;

	/*
	 * Prevent userspace from creating a memory region outside of the
	 * IPA space addressable by the guest.
	 */
	if (memslot->base_gfn + memslot->npages >=
	    (KVM_PHYS_SIZE >> PAGE_SHIFT))
		return -EFAULT;

	/*
	 * A memory region could potentially cover multiple VMAs, and any holes
	 * between them, so iterate over all of them to find out if we can map
	 * any of them right now.
	 *
	 *     +--------------------------------------------+
	 * +---------------+----------------+   +----------------+
	 * |   : VMA 1     |      VMA 2     |   |    VMA 3  :    |
	 * +---------------+----------------+   +----------------+
	 *     |               memory region                |
	 *     +--------------------------------------------+
	 */
	do {
		struct vm_area_struct *vma = find_vma(current->mm, hva);
		hva_t vm_start, vm_end;

		if (!vma || vma->vm_start >= reg_end)
			break;

		/*
		 * Mapping a read-only VMA is only allowed if the
		 * memory region is configured as read-only.
		 */
		if (writable && !(vma->vm_flags & VM_WRITE)) {
			ret = -EPERM;
			break;
		}

		/*
		 * Take the intersection of this VMA with the memory region
		 */
		vm_start = max(hva, vma->vm_start);
		vm_end = min(reg_end, vma->vm_end);

		if (vma->vm_flags & VM_PFNMAP) {
			gpa_t gpa = mem->guest_phys_addr +
				    (vm_start - mem->userspace_addr);
			phys_addr_t pa;

			pa = (phys_addr_t)vma->vm_pgoff << PAGE_SHIFT;
			pa += vm_start - vma->vm_start;

			/* IO region dirty page logging not allowed */
			if (memslot->flags & KVM_MEM_LOG_DIRTY_PAGES)
				return -EINVAL;

			ret = kvm_phys_addr_ioremap(kvm, gpa, pa,
						    vm_end - vm_start,
						    writable);
			if (ret)
				break;
		}
		hva = vm_end;
	} while (hva < reg_end);

	if (change == KVM_MR_FLAGS_ONLY)
		return ret;

	spin_lock(&kvm->mmu_lock);
	if (ret)
		unmap_stage2_range(kvm, mem->guest_phys_addr, mem->memory_size);
	else
		stage2_flush_memslot(kvm, memslot);
	spin_unlock(&kvm->mmu_lock);
	return ret;
}

void kvm_arch_free_memslot(struct kvm *kvm, struct kvm_memory_slot *free,
			   struct kvm_memory_slot *dont)
{
}

int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot,
			    unsigned long npages)
{
	/*
	 * Readonly memslots are not incoherent with the caches by definition,
	 * but in practice, they are used mostly to emulate ROMs or NOR flashes
	 * that the guest may consider devices and hence map as uncached.
	 * To prevent incoherency issues in these cases, tag all readonly
	 * regions as incoherent.
	 */
	if (slot->flags & KVM_MEM_READONLY)
		slot->flags |= KVM_MEMSLOT_INCOHERENT;
	return 0;
}

void kvm_arch_memslots_updated(struct kvm *kvm, struct kvm_memslots *slots)
{
}

void kvm_arch_flush_shadow_all(struct kvm *kvm)
{
}

void kvm_arch_flush_shadow_memslot(struct kvm *kvm,
				   struct kvm_memory_slot *slot)
{
	gpa_t gpa = slot->base_gfn << PAGE_SHIFT;
	phys_addr_t size = slot->npages << PAGE_SHIFT;

	spin_lock(&kvm->mmu_lock);
	unmap_stage2_range(kvm, gpa, size);
	spin_unlock(&kvm->mmu_lock);
}

/*
 * See note at ARMv7 ARM B1.14.4 (TL;DR: S/W ops are not easily virtualized).
 *
 * Main problems:
 * - S/W ops are local to a CPU (not broadcast)
 * - We have line migration behind our back (speculation)
 * - System caches don't support S/W at all (damn!)
 *
 * In the face of the above, the best we can do is to try and convert
 * S/W ops to VA ops. Because the guest is not allowed to infer the
 * S/W to PA mapping, it can only use S/W to nuke the whole cache,
 * which is a rather good thing for us.
 *
 * Also, it is only used when turning caches on/off ("The expected
 * usage of the cache maintenance instructions that operate by set/way
 * is associated with the cache maintenance instructions associated
 * with the powerdown and powerup of caches, if this is required by
 * the implementation.").
 *
 * We use the following policy:
 *
 * - If we trap a S/W operation, we enable VM trapping to detect
 *   caches being turned on/off, and do a full clean.
 *
 * - We flush the caches on both caches being turned on and off.
 *
 * - Once the caches are enabled, we stop trapping VM ops.
 */
void kvm_set_way_flush(struct kvm_vcpu *vcpu)
{
	unsigned long hcr = vcpu_get_hcr(vcpu);

	/*
	 * If this is the first time we do a S/W operation
	 * (i.e. HCR_TVM not set) flush the whole memory, and set the
	 * VM trapping.
	 *
	 * Otherwise, rely on the VM trapping to wait for the MMU +
	 * Caches to be turned off. At that point, we'll be able to
	 * clean the caches again.
	 */
	if (!(hcr & HCR_TVM)) {
		trace_kvm_set_way_flush(*vcpu_pc(vcpu),
					vcpu_has_cache_enabled(vcpu));
		stage2_flush_vm(vcpu->kvm);
		vcpu_set_hcr(vcpu, hcr | HCR_TVM);
	}
}

void kvm_toggle_cache(struct kvm_vcpu *vcpu, bool was_enabled)
{
	bool now_enabled = vcpu_has_cache_enabled(vcpu);

	/*
	 * If switching the MMU+caches on, need to invalidate the caches.
	 * If switching it off, need to clean the caches.
	 * Clean + invalidate does the trick always.
	 */
	if (now_enabled != was_enabled)
		stage2_flush_vm(vcpu->kvm);

	/* Caches are now on, stop trapping VM ops (until a S/W op) */
	if (now_enabled)
		vcpu_set_hcr(vcpu, vcpu_get_hcr(vcpu) & ~HCR_TVM);

	trace_kvm_toggle_cache(*vcpu_pc(vcpu), was_enabled, now_enabled);
}