1/* 2 * vMTRR implementation 3 * 4 * Copyright (C) 2006 Qumranet, Inc. 5 * Copyright 2010 Red Hat, Inc. and/or its affiliates. 6 * Copyright(C) 2015 Intel Corporation. 7 * 8 * Authors: 9 * Yaniv Kamay <yaniv@qumranet.com> 10 * Avi Kivity <avi@qumranet.com> 11 * Marcelo Tosatti <mtosatti@redhat.com> 12 * Paolo Bonzini <pbonzini@redhat.com> 13 * Xiao Guangrong <guangrong.xiao@linux.intel.com> 14 * 15 * This work is licensed under the terms of the GNU GPL, version 2. See 16 * the COPYING file in the top-level directory. 17 */ 18 19#include <linux/kvm_host.h> 20#include <asm/mtrr.h> 21 22#include "cpuid.h" 23#include "mmu.h" 24 25#define IA32_MTRR_DEF_TYPE_E (1ULL << 11) 26#define IA32_MTRR_DEF_TYPE_FE (1ULL << 10) 27#define IA32_MTRR_DEF_TYPE_TYPE_MASK (0xff) 28 29static bool msr_mtrr_valid(unsigned msr) 30{ 31 switch (msr) { 32 case 0x200 ... 0x200 + 2 * KVM_NR_VAR_MTRR - 1: 33 case MSR_MTRRfix64K_00000: 34 case MSR_MTRRfix16K_80000: 35 case MSR_MTRRfix16K_A0000: 36 case MSR_MTRRfix4K_C0000: 37 case MSR_MTRRfix4K_C8000: 38 case MSR_MTRRfix4K_D0000: 39 case MSR_MTRRfix4K_D8000: 40 case MSR_MTRRfix4K_E0000: 41 case MSR_MTRRfix4K_E8000: 42 case MSR_MTRRfix4K_F0000: 43 case MSR_MTRRfix4K_F8000: 44 case MSR_MTRRdefType: 45 case MSR_IA32_CR_PAT: 46 return true; 47 } 48 return false; 49} 50 51static bool valid_pat_type(unsigned t) 52{ 53 return t < 8 && (1 << t) & 0xf3; /* 0, 1, 4, 5, 6, 7 */ 54} 55 56static bool valid_mtrr_type(unsigned t) 57{ 58 return t < 8 && (1 << t) & 0x73; /* 0, 1, 4, 5, 6 */ 59} 60 61bool kvm_mtrr_valid(struct kvm_vcpu *vcpu, u32 msr, u64 data) 62{ 63 int i; 64 u64 mask; 65 66 if (!msr_mtrr_valid(msr)) 67 return false; 68 69 if (msr == MSR_IA32_CR_PAT) { 70 for (i = 0; i < 8; i++) 71 if (!valid_pat_type((data >> (i * 8)) & 0xff)) 72 return false; 73 return true; 74 } else if (msr == MSR_MTRRdefType) { 75 if (data & ~0xcff) 76 return false; 77 return valid_mtrr_type(data & 0xff); 78 } else if (msr >= MSR_MTRRfix64K_00000 && msr <= MSR_MTRRfix4K_F8000) { 79 for (i = 0; i < 8 ; i++) 80 if (!valid_mtrr_type((data >> (i * 8)) & 0xff)) 81 return false; 82 return true; 83 } 84 85 /* variable MTRRs */ 86 WARN_ON(!(msr >= 0x200 && msr < 0x200 + 2 * KVM_NR_VAR_MTRR)); 87 88 mask = (~0ULL) << cpuid_maxphyaddr(vcpu); 89 if ((msr & 1) == 0) { 90 /* MTRR base */ 91 if (!valid_mtrr_type(data & 0xff)) 92 return false; 93 mask |= 0xf00; 94 } else 95 /* MTRR mask */ 96 mask |= 0x7ff; 97 if (data & mask) { 98 kvm_inject_gp(vcpu, 0); 99 return false; 100 } 101 102 return true; 103} 104EXPORT_SYMBOL_GPL(kvm_mtrr_valid); 105 106static bool mtrr_is_enabled(struct kvm_mtrr *mtrr_state) 107{ 108 return !!(mtrr_state->deftype & IA32_MTRR_DEF_TYPE_E); 109} 110 111static bool fixed_mtrr_is_enabled(struct kvm_mtrr *mtrr_state) 112{ 113 return !!(mtrr_state->deftype & IA32_MTRR_DEF_TYPE_FE); 114} 115 116static u8 mtrr_default_type(struct kvm_mtrr *mtrr_state) 117{ 118 return mtrr_state->deftype & IA32_MTRR_DEF_TYPE_TYPE_MASK; 119} 120 121static u8 mtrr_disabled_type(struct kvm_vcpu *vcpu) 122{ 123 /* 124 * Intel SDM 11.11.2.2: all MTRRs are disabled when 125 * IA32_MTRR_DEF_TYPE.E bit is cleared, and the UC 126 * memory type is applied to all of physical memory. 127 * 128 * However, virtual machines can be run with CPUID such that 129 * there are no MTRRs. In that case, the firmware will never 130 * enable MTRRs and it is obviously undesirable to run the 131 * guest entirely with UC memory and we use WB. 132 */ 133 if (guest_cpuid_has_mtrr(vcpu)) 134 return MTRR_TYPE_UNCACHABLE; 135 else 136 return MTRR_TYPE_WRBACK; 137} 138 139/* 140* Three terms are used in the following code: 141* - segment, it indicates the address segments covered by fixed MTRRs. 142* - unit, it corresponds to the MSR entry in the segment. 143* - range, a range is covered in one memory cache type. 144*/ 145struct fixed_mtrr_segment { 146 u64 start; 147 u64 end; 148 149 int range_shift; 150 151 /* the start position in kvm_mtrr.fixed_ranges[]. */ 152 int range_start; 153}; 154 155static struct fixed_mtrr_segment fixed_seg_table[] = { 156 /* MSR_MTRRfix64K_00000, 1 unit. 64K fixed mtrr. */ 157 { 158 .start = 0x0, 159 .end = 0x80000, 160 .range_shift = 16, /* 64K */ 161 .range_start = 0, 162 }, 163 164 /* 165 * MSR_MTRRfix16K_80000 ... MSR_MTRRfix16K_A0000, 2 units, 166 * 16K fixed mtrr. 167 */ 168 { 169 .start = 0x80000, 170 .end = 0xc0000, 171 .range_shift = 14, /* 16K */ 172 .range_start = 8, 173 }, 174 175 /* 176 * MSR_MTRRfix4K_C0000 ... MSR_MTRRfix4K_F8000, 8 units, 177 * 4K fixed mtrr. 178 */ 179 { 180 .start = 0xc0000, 181 .end = 0x100000, 182 .range_shift = 12, /* 12K */ 183 .range_start = 24, 184 } 185}; 186 187/* 188 * The size of unit is covered in one MSR, one MSR entry contains 189 * 8 ranges so that unit size is always 8 * 2^range_shift. 190 */ 191static u64 fixed_mtrr_seg_unit_size(int seg) 192{ 193 return 8 << fixed_seg_table[seg].range_shift; 194} 195 196static bool fixed_msr_to_seg_unit(u32 msr, int *seg, int *unit) 197{ 198 switch (msr) { 199 case MSR_MTRRfix64K_00000: 200 *seg = 0; 201 *unit = 0; 202 break; 203 case MSR_MTRRfix16K_80000 ... MSR_MTRRfix16K_A0000: 204 *seg = 1; 205 *unit = msr - MSR_MTRRfix16K_80000; 206 break; 207 case MSR_MTRRfix4K_C0000 ... MSR_MTRRfix4K_F8000: 208 *seg = 2; 209 *unit = msr - MSR_MTRRfix4K_C0000; 210 break; 211 default: 212 return false; 213 } 214 215 return true; 216} 217 218static void fixed_mtrr_seg_unit_range(int seg, int unit, u64 *start, u64 *end) 219{ 220 struct fixed_mtrr_segment *mtrr_seg = &fixed_seg_table[seg]; 221 u64 unit_size = fixed_mtrr_seg_unit_size(seg); 222 223 *start = mtrr_seg->start + unit * unit_size; 224 *end = *start + unit_size; 225 WARN_ON(*end > mtrr_seg->end); 226} 227 228static int fixed_mtrr_seg_unit_range_index(int seg, int unit) 229{ 230 struct fixed_mtrr_segment *mtrr_seg = &fixed_seg_table[seg]; 231 232 WARN_ON(mtrr_seg->start + unit * fixed_mtrr_seg_unit_size(seg) 233 > mtrr_seg->end); 234 235 /* each unit has 8 ranges. */ 236 return mtrr_seg->range_start + 8 * unit; 237} 238 239static int fixed_mtrr_seg_end_range_index(int seg) 240{ 241 struct fixed_mtrr_segment *mtrr_seg = &fixed_seg_table[seg]; 242 int n; 243 244 n = (mtrr_seg->end - mtrr_seg->start) >> mtrr_seg->range_shift; 245 return mtrr_seg->range_start + n - 1; 246} 247 248static bool fixed_msr_to_range(u32 msr, u64 *start, u64 *end) 249{ 250 int seg, unit; 251 252 if (!fixed_msr_to_seg_unit(msr, &seg, &unit)) 253 return false; 254 255 fixed_mtrr_seg_unit_range(seg, unit, start, end); 256 return true; 257} 258 259static int fixed_msr_to_range_index(u32 msr) 260{ 261 int seg, unit; 262 263 if (!fixed_msr_to_seg_unit(msr, &seg, &unit)) 264 return -1; 265 266 return fixed_mtrr_seg_unit_range_index(seg, unit); 267} 268 269static int fixed_mtrr_addr_to_seg(u64 addr) 270{ 271 struct fixed_mtrr_segment *mtrr_seg; 272 int seg, seg_num = ARRAY_SIZE(fixed_seg_table); 273 274 for (seg = 0; seg < seg_num; seg++) { 275 mtrr_seg = &fixed_seg_table[seg]; 276 if (mtrr_seg->start <= addr && addr < mtrr_seg->end) 277 return seg; 278 } 279 280 return -1; 281} 282 283static int fixed_mtrr_addr_seg_to_range_index(u64 addr, int seg) 284{ 285 struct fixed_mtrr_segment *mtrr_seg; 286 int index; 287 288 mtrr_seg = &fixed_seg_table[seg]; 289 index = mtrr_seg->range_start; 290 index += (addr - mtrr_seg->start) >> mtrr_seg->range_shift; 291 return index; 292} 293 294static u64 fixed_mtrr_range_end_addr(int seg, int index) 295{ 296 struct fixed_mtrr_segment *mtrr_seg = &fixed_seg_table[seg]; 297 int pos = index - mtrr_seg->range_start; 298 299 return mtrr_seg->start + ((pos + 1) << mtrr_seg->range_shift); 300} 301 302static void var_mtrr_range(struct kvm_mtrr_range *range, u64 *start, u64 *end) 303{ 304 u64 mask; 305 306 *start = range->base & PAGE_MASK; 307 308 mask = range->mask & PAGE_MASK; 309 310 /* This cannot overflow because writing to the reserved bits of 311 * variable MTRRs causes a #GP. 312 */ 313 *end = (*start | ~mask) + 1; 314} 315 316static void update_mtrr(struct kvm_vcpu *vcpu, u32 msr) 317{ 318 struct kvm_mtrr *mtrr_state = &vcpu->arch.mtrr_state; 319 gfn_t start, end; 320 int index; 321 322 if (msr == MSR_IA32_CR_PAT || !tdp_enabled || 323 !kvm_arch_has_noncoherent_dma(vcpu->kvm)) 324 return; 325 326 if (!mtrr_is_enabled(mtrr_state) && msr != MSR_MTRRdefType) 327 return; 328 329 /* fixed MTRRs. */ 330 if (fixed_msr_to_range(msr, &start, &end)) { 331 if (!fixed_mtrr_is_enabled(mtrr_state)) 332 return; 333 } else if (msr == MSR_MTRRdefType) { 334 start = 0x0; 335 end = ~0ULL; 336 } else { 337 /* variable range MTRRs. */ 338 index = (msr - 0x200) / 2; 339 var_mtrr_range(&mtrr_state->var_ranges[index], &start, &end); 340 } 341 342 kvm_zap_gfn_range(vcpu->kvm, gpa_to_gfn(start), gpa_to_gfn(end)); 343} 344 345static bool var_mtrr_range_is_valid(struct kvm_mtrr_range *range) 346{ 347 return (range->mask & (1 << 11)) != 0; 348} 349 350static void set_var_mtrr_msr(struct kvm_vcpu *vcpu, u32 msr, u64 data) 351{ 352 struct kvm_mtrr *mtrr_state = &vcpu->arch.mtrr_state; 353 struct kvm_mtrr_range *tmp, *cur; 354 int index, is_mtrr_mask; 355 356 index = (msr - 0x200) / 2; 357 is_mtrr_mask = msr - 0x200 - 2 * index; 358 cur = &mtrr_state->var_ranges[index]; 359 360 /* remove the entry if it's in the list. */ 361 if (var_mtrr_range_is_valid(cur)) 362 list_del(&mtrr_state->var_ranges[index].node); 363 364 /* Extend the mask with all 1 bits to the left, since those 365 * bits must implicitly be 0. The bits are then cleared 366 * when reading them. 367 */ 368 if (!is_mtrr_mask) 369 cur->base = data; 370 else 371 cur->mask = data | (-1LL << cpuid_maxphyaddr(vcpu)); 372 373 /* add it to the list if it's enabled. */ 374 if (var_mtrr_range_is_valid(cur)) { 375 list_for_each_entry(tmp, &mtrr_state->head, node) 376 if (cur->base >= tmp->base) 377 break; 378 list_add_tail(&cur->node, &tmp->node); 379 } 380} 381 382int kvm_mtrr_set_msr(struct kvm_vcpu *vcpu, u32 msr, u64 data) 383{ 384 int index; 385 386 if (!kvm_mtrr_valid(vcpu, msr, data)) 387 return 1; 388 389 index = fixed_msr_to_range_index(msr); 390 if (index >= 0) 391 *(u64 *)&vcpu->arch.mtrr_state.fixed_ranges[index] = data; 392 else if (msr == MSR_MTRRdefType) 393 vcpu->arch.mtrr_state.deftype = data; 394 else if (msr == MSR_IA32_CR_PAT) 395 vcpu->arch.pat = data; 396 else 397 set_var_mtrr_msr(vcpu, msr, data); 398 399 update_mtrr(vcpu, msr); 400 return 0; 401} 402 403int kvm_mtrr_get_msr(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata) 404{ 405 int index; 406 407 /* MSR_MTRRcap is a readonly MSR. */ 408 if (msr == MSR_MTRRcap) { 409 /* 410 * SMRR = 0 411 * WC = 1 412 * FIX = 1 413 * VCNT = KVM_NR_VAR_MTRR 414 */ 415 *pdata = 0x500 | KVM_NR_VAR_MTRR; 416 return 0; 417 } 418 419 if (!msr_mtrr_valid(msr)) 420 return 1; 421 422 index = fixed_msr_to_range_index(msr); 423 if (index >= 0) 424 *pdata = *(u64 *)&vcpu->arch.mtrr_state.fixed_ranges[index]; 425 else if (msr == MSR_MTRRdefType) 426 *pdata = vcpu->arch.mtrr_state.deftype; 427 else if (msr == MSR_IA32_CR_PAT) 428 *pdata = vcpu->arch.pat; 429 else { /* Variable MTRRs */ 430 int is_mtrr_mask; 431 432 index = (msr - 0x200) / 2; 433 is_mtrr_mask = msr - 0x200 - 2 * index; 434 if (!is_mtrr_mask) 435 *pdata = vcpu->arch.mtrr_state.var_ranges[index].base; 436 else 437 *pdata = vcpu->arch.mtrr_state.var_ranges[index].mask; 438 439 *pdata &= (1ULL << cpuid_maxphyaddr(vcpu)) - 1; 440 } 441 442 return 0; 443} 444 445void kvm_vcpu_mtrr_init(struct kvm_vcpu *vcpu) 446{ 447 INIT_LIST_HEAD(&vcpu->arch.mtrr_state.head); 448} 449 450struct mtrr_iter { 451 /* input fields. */ 452 struct kvm_mtrr *mtrr_state; 453 u64 start; 454 u64 end; 455 456 /* output fields. */ 457 int mem_type; 458 /* mtrr is completely disabled? */ 459 bool mtrr_disabled; 460 /* [start, end) is not fully covered in MTRRs? */ 461 bool partial_map; 462 463 /* private fields. */ 464 union { 465 /* used for fixed MTRRs. */ 466 struct { 467 int index; 468 int seg; 469 }; 470 471 /* used for var MTRRs. */ 472 struct { 473 struct kvm_mtrr_range *range; 474 /* max address has been covered in var MTRRs. */ 475 u64 start_max; 476 }; 477 }; 478 479 bool fixed; 480}; 481 482static bool mtrr_lookup_fixed_start(struct mtrr_iter *iter) 483{ 484 int seg, index; 485 486 if (!fixed_mtrr_is_enabled(iter->mtrr_state)) 487 return false; 488 489 seg = fixed_mtrr_addr_to_seg(iter->start); 490 if (seg < 0) 491 return false; 492 493 iter->fixed = true; 494 index = fixed_mtrr_addr_seg_to_range_index(iter->start, seg); 495 iter->index = index; 496 iter->seg = seg; 497 return true; 498} 499 500static bool match_var_range(struct mtrr_iter *iter, 501 struct kvm_mtrr_range *range) 502{ 503 u64 start, end; 504 505 var_mtrr_range(range, &start, &end); 506 if (!(start >= iter->end || end <= iter->start)) { 507 iter->range = range; 508 509 /* 510 * the function is called when we do kvm_mtrr.head walking. 511 * Range has the minimum base address which interleaves 512 * [looker->start_max, looker->end). 513 */ 514 iter->partial_map |= iter->start_max < start; 515 516 /* update the max address has been covered. */ 517 iter->start_max = max(iter->start_max, end); 518 return true; 519 } 520 521 return false; 522} 523 524static void __mtrr_lookup_var_next(struct mtrr_iter *iter) 525{ 526 struct kvm_mtrr *mtrr_state = iter->mtrr_state; 527 528 list_for_each_entry_continue(iter->range, &mtrr_state->head, node) 529 if (match_var_range(iter, iter->range)) 530 return; 531 532 iter->range = NULL; 533 iter->partial_map |= iter->start_max < iter->end; 534} 535 536static void mtrr_lookup_var_start(struct mtrr_iter *iter) 537{ 538 struct kvm_mtrr *mtrr_state = iter->mtrr_state; 539 540 iter->fixed = false; 541 iter->start_max = iter->start; 542 iter->range = list_prepare_entry(iter->range, &mtrr_state->head, node); 543 544 __mtrr_lookup_var_next(iter); 545} 546 547static void mtrr_lookup_fixed_next(struct mtrr_iter *iter) 548{ 549 /* terminate the lookup. */ 550 if (fixed_mtrr_range_end_addr(iter->seg, iter->index) >= iter->end) { 551 iter->fixed = false; 552 iter->range = NULL; 553 return; 554 } 555 556 iter->index++; 557 558 /* have looked up for all fixed MTRRs. */ 559 if (iter->index >= ARRAY_SIZE(iter->mtrr_state->fixed_ranges)) 560 return mtrr_lookup_var_start(iter); 561 562 /* switch to next segment. */ 563 if (iter->index > fixed_mtrr_seg_end_range_index(iter->seg)) 564 iter->seg++; 565} 566 567static void mtrr_lookup_var_next(struct mtrr_iter *iter) 568{ 569 __mtrr_lookup_var_next(iter); 570} 571 572static void mtrr_lookup_start(struct mtrr_iter *iter) 573{ 574 if (!mtrr_is_enabled(iter->mtrr_state)) { 575 iter->mtrr_disabled = true; 576 return; 577 } 578 579 if (!mtrr_lookup_fixed_start(iter)) 580 mtrr_lookup_var_start(iter); 581} 582 583static void mtrr_lookup_init(struct mtrr_iter *iter, 584 struct kvm_mtrr *mtrr_state, u64 start, u64 end) 585{ 586 iter->mtrr_state = mtrr_state; 587 iter->start = start; 588 iter->end = end; 589 iter->mtrr_disabled = false; 590 iter->partial_map = false; 591 iter->fixed = false; 592 iter->range = NULL; 593 594 mtrr_lookup_start(iter); 595} 596 597static bool mtrr_lookup_okay(struct mtrr_iter *iter) 598{ 599 if (iter->fixed) { 600 iter->mem_type = iter->mtrr_state->fixed_ranges[iter->index]; 601 return true; 602 } 603 604 if (iter->range) { 605 iter->mem_type = iter->range->base & 0xff; 606 return true; 607 } 608 609 return false; 610} 611 612static void mtrr_lookup_next(struct mtrr_iter *iter) 613{ 614 if (iter->fixed) 615 mtrr_lookup_fixed_next(iter); 616 else 617 mtrr_lookup_var_next(iter); 618} 619 620#define mtrr_for_each_mem_type(_iter_, _mtrr_, _gpa_start_, _gpa_end_) \ 621 for (mtrr_lookup_init(_iter_, _mtrr_, _gpa_start_, _gpa_end_); \ 622 mtrr_lookup_okay(_iter_); mtrr_lookup_next(_iter_)) 623 624u8 kvm_mtrr_get_guest_memory_type(struct kvm_vcpu *vcpu, gfn_t gfn) 625{ 626 struct kvm_mtrr *mtrr_state = &vcpu->arch.mtrr_state; 627 struct mtrr_iter iter; 628 u64 start, end; 629 int type = -1; 630 const int wt_wb_mask = (1 << MTRR_TYPE_WRBACK) 631 | (1 << MTRR_TYPE_WRTHROUGH); 632 633 start = gfn_to_gpa(gfn); 634 end = start + PAGE_SIZE; 635 636 mtrr_for_each_mem_type(&iter, mtrr_state, start, end) { 637 int curr_type = iter.mem_type; 638 639 /* 640 * Please refer to Intel SDM Volume 3: 11.11.4.1 MTRR 641 * Precedences. 642 */ 643 644 if (type == -1) { 645 type = curr_type; 646 continue; 647 } 648 649 /* 650 * If two or more variable memory ranges match and the 651 * memory types are identical, then that memory type is 652 * used. 653 */ 654 if (type == curr_type) 655 continue; 656 657 /* 658 * If two or more variable memory ranges match and one of 659 * the memory types is UC, the UC memory type used. 660 */ 661 if (curr_type == MTRR_TYPE_UNCACHABLE) 662 return MTRR_TYPE_UNCACHABLE; 663 664 /* 665 * If two or more variable memory ranges match and the 666 * memory types are WT and WB, the WT memory type is used. 667 */ 668 if (((1 << type) & wt_wb_mask) && 669 ((1 << curr_type) & wt_wb_mask)) { 670 type = MTRR_TYPE_WRTHROUGH; 671 continue; 672 } 673 674 /* 675 * For overlaps not defined by the above rules, processor 676 * behavior is undefined. 677 */ 678 679 /* We use WB for this undefined behavior. :( */ 680 return MTRR_TYPE_WRBACK; 681 } 682 683 if (iter.mtrr_disabled) 684 return mtrr_disabled_type(vcpu); 685 686 /* not contained in any MTRRs. */ 687 if (type == -1) 688 return mtrr_default_type(mtrr_state); 689 690 /* 691 * We just check one page, partially covered by MTRRs is 692 * impossible. 693 */ 694 WARN_ON(iter.partial_map); 695 696 return type; 697} 698EXPORT_SYMBOL_GPL(kvm_mtrr_get_guest_memory_type); 699 700bool kvm_mtrr_check_gfn_range_consistency(struct kvm_vcpu *vcpu, gfn_t gfn, 701 int page_num) 702{ 703 struct kvm_mtrr *mtrr_state = &vcpu->arch.mtrr_state; 704 struct mtrr_iter iter; 705 u64 start, end; 706 int type = -1; 707 708 start = gfn_to_gpa(gfn); 709 end = gfn_to_gpa(gfn + page_num); 710 mtrr_for_each_mem_type(&iter, mtrr_state, start, end) { 711 if (type == -1) { 712 type = iter.mem_type; 713 continue; 714 } 715 716 if (type != iter.mem_type) 717 return false; 718 } 719 720 if (iter.mtrr_disabled) 721 return true; 722 723 if (!iter.partial_map) 724 return true; 725 726 if (type == -1) 727 return true; 728 729 return type == mtrr_default_type(mtrr_state); 730} 731