drivers/gpu/drm/i915/gvt/gtt.c


DEFINITIONS

This source file includes the following definitions:
  1. intel_gvt_ggtt_validate_range
  2. intel_gvt_ggtt_gmadr_g2h
  3. intel_gvt_ggtt_gmadr_h2g
  4. intel_gvt_ggtt_index_g2h
  5. intel_gvt_ggtt_h2g_index
  6. get_next_pt_type
  7. get_pt_type
  8. get_entry_type
  9. get_pse_type
  10. read_pte64
  11. ggtt_invalidate
  12. write_pte64
  13. gtt_get_entry64
  14. gtt_set_entry64
  15. gen8_gtt_get_pfn
  16. gen8_gtt_set_pfn
  17. gen8_gtt_test_pse
  18. gen8_gtt_clear_pse
  19. gen8_gtt_test_ips
  20. gen8_gtt_clear_ips
  21. gen8_gtt_test_present
  22. gtt_entry_clear_present
  23. gtt_entry_set_present
  24. gen8_gtt_test_64k_splited
  25. gen8_gtt_set_64k_splited
  26. gen8_gtt_clear_64k_splited
  27. gma_to_ggtt_pte_index
  28. update_entry_type_for_real
  29. _ppgtt_get_root_entry
  30. ppgtt_get_guest_root_entry
  31. ppgtt_get_shadow_root_entry
  32. _ppgtt_set_root_entry
  33. ppgtt_set_guest_root_entry
  34. ppgtt_set_shadow_root_entry
  35. ggtt_get_guest_entry
  36. ggtt_set_guest_entry
  37. ggtt_get_host_entry
  38. ggtt_set_host_entry
  39. ppgtt_spt_get_entry
  40. ppgtt_spt_set_entry
  41. alloc_spt
  42. free_spt
  43. ppgtt_free_spt
  44. ppgtt_free_all_spt
  45. ppgtt_write_protection_handler
  46. intel_vgpu_find_spt_by_gfn
  47. intel_vgpu_find_spt_by_mfn
  48. ppgtt_alloc_spt
  49. ppgtt_alloc_spt_gfn
  50. ppgtt_get_spt
  51. ppgtt_put_spt
  52. ppgtt_invalidate_spt_by_shadow_entry
  53. ppgtt_invalidate_pte
  54. ppgtt_invalidate_spt
  55. vgpu_ips_enabled
  56. ppgtt_populate_spt_by_guest_entry
  57. ppgtt_generate_shadow_entry
  58. is_2MB_gtt_possible
  59. split_2MB_gtt_entry
  60. split_64KB_gtt_entry
  61. ppgtt_populate_shadow_entry
  62. ppgtt_populate_spt
  63. ppgtt_handle_guest_entry_removal
  64. ppgtt_handle_guest_entry_add
  65. sync_oos_page
  66. detach_oos_page
  67. attach_oos_page
  68. ppgtt_set_guest_page_sync
  69. ppgtt_allocate_oos_page
  70. ppgtt_set_guest_page_oos
  71. intel_vgpu_sync_oos_pages
  72. ppgtt_handle_guest_write_page_table
  73. can_do_out_of_sync
  74. ppgtt_set_post_shadow
  75. intel_vgpu_flush_post_shadow
  76. ppgtt_handle_guest_write_page_table_bytes
  77. invalidate_ppgtt_mm
  78. shadow_ppgtt_mm
  79. vgpu_alloc_mm
  80. vgpu_free_mm
  81. intel_vgpu_create_ppgtt_mm
  82. intel_vgpu_create_ggtt_mm
  83. _intel_vgpu_mm_release
  84. intel_vgpu_unpin_mm
  85. intel_vgpu_pin_mm
  86. reclaim_one_ppgtt_mm
  87. ppgtt_get_next_level_entry
  88. intel_vgpu_gma_to_gpa
  89. emulate_ggtt_mmio_read
  90. intel_vgpu_emulate_ggtt_mmio_read
  91. ggtt_invalidate_pte
  92. emulate_ggtt_mmio_write
  93. intel_vgpu_emulate_ggtt_mmio_write
  94. alloc_scratch_pages
  95. release_scratch_page_tree
  96. create_scratch_page_tree
  97. intel_vgpu_init_gtt
  98. intel_vgpu_destroy_all_ppgtt_mm
  99. intel_vgpu_destroy_ggtt_mm
  100. intel_vgpu_clean_gtt
  101. clean_spt_oos
  102. setup_spt_oos
  103. intel_vgpu_find_ppgtt_mm
  104. intel_vgpu_get_ppgtt_mm
  105. intel_vgpu_put_ppgtt_mm
  106. intel_gvt_init_gtt
  107. intel_gvt_clean_gtt
  108. intel_vgpu_invalidate_ppgtt
  109. intel_vgpu_reset_ggtt
  110. intel_vgpu_reset_gtt

   1 /*
   2  * GTT virtualization
   3  *
   4  * Copyright(c) 2011-2016 Intel Corporation. All rights reserved.
   5  *
   6  * Permission is hereby granted, free of charge, to any person obtaining a
   7  * copy of this software and associated documentation files (the "Software"),
   8  * to deal in the Software without restriction, including without limitation
   9  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
  10  * and/or sell copies of the Software, and to permit persons to whom the
  11  * Software is furnished to do so, subject to the following conditions:
  12  *
  13  * The above copyright notice and this permission notice (including the next
  14  * paragraph) shall be included in all copies or substantial portions of the
  15  * Software.
  16  *
  17  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  18  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  19  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
  20  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  21  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  22  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  23  * SOFTWARE.
  24  *
  25  * Authors:
  26  *    Zhi Wang <zhi.a.wang@intel.com>
  27  *    Zhenyu Wang <zhenyuw@linux.intel.com>
  28  *    Xiao Zheng <xiao.zheng@intel.com>
  29  *
  30  * Contributors:
  31  *    Min He <min.he@intel.com>
  32  *    Bing Niu <bing.niu@intel.com>
  33  *
  34  */
  35 
  36 #include "i915_drv.h"
  37 #include "gvt.h"
  38 #include "i915_pvinfo.h"
  39 #include "trace.h"
  40 
  41 #if defined(VERBOSE_DEBUG)
  42 #define gvt_vdbg_mm(fmt, args...) gvt_dbg_mm(fmt, ##args)
  43 #else
  44 #define gvt_vdbg_mm(fmt, args...)
  45 #endif
  46 
  47 static bool enable_out_of_sync = false;
  48 static int preallocated_oos_pages = 8192;
  49 
   50 /*
   51  * Validate a gm address and the related range size: both ends must fall
   52  * entirely within the vGPU's aperture or hidden GM space.
   53  */
  54 bool intel_gvt_ggtt_validate_range(struct intel_vgpu *vgpu, u64 addr, u32 size)
  55 {
  56         if (size == 0)
  57                 return vgpu_gmadr_is_valid(vgpu, addr);
  58 
  59         if (vgpu_gmadr_is_aperture(vgpu, addr) &&
  60             vgpu_gmadr_is_aperture(vgpu, addr + size - 1))
  61                 return true;
  62         else if (vgpu_gmadr_is_hidden(vgpu, addr) &&
  63                  vgpu_gmadr_is_hidden(vgpu, addr + size - 1))
  64                 return true;
  65 
  66         gvt_dbg_mm("Invalid ggtt range at 0x%llx, size: 0x%x\n",
  67                      addr, size);
  68         return false;
  69 }
  70 
  71 /* translate a guest gmadr to host gmadr */
  72 int intel_gvt_ggtt_gmadr_g2h(struct intel_vgpu *vgpu, u64 g_addr, u64 *h_addr)
  73 {
  74         if (WARN(!vgpu_gmadr_is_valid(vgpu, g_addr),
  75                  "invalid guest gmadr %llx\n", g_addr))
  76                 return -EACCES;
  77 
  78         if (vgpu_gmadr_is_aperture(vgpu, g_addr))
  79                 *h_addr = vgpu_aperture_gmadr_base(vgpu)
  80                           + (g_addr - vgpu_aperture_offset(vgpu));
  81         else
  82                 *h_addr = vgpu_hidden_gmadr_base(vgpu)
  83                           + (g_addr - vgpu_hidden_offset(vgpu));
  84         return 0;
  85 }
  86 
  87 /* translate a host gmadr to guest gmadr */
  88 int intel_gvt_ggtt_gmadr_h2g(struct intel_vgpu *vgpu, u64 h_addr, u64 *g_addr)
  89 {
  90         if (WARN(!gvt_gmadr_is_valid(vgpu->gvt, h_addr),
  91                  "invalid host gmadr %llx\n", h_addr))
  92                 return -EACCES;
  93 
  94         if (gvt_gmadr_is_aperture(vgpu->gvt, h_addr))
  95                 *g_addr = vgpu_aperture_gmadr_base(vgpu)
  96                         + (h_addr - gvt_aperture_gmadr_base(vgpu->gvt));
  97         else
  98                 *g_addr = vgpu_hidden_gmadr_base(vgpu)
  99                         + (h_addr - gvt_hidden_gmadr_base(vgpu->gvt));
 100         return 0;
 101 }
 102 
 103 int intel_gvt_ggtt_index_g2h(struct intel_vgpu *vgpu, unsigned long g_index,
 104                              unsigned long *h_index)
 105 {
 106         u64 h_addr;
 107         int ret;
 108 
 109         ret = intel_gvt_ggtt_gmadr_g2h(vgpu, g_index << I915_GTT_PAGE_SHIFT,
 110                                        &h_addr);
 111         if (ret)
 112                 return ret;
 113 
 114         *h_index = h_addr >> I915_GTT_PAGE_SHIFT;
 115         return 0;
 116 }
 117 
 118 int intel_gvt_ggtt_h2g_index(struct intel_vgpu *vgpu, unsigned long h_index,
 119                              unsigned long *g_index)
 120 {
 121         u64 g_addr;
 122         int ret;
 123 
 124         ret = intel_gvt_ggtt_gmadr_h2g(vgpu, h_index << I915_GTT_PAGE_SHIFT,
 125                                        &g_addr);
 126         if (ret)
 127                 return ret;
 128 
 129         *g_index = g_addr >> I915_GTT_PAGE_SHIFT;
 130         return 0;
 131 }
 132 
 133 #define gtt_type_is_entry(type) \
 134         (type > GTT_TYPE_INVALID && type < GTT_TYPE_PPGTT_ENTRY \
 135          && type != GTT_TYPE_PPGTT_PTE_ENTRY \
 136          && type != GTT_TYPE_PPGTT_ROOT_ENTRY)
 137 
 138 #define gtt_type_is_pt(type) \
 139         (type >= GTT_TYPE_PPGTT_PTE_PT && type < GTT_TYPE_MAX)
 140 
 141 #define gtt_type_is_pte_pt(type) \
 142         (type == GTT_TYPE_PPGTT_PTE_PT)
 143 
 144 #define gtt_type_is_root_pointer(type) \
 145         (gtt_type_is_entry(type) && type > GTT_TYPE_PPGTT_ROOT_ENTRY)
 146 
 147 #define gtt_init_entry(e, t, p, v) do { \
 148         (e)->type = t; \
 149         (e)->pdev = p; \
 150         memcpy(&(e)->val64, &v, sizeof(v)); \
 151 } while (0)
 152 
  153 /*
  154  * Mappings between GTT_TYPE* enumerations.
  155  * The following information can be looked up for a given type:
  156  * - the type of the next-level page table
  157  * - the type of an entry inside this level of page table
  158  * - the type of the entry when the PSE bit is set
  159  *
  160  * If the given type does not carry such information, e.g. when asking
  161  * for the PSE type of an L4 root entry, or for the next-level page
  162  * table type of a PTE page table, GTT_TYPE_INVALID is returned:
  163  * an L4 root entry has no PSE bit, and a PTE page table has no
  164  * next-level page table.
  165  *
  166  * This is useful when traversing a page table.
  167  */
 168 
 169 struct gtt_type_table_entry {
 170         int entry_type;
 171         int pt_type;
 172         int next_pt_type;
 173         int pse_entry_type;
 174 };
 175 
 176 #define GTT_TYPE_TABLE_ENTRY(type, e_type, cpt_type, npt_type, pse_type) \
 177         [type] = { \
 178                 .entry_type = e_type, \
 179                 .pt_type = cpt_type, \
 180                 .next_pt_type = npt_type, \
 181                 .pse_entry_type = pse_type, \
 182         }
 183 
 184 static struct gtt_type_table_entry gtt_type_table[] = {
 185         GTT_TYPE_TABLE_ENTRY(GTT_TYPE_PPGTT_ROOT_L4_ENTRY,
 186                         GTT_TYPE_PPGTT_ROOT_L4_ENTRY,
 187                         GTT_TYPE_INVALID,
 188                         GTT_TYPE_PPGTT_PML4_PT,
 189                         GTT_TYPE_INVALID),
 190         GTT_TYPE_TABLE_ENTRY(GTT_TYPE_PPGTT_PML4_PT,
 191                         GTT_TYPE_PPGTT_PML4_ENTRY,
 192                         GTT_TYPE_PPGTT_PML4_PT,
 193                         GTT_TYPE_PPGTT_PDP_PT,
 194                         GTT_TYPE_INVALID),
 195         GTT_TYPE_TABLE_ENTRY(GTT_TYPE_PPGTT_PML4_ENTRY,
 196                         GTT_TYPE_PPGTT_PML4_ENTRY,
 197                         GTT_TYPE_PPGTT_PML4_PT,
 198                         GTT_TYPE_PPGTT_PDP_PT,
 199                         GTT_TYPE_INVALID),
 200         GTT_TYPE_TABLE_ENTRY(GTT_TYPE_PPGTT_PDP_PT,
 201                         GTT_TYPE_PPGTT_PDP_ENTRY,
 202                         GTT_TYPE_PPGTT_PDP_PT,
 203                         GTT_TYPE_PPGTT_PDE_PT,
 204                         GTT_TYPE_PPGTT_PTE_1G_ENTRY),
 205         GTT_TYPE_TABLE_ENTRY(GTT_TYPE_PPGTT_ROOT_L3_ENTRY,
 206                         GTT_TYPE_PPGTT_ROOT_L3_ENTRY,
 207                         GTT_TYPE_INVALID,
 208                         GTT_TYPE_PPGTT_PDE_PT,
 209                         GTT_TYPE_PPGTT_PTE_1G_ENTRY),
 210         GTT_TYPE_TABLE_ENTRY(GTT_TYPE_PPGTT_PDP_ENTRY,
 211                         GTT_TYPE_PPGTT_PDP_ENTRY,
 212                         GTT_TYPE_PPGTT_PDP_PT,
 213                         GTT_TYPE_PPGTT_PDE_PT,
 214                         GTT_TYPE_PPGTT_PTE_1G_ENTRY),
 215         GTT_TYPE_TABLE_ENTRY(GTT_TYPE_PPGTT_PDE_PT,
 216                         GTT_TYPE_PPGTT_PDE_ENTRY,
 217                         GTT_TYPE_PPGTT_PDE_PT,
 218                         GTT_TYPE_PPGTT_PTE_PT,
 219                         GTT_TYPE_PPGTT_PTE_2M_ENTRY),
 220         GTT_TYPE_TABLE_ENTRY(GTT_TYPE_PPGTT_PDE_ENTRY,
 221                         GTT_TYPE_PPGTT_PDE_ENTRY,
 222                         GTT_TYPE_PPGTT_PDE_PT,
 223                         GTT_TYPE_PPGTT_PTE_PT,
 224                         GTT_TYPE_PPGTT_PTE_2M_ENTRY),
  225         /* We treat the IPS bit as 'PSE' at the PTE level. */
 226         GTT_TYPE_TABLE_ENTRY(GTT_TYPE_PPGTT_PTE_PT,
 227                         GTT_TYPE_PPGTT_PTE_4K_ENTRY,
 228                         GTT_TYPE_PPGTT_PTE_PT,
 229                         GTT_TYPE_INVALID,
 230                         GTT_TYPE_PPGTT_PTE_64K_ENTRY),
 231         GTT_TYPE_TABLE_ENTRY(GTT_TYPE_PPGTT_PTE_4K_ENTRY,
 232                         GTT_TYPE_PPGTT_PTE_4K_ENTRY,
 233                         GTT_TYPE_PPGTT_PTE_PT,
 234                         GTT_TYPE_INVALID,
 235                         GTT_TYPE_PPGTT_PTE_64K_ENTRY),
 236         GTT_TYPE_TABLE_ENTRY(GTT_TYPE_PPGTT_PTE_64K_ENTRY,
 237                         GTT_TYPE_PPGTT_PTE_4K_ENTRY,
 238                         GTT_TYPE_PPGTT_PTE_PT,
 239                         GTT_TYPE_INVALID,
 240                         GTT_TYPE_PPGTT_PTE_64K_ENTRY),
 241         GTT_TYPE_TABLE_ENTRY(GTT_TYPE_PPGTT_PTE_2M_ENTRY,
 242                         GTT_TYPE_PPGTT_PDE_ENTRY,
 243                         GTT_TYPE_PPGTT_PDE_PT,
 244                         GTT_TYPE_INVALID,
 245                         GTT_TYPE_PPGTT_PTE_2M_ENTRY),
 246         GTT_TYPE_TABLE_ENTRY(GTT_TYPE_PPGTT_PTE_1G_ENTRY,
 247                         GTT_TYPE_PPGTT_PDP_ENTRY,
 248                         GTT_TYPE_PPGTT_PDP_PT,
 249                         GTT_TYPE_INVALID,
 250                         GTT_TYPE_PPGTT_PTE_1G_ENTRY),
 251         GTT_TYPE_TABLE_ENTRY(GTT_TYPE_GGTT_PTE,
 252                         GTT_TYPE_GGTT_PTE,
 253                         GTT_TYPE_INVALID,
 254                         GTT_TYPE_INVALID,
 255                         GTT_TYPE_INVALID),
 256 };
 257 
 258 static inline int get_next_pt_type(int type)
 259 {
 260         return gtt_type_table[type].next_pt_type;
 261 }
 262 
 263 static inline int get_pt_type(int type)
 264 {
 265         return gtt_type_table[type].pt_type;
 266 }
 267 
 268 static inline int get_entry_type(int type)
 269 {
 270         return gtt_type_table[type].entry_type;
 271 }
 272 
 273 static inline int get_pse_type(int type)
 274 {
 275         return gtt_type_table[type].pse_entry_type;
 276 }
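
/*
 * Editor's note, illustrative only (not part of the driver): how the lookup
 * helpers above compose when walking one level down from a PML4 table,
 * following the gtt_type_table[] entries:
 *
 *	int pt    = GTT_TYPE_PPGTT_PML4_PT;
 *	int entry = get_entry_type(pt);      // GTT_TYPE_PPGTT_PML4_ENTRY
 *	int next  = get_next_pt_type(entry); // GTT_TYPE_PPGTT_PDP_PT
 *	int pse   = get_pse_type(entry);     // GTT_TYPE_INVALID, no PSE here
 */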
 277 
 278 static u64 read_pte64(struct drm_i915_private *dev_priv, unsigned long index)
 279 {
 280         void __iomem *addr = (gen8_pte_t __iomem *)dev_priv->ggtt.gsm + index;
 281 
 282         return readq(addr);
 283 }
 284 
 285 static void ggtt_invalidate(struct drm_i915_private *dev_priv)
 286 {
 287         mmio_hw_access_pre(dev_priv);
 288         I915_WRITE(GFX_FLSH_CNTL_GEN6, GFX_FLSH_CNTL_EN);
 289         mmio_hw_access_post(dev_priv);
 290 }
 291 
 292 static void write_pte64(struct drm_i915_private *dev_priv,
 293                 unsigned long index, u64 pte)
 294 {
 295         void __iomem *addr = (gen8_pte_t __iomem *)dev_priv->ggtt.gsm + index;
 296 
 297         writeq(pte, addr);
 298 }
 299 
 300 static inline int gtt_get_entry64(void *pt,
 301                 struct intel_gvt_gtt_entry *e,
 302                 unsigned long index, bool hypervisor_access, unsigned long gpa,
 303                 struct intel_vgpu *vgpu)
 304 {
 305         const struct intel_gvt_device_info *info = &vgpu->gvt->device_info;
 306         int ret;
 307 
 308         if (WARN_ON(info->gtt_entry_size != 8))
 309                 return -EINVAL;
 310 
 311         if (hypervisor_access) {
 312                 ret = intel_gvt_hypervisor_read_gpa(vgpu, gpa +
 313                                 (index << info->gtt_entry_size_shift),
 314                                 &e->val64, 8);
 315                 if (WARN_ON(ret))
 316                         return ret;
 317         } else if (!pt) {
 318                 e->val64 = read_pte64(vgpu->gvt->dev_priv, index);
 319         } else {
 320                 e->val64 = *((u64 *)pt + index);
 321         }
 322         return 0;
 323 }
 324 
 325 static inline int gtt_set_entry64(void *pt,
 326                 struct intel_gvt_gtt_entry *e,
 327                 unsigned long index, bool hypervisor_access, unsigned long gpa,
 328                 struct intel_vgpu *vgpu)
 329 {
 330         const struct intel_gvt_device_info *info = &vgpu->gvt->device_info;
 331         int ret;
 332 
 333         if (WARN_ON(info->gtt_entry_size != 8))
 334                 return -EINVAL;
 335 
 336         if (hypervisor_access) {
 337                 ret = intel_gvt_hypervisor_write_gpa(vgpu, gpa +
 338                                 (index << info->gtt_entry_size_shift),
 339                                 &e->val64, 8);
 340                 if (WARN_ON(ret))
 341                         return ret;
 342         } else if (!pt) {
 343                 write_pte64(vgpu->gvt->dev_priv, index, e->val64);
 344         } else {
 345                 *((u64 *)pt + index) = e->val64;
 346         }
 347         return 0;
 348 }
 349 
 350 #define GTT_HAW 46
 351 
 352 #define ADDR_1G_MASK    GENMASK_ULL(GTT_HAW - 1, 30)
 353 #define ADDR_2M_MASK    GENMASK_ULL(GTT_HAW - 1, 21)
 354 #define ADDR_64K_MASK   GENMASK_ULL(GTT_HAW - 1, 16)
 355 #define ADDR_4K_MASK    GENMASK_ULL(GTT_HAW - 1, 12)
 356 
 357 #define GTT_SPTE_FLAG_MASK GENMASK_ULL(62, 52)
  358 #define GTT_SPTE_FLAG_64K_SPLITED BIT(52) /* split 64K gtt entry */
 359 
 360 #define GTT_64K_PTE_STRIDE 16
 361 
 362 static unsigned long gen8_gtt_get_pfn(struct intel_gvt_gtt_entry *e)
 363 {
 364         unsigned long pfn;
 365 
 366         if (e->type == GTT_TYPE_PPGTT_PTE_1G_ENTRY)
 367                 pfn = (e->val64 & ADDR_1G_MASK) >> PAGE_SHIFT;
 368         else if (e->type == GTT_TYPE_PPGTT_PTE_2M_ENTRY)
 369                 pfn = (e->val64 & ADDR_2M_MASK) >> PAGE_SHIFT;
 370         else if (e->type == GTT_TYPE_PPGTT_PTE_64K_ENTRY)
 371                 pfn = (e->val64 & ADDR_64K_MASK) >> PAGE_SHIFT;
 372         else
 373                 pfn = (e->val64 & ADDR_4K_MASK) >> PAGE_SHIFT;
 374         return pfn;
 375 }
 376 
 377 static void gen8_gtt_set_pfn(struct intel_gvt_gtt_entry *e, unsigned long pfn)
 378 {
 379         if (e->type == GTT_TYPE_PPGTT_PTE_1G_ENTRY) {
 380                 e->val64 &= ~ADDR_1G_MASK;
 381                 pfn &= (ADDR_1G_MASK >> PAGE_SHIFT);
 382         } else if (e->type == GTT_TYPE_PPGTT_PTE_2M_ENTRY) {
 383                 e->val64 &= ~ADDR_2M_MASK;
 384                 pfn &= (ADDR_2M_MASK >> PAGE_SHIFT);
 385         } else if (e->type == GTT_TYPE_PPGTT_PTE_64K_ENTRY) {
 386                 e->val64 &= ~ADDR_64K_MASK;
 387                 pfn &= (ADDR_64K_MASK >> PAGE_SHIFT);
 388         } else {
 389                 e->val64 &= ~ADDR_4K_MASK;
 390                 pfn &= (ADDR_4K_MASK >> PAGE_SHIFT);
 391         }
 392 
 393         e->val64 |= (pfn << PAGE_SHIFT);
 394 }
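
/*
 * Editor's note, illustrative only (not part of the driver): with
 * GTT_HAW == 46 and PAGE_SHIFT == 12, a 4K PTE whose val64 is
 * 0x0000000123456077 yields
 *
 *	pfn = (0x123456077 & ADDR_4K_MASK) >> PAGE_SHIFT;	// 0x123456
 *
 * A 2M entry is first masked with ADDR_2M_MASK (bits 21..45) before the
 * same shift, so get_pfn()/set_pfn() always work in 4K-page units.
 */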
 395 
 396 static bool gen8_gtt_test_pse(struct intel_gvt_gtt_entry *e)
 397 {
 398         return !!(e->val64 & _PAGE_PSE);
 399 }
 400 
 401 static void gen8_gtt_clear_pse(struct intel_gvt_gtt_entry *e)
 402 {
 403         if (gen8_gtt_test_pse(e)) {
 404                 switch (e->type) {
 405                 case GTT_TYPE_PPGTT_PTE_2M_ENTRY:
 406                         e->val64 &= ~_PAGE_PSE;
 407                         e->type = GTT_TYPE_PPGTT_PDE_ENTRY;
 408                         break;
 409                 case GTT_TYPE_PPGTT_PTE_1G_ENTRY:
 410                         e->type = GTT_TYPE_PPGTT_PDP_ENTRY;
 411                         e->val64 &= ~_PAGE_PSE;
 412                         break;
 413                 default:
 414                         WARN_ON(1);
 415                 }
 416         }
 417 }
 418 
 419 static bool gen8_gtt_test_ips(struct intel_gvt_gtt_entry *e)
 420 {
 421         if (GEM_WARN_ON(e->type != GTT_TYPE_PPGTT_PDE_ENTRY))
 422                 return false;
 423 
 424         return !!(e->val64 & GEN8_PDE_IPS_64K);
 425 }
 426 
 427 static void gen8_gtt_clear_ips(struct intel_gvt_gtt_entry *e)
 428 {
 429         if (GEM_WARN_ON(e->type != GTT_TYPE_PPGTT_PDE_ENTRY))
 430                 return;
 431 
 432         e->val64 &= ~GEN8_PDE_IPS_64K;
 433 }
 434 
 435 static bool gen8_gtt_test_present(struct intel_gvt_gtt_entry *e)
 436 {
  437         /*
  438          * i915 writes the PDP root pointer registers without the present
  439          * bit set, and that still works, so root pointer entries need to
  440          * be treated specially.
  441          */
 442         if (e->type == GTT_TYPE_PPGTT_ROOT_L3_ENTRY
 443                         || e->type == GTT_TYPE_PPGTT_ROOT_L4_ENTRY)
 444                 return (e->val64 != 0);
 445         else
 446                 return (e->val64 & _PAGE_PRESENT);
 447 }
 448 
 449 static void gtt_entry_clear_present(struct intel_gvt_gtt_entry *e)
 450 {
 451         e->val64 &= ~_PAGE_PRESENT;
 452 }
 453 
 454 static void gtt_entry_set_present(struct intel_gvt_gtt_entry *e)
 455 {
 456         e->val64 |= _PAGE_PRESENT;
 457 }
 458 
 459 static bool gen8_gtt_test_64k_splited(struct intel_gvt_gtt_entry *e)
 460 {
 461         return !!(e->val64 & GTT_SPTE_FLAG_64K_SPLITED);
 462 }
 463 
 464 static void gen8_gtt_set_64k_splited(struct intel_gvt_gtt_entry *e)
 465 {
 466         e->val64 |= GTT_SPTE_FLAG_64K_SPLITED;
 467 }
 468 
 469 static void gen8_gtt_clear_64k_splited(struct intel_gvt_gtt_entry *e)
 470 {
 471         e->val64 &= ~GTT_SPTE_FLAG_64K_SPLITED;
 472 }
 473 
 474 /*
 475  * Per-platform GMA routines.
 476  */
 477 static unsigned long gma_to_ggtt_pte_index(unsigned long gma)
 478 {
 479         unsigned long x = (gma >> I915_GTT_PAGE_SHIFT);
 480 
 481         trace_gma_index(__func__, gma, x);
 482         return x;
 483 }
 484 
 485 #define DEFINE_PPGTT_GMA_TO_INDEX(prefix, ename, exp) \
 486 static unsigned long prefix##_gma_to_##ename##_index(unsigned long gma) \
 487 { \
 488         unsigned long x = (exp); \
 489         trace_gma_index(__func__, gma, x); \
 490         return x; \
 491 }
 492 
 493 DEFINE_PPGTT_GMA_TO_INDEX(gen8, pte, (gma >> 12 & 0x1ff));
 494 DEFINE_PPGTT_GMA_TO_INDEX(gen8, pde, (gma >> 21 & 0x1ff));
 495 DEFINE_PPGTT_GMA_TO_INDEX(gen8, l3_pdp, (gma >> 30 & 0x3));
 496 DEFINE_PPGTT_GMA_TO_INDEX(gen8, l4_pdp, (gma >> 30 & 0x1ff));
 497 DEFINE_PPGTT_GMA_TO_INDEX(gen8, pml4, (gma >> 39 & 0x1ff));
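
/*
 * Editor's note, illustrative only (not part of the driver): for a 4-level
 * PPGTT, the guest memory address (gma) 0x8080604500 decomposes with the
 * macros above as
 *
 *	gen8_gma_to_pml4_index(gma)   == 1	// gma >> 39 & 0x1ff
 *	gen8_gma_to_l4_pdp_index(gma) == 2	// gma >> 30 & 0x1ff
 *	gen8_gma_to_pde_index(gma)    == 3	// gma >> 21 & 0x1ff
 *	gen8_gma_to_pte_index(gma)    == 4	// gma >> 12 & 0x1ff
 *
 * with the low 12 bits (0x500) being the offset inside the 4K page.
 */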
 498 
 499 static struct intel_gvt_gtt_pte_ops gen8_gtt_pte_ops = {
 500         .get_entry = gtt_get_entry64,
 501         .set_entry = gtt_set_entry64,
 502         .clear_present = gtt_entry_clear_present,
 503         .set_present = gtt_entry_set_present,
 504         .test_present = gen8_gtt_test_present,
 505         .test_pse = gen8_gtt_test_pse,
 506         .clear_pse = gen8_gtt_clear_pse,
 507         .clear_ips = gen8_gtt_clear_ips,
 508         .test_ips = gen8_gtt_test_ips,
 509         .clear_64k_splited = gen8_gtt_clear_64k_splited,
 510         .set_64k_splited = gen8_gtt_set_64k_splited,
 511         .test_64k_splited = gen8_gtt_test_64k_splited,
 512         .get_pfn = gen8_gtt_get_pfn,
 513         .set_pfn = gen8_gtt_set_pfn,
 514 };
 515 
 516 static struct intel_gvt_gtt_gma_ops gen8_gtt_gma_ops = {
 517         .gma_to_ggtt_pte_index = gma_to_ggtt_pte_index,
 518         .gma_to_pte_index = gen8_gma_to_pte_index,
 519         .gma_to_pde_index = gen8_gma_to_pde_index,
 520         .gma_to_l3_pdp_index = gen8_gma_to_l3_pdp_index,
 521         .gma_to_l4_pdp_index = gen8_gma_to_l4_pdp_index,
 522         .gma_to_pml4_index = gen8_gma_to_pml4_index,
 523 };
 524 
  525 /* Update the entry type according to the PSE and IPS bits. */
 526 static void update_entry_type_for_real(struct intel_gvt_gtt_pte_ops *pte_ops,
 527         struct intel_gvt_gtt_entry *entry, bool ips)
 528 {
 529         switch (entry->type) {
 530         case GTT_TYPE_PPGTT_PDE_ENTRY:
 531         case GTT_TYPE_PPGTT_PDP_ENTRY:
 532                 if (pte_ops->test_pse(entry))
 533                         entry->type = get_pse_type(entry->type);
 534                 break;
 535         case GTT_TYPE_PPGTT_PTE_4K_ENTRY:
 536                 if (ips)
 537                         entry->type = get_pse_type(entry->type);
 538                 break;
 539         default:
 540                 GEM_BUG_ON(!gtt_type_is_entry(entry->type));
 541         }
 542 
 543         GEM_BUG_ON(entry->type == GTT_TYPE_INVALID);
 544 }
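
/*
 * Editor's note, illustrative only (not part of the driver): the effect of
 * update_entry_type_for_real() per the gtt_type_table[] mappings above:
 *
 *	GTT_TYPE_PPGTT_PDE_ENTRY with PSE set
 *		-> GTT_TYPE_PPGTT_PTE_2M_ENTRY
 *	GTT_TYPE_PPGTT_PTE_4K_ENTRY with ips == true
 *		-> GTT_TYPE_PPGTT_PTE_64K_ENTRY
 */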
 545 
 546 /*
 547  * MM helpers.
 548  */
 549 static void _ppgtt_get_root_entry(struct intel_vgpu_mm *mm,
 550                 struct intel_gvt_gtt_entry *entry, unsigned long index,
 551                 bool guest)
 552 {
 553         struct intel_gvt_gtt_pte_ops *pte_ops = mm->vgpu->gvt->gtt.pte_ops;
 554 
 555         GEM_BUG_ON(mm->type != INTEL_GVT_MM_PPGTT);
 556 
 557         entry->type = mm->ppgtt_mm.root_entry_type;
 558         pte_ops->get_entry(guest ? mm->ppgtt_mm.guest_pdps :
 559                            mm->ppgtt_mm.shadow_pdps,
 560                            entry, index, false, 0, mm->vgpu);
 561         update_entry_type_for_real(pte_ops, entry, false);
 562 }
 563 
 564 static inline void ppgtt_get_guest_root_entry(struct intel_vgpu_mm *mm,
 565                 struct intel_gvt_gtt_entry *entry, unsigned long index)
 566 {
 567         _ppgtt_get_root_entry(mm, entry, index, true);
 568 }
 569 
 570 static inline void ppgtt_get_shadow_root_entry(struct intel_vgpu_mm *mm,
 571                 struct intel_gvt_gtt_entry *entry, unsigned long index)
 572 {
 573         _ppgtt_get_root_entry(mm, entry, index, false);
 574 }
 575 
 576 static void _ppgtt_set_root_entry(struct intel_vgpu_mm *mm,
 577                 struct intel_gvt_gtt_entry *entry, unsigned long index,
 578                 bool guest)
 579 {
 580         struct intel_gvt_gtt_pte_ops *pte_ops = mm->vgpu->gvt->gtt.pte_ops;
 581 
 582         pte_ops->set_entry(guest ? mm->ppgtt_mm.guest_pdps :
 583                            mm->ppgtt_mm.shadow_pdps,
 584                            entry, index, false, 0, mm->vgpu);
 585 }
 586 
 587 static inline void ppgtt_set_guest_root_entry(struct intel_vgpu_mm *mm,
 588                 struct intel_gvt_gtt_entry *entry, unsigned long index)
 589 {
 590         _ppgtt_set_root_entry(mm, entry, index, true);
 591 }
 592 
 593 static inline void ppgtt_set_shadow_root_entry(struct intel_vgpu_mm *mm,
 594                 struct intel_gvt_gtt_entry *entry, unsigned long index)
 595 {
 596         _ppgtt_set_root_entry(mm, entry, index, false);
 597 }
 598 
 599 static void ggtt_get_guest_entry(struct intel_vgpu_mm *mm,
 600                 struct intel_gvt_gtt_entry *entry, unsigned long index)
 601 {
 602         struct intel_gvt_gtt_pte_ops *pte_ops = mm->vgpu->gvt->gtt.pte_ops;
 603 
 604         GEM_BUG_ON(mm->type != INTEL_GVT_MM_GGTT);
 605 
 606         entry->type = GTT_TYPE_GGTT_PTE;
 607         pte_ops->get_entry(mm->ggtt_mm.virtual_ggtt, entry, index,
 608                            false, 0, mm->vgpu);
 609 }
 610 
 611 static void ggtt_set_guest_entry(struct intel_vgpu_mm *mm,
 612                 struct intel_gvt_gtt_entry *entry, unsigned long index)
 613 {
 614         struct intel_gvt_gtt_pte_ops *pte_ops = mm->vgpu->gvt->gtt.pte_ops;
 615 
 616         GEM_BUG_ON(mm->type != INTEL_GVT_MM_GGTT);
 617 
 618         pte_ops->set_entry(mm->ggtt_mm.virtual_ggtt, entry, index,
 619                            false, 0, mm->vgpu);
 620 }
 621 
 622 static void ggtt_get_host_entry(struct intel_vgpu_mm *mm,
 623                 struct intel_gvt_gtt_entry *entry, unsigned long index)
 624 {
 625         struct intel_gvt_gtt_pte_ops *pte_ops = mm->vgpu->gvt->gtt.pte_ops;
 626 
 627         GEM_BUG_ON(mm->type != INTEL_GVT_MM_GGTT);
 628 
 629         pte_ops->get_entry(NULL, entry, index, false, 0, mm->vgpu);
 630 }
 631 
 632 static void ggtt_set_host_entry(struct intel_vgpu_mm *mm,
 633                 struct intel_gvt_gtt_entry *entry, unsigned long index)
 634 {
 635         struct intel_gvt_gtt_pte_ops *pte_ops = mm->vgpu->gvt->gtt.pte_ops;
 636 
 637         GEM_BUG_ON(mm->type != INTEL_GVT_MM_GGTT);
 638 
 639         pte_ops->set_entry(NULL, entry, index, false, 0, mm->vgpu);
 640 }
 641 
 642 /*
 643  * PPGTT shadow page table helpers.
 644  */
 645 static inline int ppgtt_spt_get_entry(
 646                 struct intel_vgpu_ppgtt_spt *spt,
 647                 void *page_table, int type,
 648                 struct intel_gvt_gtt_entry *e, unsigned long index,
 649                 bool guest)
 650 {
 651         struct intel_gvt *gvt = spt->vgpu->gvt;
 652         struct intel_gvt_gtt_pte_ops *ops = gvt->gtt.pte_ops;
 653         int ret;
 654 
 655         e->type = get_entry_type(type);
 656 
 657         if (WARN(!gtt_type_is_entry(e->type), "invalid entry type\n"))
 658                 return -EINVAL;
 659 
 660         ret = ops->get_entry(page_table, e, index, guest,
 661                         spt->guest_page.gfn << I915_GTT_PAGE_SHIFT,
 662                         spt->vgpu);
 663         if (ret)
 664                 return ret;
 665 
 666         update_entry_type_for_real(ops, e, guest ?
 667                                    spt->guest_page.pde_ips : false);
 668 
 669         gvt_vdbg_mm("read ppgtt entry, spt type %d, entry type %d, index %lu, value %llx\n",
 670                     type, e->type, index, e->val64);
 671         return 0;
 672 }
 673 
 674 static inline int ppgtt_spt_set_entry(
 675                 struct intel_vgpu_ppgtt_spt *spt,
 676                 void *page_table, int type,
 677                 struct intel_gvt_gtt_entry *e, unsigned long index,
 678                 bool guest)
 679 {
 680         struct intel_gvt *gvt = spt->vgpu->gvt;
 681         struct intel_gvt_gtt_pte_ops *ops = gvt->gtt.pte_ops;
 682 
 683         if (WARN(!gtt_type_is_entry(e->type), "invalid entry type\n"))
 684                 return -EINVAL;
 685 
 686         gvt_vdbg_mm("set ppgtt entry, spt type %d, entry type %d, index %lu, value %llx\n",
 687                     type, e->type, index, e->val64);
 688 
 689         return ops->set_entry(page_table, e, index, guest,
 690                         spt->guest_page.gfn << I915_GTT_PAGE_SHIFT,
 691                         spt->vgpu);
 692 }
 693 
 694 #define ppgtt_get_guest_entry(spt, e, index) \
 695         ppgtt_spt_get_entry(spt, NULL, \
 696                 spt->guest_page.type, e, index, true)
 697 
 698 #define ppgtt_set_guest_entry(spt, e, index) \
 699         ppgtt_spt_set_entry(spt, NULL, \
 700                 spt->guest_page.type, e, index, true)
 701 
 702 #define ppgtt_get_shadow_entry(spt, e, index) \
 703         ppgtt_spt_get_entry(spt, spt->shadow_page.vaddr, \
 704                 spt->shadow_page.type, e, index, false)
 705 
 706 #define ppgtt_set_shadow_entry(spt, e, index) \
 707         ppgtt_spt_set_entry(spt, spt->shadow_page.vaddr, \
 708                 spt->shadow_page.type, e, index, false)
 709 
 710 static void *alloc_spt(gfp_t gfp_mask)
 711 {
 712         struct intel_vgpu_ppgtt_spt *spt;
 713 
 714         spt = kzalloc(sizeof(*spt), gfp_mask);
 715         if (!spt)
 716                 return NULL;
 717 
 718         spt->shadow_page.page = alloc_page(gfp_mask);
 719         if (!spt->shadow_page.page) {
 720                 kfree(spt);
 721                 return NULL;
 722         }
 723         return spt;
 724 }
 725 
 726 static void free_spt(struct intel_vgpu_ppgtt_spt *spt)
 727 {
 728         __free_page(spt->shadow_page.page);
 729         kfree(spt);
 730 }
 731 
 732 static int detach_oos_page(struct intel_vgpu *vgpu,
 733                 struct intel_vgpu_oos_page *oos_page);
 734 
 735 static void ppgtt_free_spt(struct intel_vgpu_ppgtt_spt *spt)
 736 {
 737         struct device *kdev = &spt->vgpu->gvt->dev_priv->drm.pdev->dev;
 738 
 739         trace_spt_free(spt->vgpu->id, spt, spt->guest_page.type);
 740 
 741         dma_unmap_page(kdev, spt->shadow_page.mfn << I915_GTT_PAGE_SHIFT, 4096,
 742                        PCI_DMA_BIDIRECTIONAL);
 743 
 744         radix_tree_delete(&spt->vgpu->gtt.spt_tree, spt->shadow_page.mfn);
 745 
 746         if (spt->guest_page.gfn) {
 747                 if (spt->guest_page.oos_page)
 748                         detach_oos_page(spt->vgpu, spt->guest_page.oos_page);
 749 
 750                 intel_vgpu_unregister_page_track(spt->vgpu, spt->guest_page.gfn);
 751         }
 752 
 753         list_del_init(&spt->post_shadow_list);
 754         free_spt(spt);
 755 }
 756 
 757 static void ppgtt_free_all_spt(struct intel_vgpu *vgpu)
 758 {
 759         struct intel_vgpu_ppgtt_spt *spt, *spn;
 760         struct radix_tree_iter iter;
 761         LIST_HEAD(all_spt);
 762         void __rcu **slot;
 763 
 764         rcu_read_lock();
 765         radix_tree_for_each_slot(slot, &vgpu->gtt.spt_tree, &iter, 0) {
 766                 spt = radix_tree_deref_slot(slot);
 767                 list_move(&spt->post_shadow_list, &all_spt);
 768         }
 769         rcu_read_unlock();
 770 
 771         list_for_each_entry_safe(spt, spn, &all_spt, post_shadow_list)
 772                 ppgtt_free_spt(spt);
 773 }
 774 
 775 static int ppgtt_handle_guest_write_page_table_bytes(
 776                 struct intel_vgpu_ppgtt_spt *spt,
 777                 u64 pa, void *p_data, int bytes);
 778 
 779 static int ppgtt_write_protection_handler(
 780                 struct intel_vgpu_page_track *page_track,
 781                 u64 gpa, void *data, int bytes)
 782 {
 783         struct intel_vgpu_ppgtt_spt *spt = page_track->priv_data;
 784 
 785         int ret;
 786 
 787         if (bytes != 4 && bytes != 8)
 788                 return -EINVAL;
 789 
 790         ret = ppgtt_handle_guest_write_page_table_bytes(spt, gpa, data, bytes);
 791         if (ret)
 792                 return ret;
 793         return ret;
 794 }
 795 
 796 /* Find a spt by guest gfn. */
 797 static struct intel_vgpu_ppgtt_spt *intel_vgpu_find_spt_by_gfn(
 798                 struct intel_vgpu *vgpu, unsigned long gfn)
 799 {
 800         struct intel_vgpu_page_track *track;
 801 
 802         track = intel_vgpu_find_page_track(vgpu, gfn);
 803         if (track && track->handler == ppgtt_write_protection_handler)
 804                 return track->priv_data;
 805 
 806         return NULL;
 807 }
 808 
 809 /* Find the spt by shadow page mfn. */
 810 static inline struct intel_vgpu_ppgtt_spt *intel_vgpu_find_spt_by_mfn(
 811                 struct intel_vgpu *vgpu, unsigned long mfn)
 812 {
 813         return radix_tree_lookup(&vgpu->gtt.spt_tree, mfn);
 814 }
 815 
 816 static int reclaim_one_ppgtt_mm(struct intel_gvt *gvt);
 817 
 818 /* Allocate shadow page table without guest page. */
 819 static struct intel_vgpu_ppgtt_spt *ppgtt_alloc_spt(
 820                 struct intel_vgpu *vgpu, enum intel_gvt_gtt_type type)
 821 {
 822         struct device *kdev = &vgpu->gvt->dev_priv->drm.pdev->dev;
 823         struct intel_vgpu_ppgtt_spt *spt = NULL;
 824         dma_addr_t daddr;
 825         int ret;
 826 
 827 retry:
 828         spt = alloc_spt(GFP_KERNEL | __GFP_ZERO);
 829         if (!spt) {
 830                 if (reclaim_one_ppgtt_mm(vgpu->gvt))
 831                         goto retry;
 832 
 833                 gvt_vgpu_err("fail to allocate ppgtt shadow page\n");
 834                 return ERR_PTR(-ENOMEM);
 835         }
 836 
 837         spt->vgpu = vgpu;
 838         atomic_set(&spt->refcount, 1);
 839         INIT_LIST_HEAD(&spt->post_shadow_list);
 840 
 841         /*
 842          * Init shadow_page.
 843          */
 844         spt->shadow_page.type = type;
 845         daddr = dma_map_page(kdev, spt->shadow_page.page,
 846                              0, 4096, PCI_DMA_BIDIRECTIONAL);
 847         if (dma_mapping_error(kdev, daddr)) {
 848                 gvt_vgpu_err("fail to map dma addr\n");
 849                 ret = -EINVAL;
 850                 goto err_free_spt;
 851         }
 852         spt->shadow_page.vaddr = page_address(spt->shadow_page.page);
 853         spt->shadow_page.mfn = daddr >> I915_GTT_PAGE_SHIFT;
 854 
 855         ret = radix_tree_insert(&vgpu->gtt.spt_tree, spt->shadow_page.mfn, spt);
 856         if (ret)
 857                 goto err_unmap_dma;
 858 
 859         return spt;
 860 
 861 err_unmap_dma:
 862         dma_unmap_page(kdev, daddr, PAGE_SIZE, PCI_DMA_BIDIRECTIONAL);
 863 err_free_spt:
 864         free_spt(spt);
 865         return ERR_PTR(ret);
 866 }
 867 
 868 /* Allocate shadow page table associated with specific gfn. */
 869 static struct intel_vgpu_ppgtt_spt *ppgtt_alloc_spt_gfn(
 870                 struct intel_vgpu *vgpu, enum intel_gvt_gtt_type type,
 871                 unsigned long gfn, bool guest_pde_ips)
 872 {
 873         struct intel_vgpu_ppgtt_spt *spt;
 874         int ret;
 875 
 876         spt = ppgtt_alloc_spt(vgpu, type);
 877         if (IS_ERR(spt))
 878                 return spt;
 879 
 880         /*
 881          * Init guest_page.
 882          */
 883         ret = intel_vgpu_register_page_track(vgpu, gfn,
 884                         ppgtt_write_protection_handler, spt);
 885         if (ret) {
 886                 ppgtt_free_spt(spt);
 887                 return ERR_PTR(ret);
 888         }
 889 
 890         spt->guest_page.type = type;
 891         spt->guest_page.gfn = gfn;
 892         spt->guest_page.pde_ips = guest_pde_ips;
 893 
 894         trace_spt_alloc(vgpu->id, spt, type, spt->shadow_page.mfn, gfn);
 895 
 896         return spt;
 897 }
 898 
 899 #define pt_entry_size_shift(spt) \
 900         ((spt)->vgpu->gvt->device_info.gtt_entry_size_shift)
 901 
 902 #define pt_entries(spt) \
 903         (I915_GTT_PAGE_SIZE >> pt_entry_size_shift(spt))
 904 
 905 #define for_each_present_guest_entry(spt, e, i) \
 906         for (i = 0; i < pt_entries(spt); \
 907              i += spt->guest_page.pde_ips ? GTT_64K_PTE_STRIDE : 1) \
 908                 if (!ppgtt_get_guest_entry(spt, e, i) && \
 909                     spt->vgpu->gvt->gtt.pte_ops->test_present(e))
 910 
 911 #define for_each_present_shadow_entry(spt, e, i) \
 912         for (i = 0; i < pt_entries(spt); \
 913              i += spt->shadow_page.pde_ips ? GTT_64K_PTE_STRIDE : 1) \
 914                 if (!ppgtt_get_shadow_entry(spt, e, i) && \
 915                     spt->vgpu->gvt->gtt.pte_ops->test_present(e))
 916 
 917 #define for_each_shadow_entry(spt, e, i) \
 918         for (i = 0; i < pt_entries(spt); \
 919              i += (spt->shadow_page.pde_ips ? GTT_64K_PTE_STRIDE : 1)) \
 920                 if (!ppgtt_get_shadow_entry(spt, e, i))
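
/*
 * Editor's note, illustrative only (not part of the driver): a page table
 * holds pt_entries() == I915_GTT_PAGE_SIZE >> 3 == 512 entries. When
 * pde_ips is set, the walkers above advance by GTT_64K_PTE_STRIDE (16),
 * so only the 32 head entries of the 64K groups (index 0, 16, 32, ...)
 * are visited.
 */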
 921 
 922 static inline void ppgtt_get_spt(struct intel_vgpu_ppgtt_spt *spt)
 923 {
 924         int v = atomic_read(&spt->refcount);
 925 
 926         trace_spt_refcount(spt->vgpu->id, "inc", spt, v, (v + 1));
 927         atomic_inc(&spt->refcount);
 928 }
 929 
 930 static inline int ppgtt_put_spt(struct intel_vgpu_ppgtt_spt *spt)
 931 {
 932         int v = atomic_read(&spt->refcount);
 933 
 934         trace_spt_refcount(spt->vgpu->id, "dec", spt, v, (v - 1));
 935         return atomic_dec_return(&spt->refcount);
 936 }
 937 
 938 static int ppgtt_invalidate_spt(struct intel_vgpu_ppgtt_spt *spt);
 939 
 940 static int ppgtt_invalidate_spt_by_shadow_entry(struct intel_vgpu *vgpu,
 941                 struct intel_gvt_gtt_entry *e)
 942 {
 943         struct intel_gvt_gtt_pte_ops *ops = vgpu->gvt->gtt.pte_ops;
 944         struct intel_vgpu_ppgtt_spt *s;
 945         enum intel_gvt_gtt_type cur_pt_type;
 946 
 947         GEM_BUG_ON(!gtt_type_is_pt(get_next_pt_type(e->type)));
 948 
 949         if (e->type != GTT_TYPE_PPGTT_ROOT_L3_ENTRY
 950                 && e->type != GTT_TYPE_PPGTT_ROOT_L4_ENTRY) {
 951                 cur_pt_type = get_next_pt_type(e->type);
 952 
 953                 if (!gtt_type_is_pt(cur_pt_type) ||
 954                                 !gtt_type_is_pt(cur_pt_type + 1)) {
 955                         WARN(1, "Invalid page table type, cur_pt_type is: %d\n", cur_pt_type);
 956                         return -EINVAL;
 957                 }
 958 
 959                 cur_pt_type += 1;
 960 
 961                 if (ops->get_pfn(e) ==
 962                         vgpu->gtt.scratch_pt[cur_pt_type].page_mfn)
 963                         return 0;
 964         }
 965         s = intel_vgpu_find_spt_by_mfn(vgpu, ops->get_pfn(e));
 966         if (!s) {
 967                 gvt_vgpu_err("fail to find shadow page: mfn: 0x%lx\n",
 968                                 ops->get_pfn(e));
 969                 return -ENXIO;
 970         }
 971         return ppgtt_invalidate_spt(s);
 972 }
 973 
 974 static inline void ppgtt_invalidate_pte(struct intel_vgpu_ppgtt_spt *spt,
 975                 struct intel_gvt_gtt_entry *entry)
 976 {
 977         struct intel_vgpu *vgpu = spt->vgpu;
 978         struct intel_gvt_gtt_pte_ops *ops = vgpu->gvt->gtt.pte_ops;
 979         unsigned long pfn;
 980         int type;
 981 
 982         pfn = ops->get_pfn(entry);
 983         type = spt->shadow_page.type;
 984 
 985         /* Uninitialized spte or unshadowed spte. */
 986         if (!pfn || pfn == vgpu->gtt.scratch_pt[type].page_mfn)
 987                 return;
 988 
 989         intel_gvt_hypervisor_dma_unmap_guest_page(vgpu, pfn << PAGE_SHIFT);
 990 }
 991 
 992 static int ppgtt_invalidate_spt(struct intel_vgpu_ppgtt_spt *spt)
 993 {
 994         struct intel_vgpu *vgpu = spt->vgpu;
 995         struct intel_gvt_gtt_entry e;
 996         unsigned long index;
 997         int ret;
 998 
 999         trace_spt_change(spt->vgpu->id, "die", spt,
1000                         spt->guest_page.gfn, spt->shadow_page.type);
1001 
1002         if (ppgtt_put_spt(spt) > 0)
1003                 return 0;
1004 
1005         for_each_present_shadow_entry(spt, &e, index) {
1006                 switch (e.type) {
1007                 case GTT_TYPE_PPGTT_PTE_4K_ENTRY:
1008                         gvt_vdbg_mm("invalidate 4K entry\n");
1009                         ppgtt_invalidate_pte(spt, &e);
1010                         break;
1011                 case GTT_TYPE_PPGTT_PTE_64K_ENTRY:
 1012                         /* We don't set up 64K shadow entries so far. */
1013                         WARN(1, "suspicious 64K gtt entry\n");
1014                         continue;
1015                 case GTT_TYPE_PPGTT_PTE_2M_ENTRY:
1016                         gvt_vdbg_mm("invalidate 2M entry\n");
1017                         continue;
1018                 case GTT_TYPE_PPGTT_PTE_1G_ENTRY:
1019                         WARN(1, "GVT doesn't support 1GB page\n");
1020                         continue;
1021                 case GTT_TYPE_PPGTT_PML4_ENTRY:
1022                 case GTT_TYPE_PPGTT_PDP_ENTRY:
1023                 case GTT_TYPE_PPGTT_PDE_ENTRY:
 1024                         gvt_vdbg_mm("invalidate PML4/PDP/PDE entry\n");
1025                         ret = ppgtt_invalidate_spt_by_shadow_entry(
1026                                         spt->vgpu, &e);
1027                         if (ret)
1028                                 goto fail;
1029                         break;
1030                 default:
1031                         GEM_BUG_ON(1);
1032                 }
1033         }
1034 
1035         trace_spt_change(spt->vgpu->id, "release", spt,
1036                          spt->guest_page.gfn, spt->shadow_page.type);
1037         ppgtt_free_spt(spt);
1038         return 0;
1039 fail:
1040         gvt_vgpu_err("fail: shadow page %p shadow entry 0x%llx type %d\n",
1041                         spt, e.val64, e.type);
1042         return ret;
1043 }
1044 
1045 static bool vgpu_ips_enabled(struct intel_vgpu *vgpu)
1046 {
1047         struct drm_i915_private *dev_priv = vgpu->gvt->dev_priv;
1048 
1049         if (INTEL_GEN(dev_priv) == 9 || INTEL_GEN(dev_priv) == 10) {
1050                 u32 ips = vgpu_vreg_t(vgpu, GEN8_GAMW_ECO_DEV_RW_IA) &
1051                         GAMW_ECO_ENABLE_64K_IPS_FIELD;
1052 
1053                 return ips == GAMW_ECO_ENABLE_64K_IPS_FIELD;
1054         } else if (INTEL_GEN(dev_priv) >= 11) {
 1055                 /* 64K paging is now controlled only by the IPS bit in the PTE. */
1056                 return true;
1057         } else
1058                 return false;
1059 }
1060 
1061 static int ppgtt_populate_spt(struct intel_vgpu_ppgtt_spt *spt);
1062 
1063 static struct intel_vgpu_ppgtt_spt *ppgtt_populate_spt_by_guest_entry(
1064                 struct intel_vgpu *vgpu, struct intel_gvt_gtt_entry *we)
1065 {
1066         struct intel_gvt_gtt_pte_ops *ops = vgpu->gvt->gtt.pte_ops;
1067         struct intel_vgpu_ppgtt_spt *spt = NULL;
1068         bool ips = false;
1069         int ret;
1070 
1071         GEM_BUG_ON(!gtt_type_is_pt(get_next_pt_type(we->type)));
1072 
1073         if (we->type == GTT_TYPE_PPGTT_PDE_ENTRY)
1074                 ips = vgpu_ips_enabled(vgpu) && ops->test_ips(we);
1075 
1076         spt = intel_vgpu_find_spt_by_gfn(vgpu, ops->get_pfn(we));
1077         if (spt) {
1078                 ppgtt_get_spt(spt);
1079 
1080                 if (ips != spt->guest_page.pde_ips) {
1081                         spt->guest_page.pde_ips = ips;
1082 
1083                         gvt_dbg_mm("reshadow PDE since ips changed\n");
1084                         clear_page(spt->shadow_page.vaddr);
1085                         ret = ppgtt_populate_spt(spt);
1086                         if (ret) {
1087                                 ppgtt_put_spt(spt);
1088                                 goto err;
1089                         }
1090                 }
1091         } else {
1092                 int type = get_next_pt_type(we->type);
1093 
1094                 if (!gtt_type_is_pt(type)) {
1095                         ret = -EINVAL;
1096                         goto err;
1097                 }
1098 
1099                 spt = ppgtt_alloc_spt_gfn(vgpu, type, ops->get_pfn(we), ips);
1100                 if (IS_ERR(spt)) {
1101                         ret = PTR_ERR(spt);
1102                         goto err;
1103                 }
1104 
1105                 ret = intel_vgpu_enable_page_track(vgpu, spt->guest_page.gfn);
1106                 if (ret)
1107                         goto err_free_spt;
1108 
1109                 ret = ppgtt_populate_spt(spt);
1110                 if (ret)
1111                         goto err_free_spt;
1112 
1113                 trace_spt_change(vgpu->id, "new", spt, spt->guest_page.gfn,
1114                                  spt->shadow_page.type);
1115         }
1116         return spt;
1117 
1118 err_free_spt:
1119         ppgtt_free_spt(spt);
1120         spt = NULL;
1121 err:
1122         gvt_vgpu_err("fail: shadow page %p guest entry 0x%llx type %d\n",
1123                      spt, we->val64, we->type);
1124         return ERR_PTR(ret);
1125 }
1126 
1127 static inline void ppgtt_generate_shadow_entry(struct intel_gvt_gtt_entry *se,
1128                 struct intel_vgpu_ppgtt_spt *s, struct intel_gvt_gtt_entry *ge)
1129 {
1130         struct intel_gvt_gtt_pte_ops *ops = s->vgpu->gvt->gtt.pte_ops;
1131 
1132         se->type = ge->type;
1133         se->val64 = ge->val64;
1134 
 1135         /* Because we always split 64KB pages, clear IPS in the shadow PDE. */
1136         if (se->type == GTT_TYPE_PPGTT_PDE_ENTRY)
1137                 ops->clear_ips(se);
1138 
1139         ops->set_pfn(se, s->shadow_page.mfn);
1140 }
1141 
1142 /**
 1143  * Check whether a guest 2MB page can be shadowed directly
 1144  * @vgpu: target vgpu
 1145  * @entry: target pfn's gtt entry
 1146  *
 1147  * Return 1 if 2MB huge gtt shadowing is possible, 0 if the conditions
 1148  * are not met, negative if an error is found.
1149  */
1150 static int is_2MB_gtt_possible(struct intel_vgpu *vgpu,
1151         struct intel_gvt_gtt_entry *entry)
1152 {
1153         struct intel_gvt_gtt_pte_ops *ops = vgpu->gvt->gtt.pte_ops;
1154         unsigned long pfn;
1155 
1156         if (!HAS_PAGE_SIZES(vgpu->gvt->dev_priv, I915_GTT_PAGE_SIZE_2M))
1157                 return 0;
1158 
1159         pfn = intel_gvt_hypervisor_gfn_to_mfn(vgpu, ops->get_pfn(entry));
1160         if (pfn == INTEL_GVT_INVALID_ADDR)
1161                 return -EINVAL;
1162 
1163         return PageTransHuge(pfn_to_page(pfn));
1164 }
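
/*
 * Editor's note, illustrative only (not part of the driver): the caller,
 * ppgtt_populate_shadow_entry(), interprets the return value as
 *
 *	 1  -> shadow the guest 2M page directly with one 2M DMA mapping
 *	 0  -> fall back to split_2MB_gtt_entry() and use 4K shadow PTEs
 *	<0  -> propagate the error
 */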
1165 
1166 static int split_2MB_gtt_entry(struct intel_vgpu *vgpu,
1167         struct intel_vgpu_ppgtt_spt *spt, unsigned long index,
1168         struct intel_gvt_gtt_entry *se)
1169 {
1170         struct intel_gvt_gtt_pte_ops *ops = vgpu->gvt->gtt.pte_ops;
1171         struct intel_vgpu_ppgtt_spt *sub_spt;
1172         struct intel_gvt_gtt_entry sub_se;
1173         unsigned long start_gfn;
1174         dma_addr_t dma_addr;
1175         unsigned long sub_index;
1176         int ret;
1177 
1178         gvt_dbg_mm("Split 2M gtt entry, index %lu\n", index);
1179 
1180         start_gfn = ops->get_pfn(se);
1181 
1182         sub_spt = ppgtt_alloc_spt(vgpu, GTT_TYPE_PPGTT_PTE_PT);
1183         if (IS_ERR(sub_spt))
1184                 return PTR_ERR(sub_spt);
1185 
1186         for_each_shadow_entry(sub_spt, &sub_se, sub_index) {
1187                 ret = intel_gvt_hypervisor_dma_map_guest_page(vgpu,
1188                                 start_gfn + sub_index, PAGE_SIZE, &dma_addr);
1189                 if (ret) {
1190                         ppgtt_invalidate_spt(spt);
1191                         return ret;
1192                 }
1193                 sub_se.val64 = se->val64;
1194 
1195                 /* Copy the PAT field from PDE. */
1196                 sub_se.val64 &= ~_PAGE_PAT;
1197                 sub_se.val64 |= (se->val64 & _PAGE_PAT_LARGE) >> 5;
1198 
1199                 ops->set_pfn(&sub_se, dma_addr >> PAGE_SHIFT);
1200                 ppgtt_set_shadow_entry(sub_spt, &sub_se, sub_index);
1201         }
1202 
1203         /* Clear dirty field. */
1204         se->val64 &= ~_PAGE_DIRTY;
1205 
1206         ops->clear_pse(se);
1207         ops->clear_ips(se);
1208         ops->set_pfn(se, sub_spt->shadow_page.mfn);
1209         ppgtt_set_shadow_entry(spt, se, index);
1210         return 0;
1211 }
1212 
1213 static int split_64KB_gtt_entry(struct intel_vgpu *vgpu,
1214         struct intel_vgpu_ppgtt_spt *spt, unsigned long index,
1215         struct intel_gvt_gtt_entry *se)
1216 {
1217         struct intel_gvt_gtt_pte_ops *ops = vgpu->gvt->gtt.pte_ops;
1218         struct intel_gvt_gtt_entry entry = *se;
1219         unsigned long start_gfn;
1220         dma_addr_t dma_addr;
1221         int i, ret;
1222 
1223         gvt_vdbg_mm("Split 64K gtt entry, index %lu\n", index);
1224 
1225         GEM_BUG_ON(index % GTT_64K_PTE_STRIDE);
1226 
1227         start_gfn = ops->get_pfn(se);
1228 
1229         entry.type = GTT_TYPE_PPGTT_PTE_4K_ENTRY;
1230         ops->set_64k_splited(&entry);
1231 
1232         for (i = 0; i < GTT_64K_PTE_STRIDE; i++) {
1233                 ret = intel_gvt_hypervisor_dma_map_guest_page(vgpu,
1234                                         start_gfn + i, PAGE_SIZE, &dma_addr);
1235                 if (ret)
1236                         return ret;
1237 
1238                 ops->set_pfn(&entry, dma_addr >> PAGE_SHIFT);
1239                 ppgtt_set_shadow_entry(spt, &entry, index + i);
1240         }
1241         return 0;
1242 }
1243 
1244 static int ppgtt_populate_shadow_entry(struct intel_vgpu *vgpu,
1245         struct intel_vgpu_ppgtt_spt *spt, unsigned long index,
1246         struct intel_gvt_gtt_entry *ge)
1247 {
1248         struct intel_gvt_gtt_pte_ops *pte_ops = vgpu->gvt->gtt.pte_ops;
1249         struct intel_gvt_gtt_entry se = *ge;
1250         unsigned long gfn, page_size = PAGE_SIZE;
1251         dma_addr_t dma_addr;
1252         int ret;
1253 
1254         if (!pte_ops->test_present(ge))
1255                 return 0;
1256 
1257         gfn = pte_ops->get_pfn(ge);
1258 
1259         switch (ge->type) {
1260         case GTT_TYPE_PPGTT_PTE_4K_ENTRY:
1261                 gvt_vdbg_mm("shadow 4K gtt entry\n");
1262                 break;
1263         case GTT_TYPE_PPGTT_PTE_64K_ENTRY:
1264                 gvt_vdbg_mm("shadow 64K gtt entry\n");
 1265                 /*
 1266                  * The layout of a 64K page is special: the page size is
 1267                  * controlled by the upper PDE. To keep it simple, we always
 1268                  * split a 64K page into smaller 4K pages in the shadow PT.
 1269                  */
1270                 return split_64KB_gtt_entry(vgpu, spt, index, &se);
1271         case GTT_TYPE_PPGTT_PTE_2M_ENTRY:
1272                 gvt_vdbg_mm("shadow 2M gtt entry\n");
1273                 ret = is_2MB_gtt_possible(vgpu, ge);
1274                 if (ret == 0)
1275                         return split_2MB_gtt_entry(vgpu, spt, index, &se);
1276                 else if (ret < 0)
1277                         return ret;
1278                 page_size = I915_GTT_PAGE_SIZE_2M;
1279                 break;
1280         case GTT_TYPE_PPGTT_PTE_1G_ENTRY:
1281                 gvt_vgpu_err("GVT doesn't support 1GB entry\n");
1282                 return -EINVAL;
1283         default:
1284                 GEM_BUG_ON(1);
1285         };
1286 
1287         /* direct shadow */
1288         ret = intel_gvt_hypervisor_dma_map_guest_page(vgpu, gfn, page_size,
1289                                                       &dma_addr);
1290         if (ret)
1291                 return -ENXIO;
1292 
1293         pte_ops->set_pfn(&se, dma_addr >> PAGE_SHIFT);
1294         ppgtt_set_shadow_entry(spt, &se, index);
1295         return 0;
1296 }
1297 
1298 static int ppgtt_populate_spt(struct intel_vgpu_ppgtt_spt *spt)
1299 {
1300         struct intel_vgpu *vgpu = spt->vgpu;
1301         struct intel_gvt *gvt = vgpu->gvt;
1302         struct intel_gvt_gtt_pte_ops *ops = gvt->gtt.pte_ops;
1303         struct intel_vgpu_ppgtt_spt *s;
1304         struct intel_gvt_gtt_entry se, ge;
1305         unsigned long gfn, i;
1306         int ret;
1307 
1308         trace_spt_change(spt->vgpu->id, "born", spt,
1309                          spt->guest_page.gfn, spt->shadow_page.type);
1310 
1311         for_each_present_guest_entry(spt, &ge, i) {
1312                 if (gtt_type_is_pt(get_next_pt_type(ge.type))) {
1313                         s = ppgtt_populate_spt_by_guest_entry(vgpu, &ge);
1314                         if (IS_ERR(s)) {
1315                                 ret = PTR_ERR(s);
1316                                 goto fail;
1317                         }
1318                         ppgtt_get_shadow_entry(spt, &se, i);
1319                         ppgtt_generate_shadow_entry(&se, s, &ge);
1320                         ppgtt_set_shadow_entry(spt, &se, i);
1321                 } else {
1322                         gfn = ops->get_pfn(&ge);
1323                         if (!intel_gvt_hypervisor_is_valid_gfn(vgpu, gfn)) {
1324                                 ops->set_pfn(&se, gvt->gtt.scratch_mfn);
1325                                 ppgtt_set_shadow_entry(spt, &se, i);
1326                                 continue;
1327                         }
1328 
1329                         ret = ppgtt_populate_shadow_entry(vgpu, spt, i, &ge);
1330                         if (ret)
1331                                 goto fail;
1332                 }
1333         }
1334         return 0;
1335 fail:
1336         gvt_vgpu_err("fail: shadow page %p guest entry 0x%llx type %d\n",
1337                         spt, ge.val64, ge.type);
1338         return ret;
1339 }
1340 
1341 static int ppgtt_handle_guest_entry_removal(struct intel_vgpu_ppgtt_spt *spt,
1342                 struct intel_gvt_gtt_entry *se, unsigned long index)
1343 {
1344         struct intel_vgpu *vgpu = spt->vgpu;
1345         struct intel_gvt_gtt_pte_ops *ops = vgpu->gvt->gtt.pte_ops;
1346         int ret;
1347 
1348         trace_spt_guest_change(spt->vgpu->id, "remove", spt,
1349                                spt->shadow_page.type, se->val64, index);
1350 
1351         gvt_vdbg_mm("destroy old shadow entry, type %d, index %lu, value %llx\n",
1352                     se->type, index, se->val64);
1353 
1354         if (!ops->test_present(se))
1355                 return 0;
1356 
1357         if (ops->get_pfn(se) ==
1358             vgpu->gtt.scratch_pt[spt->shadow_page.type].page_mfn)
1359                 return 0;
1360 
1361         if (gtt_type_is_pt(get_next_pt_type(se->type))) {
1362                 struct intel_vgpu_ppgtt_spt *s =
1363                         intel_vgpu_find_spt_by_mfn(vgpu, ops->get_pfn(se));
1364                 if (!s) {
1365                         gvt_vgpu_err("fail to find guest page\n");
1366                         ret = -ENXIO;
1367                         goto fail;
1368                 }
1369                 ret = ppgtt_invalidate_spt(s);
1370                 if (ret)
1371                         goto fail;
1372         } else {
1373                 /* We don't set up 64K shadow entries so far. */
1374                 WARN(se->type == GTT_TYPE_PPGTT_PTE_64K_ENTRY,
1375                      "suspicious 64K entry\n");
1376                 ppgtt_invalidate_pte(spt, se);
1377         }
1378 
1379         return 0;
1380 fail:
1381         gvt_vgpu_err("fail: shadow page %p guest entry 0x%llx type %d\n",
1382                         spt, se->val64, se->type);
1383         return ret;
1384 }
1385 
1386 static int ppgtt_handle_guest_entry_add(struct intel_vgpu_ppgtt_spt *spt,
1387                 struct intel_gvt_gtt_entry *we, unsigned long index)
1388 {
1389         struct intel_vgpu *vgpu = spt->vgpu;
1390         struct intel_gvt_gtt_entry m;
1391         struct intel_vgpu_ppgtt_spt *s;
1392         int ret;
1393 
1394         trace_spt_guest_change(spt->vgpu->id, "add", spt, spt->shadow_page.type,
1395                                we->val64, index);
1396 
1397         gvt_vdbg_mm("add shadow entry: type %d, index %lu, value %llx\n",
1398                     we->type, index, we->val64);
1399 
1400         if (gtt_type_is_pt(get_next_pt_type(we->type))) {
1401                 s = ppgtt_populate_spt_by_guest_entry(vgpu, we);
1402                 if (IS_ERR(s)) {
1403                         ret = PTR_ERR(s);
1404                         goto fail;
1405                 }
1406                 ppgtt_get_shadow_entry(spt, &m, index);
1407                 ppgtt_generate_shadow_entry(&m, s, we);
1408                 ppgtt_set_shadow_entry(spt, &m, index);
1409         } else {
1410                 ret = ppgtt_populate_shadow_entry(vgpu, spt, index, we);
1411                 if (ret)
1412                         goto fail;
1413         }
1414         return 0;
1415 fail:
1416         gvt_vgpu_err("fail: spt %p guest entry 0x%llx type %d\n",
1417                 spt, we->val64, we->type);
1418         return ret;
1419 }
1420 
1421 static int sync_oos_page(struct intel_vgpu *vgpu,
1422                 struct intel_vgpu_oos_page *oos_page)
1423 {
1424         const struct intel_gvt_device_info *info = &vgpu->gvt->device_info;
1425         struct intel_gvt *gvt = vgpu->gvt;
1426         struct intel_gvt_gtt_pte_ops *ops = gvt->gtt.pte_ops;
1427         struct intel_vgpu_ppgtt_spt *spt = oos_page->spt;
1428         struct intel_gvt_gtt_entry old, new;
1429         int index;
1430         int ret;
1431 
1432         trace_oos_change(vgpu->id, "sync", oos_page->id,
1433                          spt, spt->guest_page.type);
1434 
1435         old.type = new.type = get_entry_type(spt->guest_page.type);
1436         old.val64 = new.val64 = 0;
1437 
1438         for (index = 0; index < (I915_GTT_PAGE_SIZE >>
1439                                 info->gtt_entry_size_shift); index++) {
1440                 ops->get_entry(oos_page->mem, &old, index, false, 0, vgpu);
1441                 ops->get_entry(NULL, &new, index, true,
1442                                spt->guest_page.gfn << PAGE_SHIFT, vgpu);
1443 
1444                 if (old.val64 == new.val64
1445                         && !test_and_clear_bit(index, spt->post_shadow_bitmap))
1446                         continue;
1447 
1448                 trace_oos_sync(vgpu->id, oos_page->id,
1449                                 spt, spt->guest_page.type,
1450                                 new.val64, index);
1451 
1452                 ret = ppgtt_populate_shadow_entry(vgpu, spt, index, &new);
1453                 if (ret)
1454                         return ret;
1455 
1456                 ops->set_entry(oos_page->mem, &new, index, false, 0, vgpu);
1457         }
1458 
1459         spt->guest_page.write_cnt = 0;
1460         list_del_init(&spt->post_shadow_list);
1461         return 0;
1462 }
1463 
1464 static int detach_oos_page(struct intel_vgpu *vgpu,
1465                 struct intel_vgpu_oos_page *oos_page)
1466 {
1467         struct intel_gvt *gvt = vgpu->gvt;
1468         struct intel_vgpu_ppgtt_spt *spt = oos_page->spt;
1469 
1470         trace_oos_change(vgpu->id, "detach", oos_page->id,
1471                          spt, spt->guest_page.type);
1472 
1473         spt->guest_page.write_cnt = 0;
1474         spt->guest_page.oos_page = NULL;
1475         oos_page->spt = NULL;
1476 
1477         list_del_init(&oos_page->vm_list);
1478         list_move_tail(&oos_page->list, &gvt->gtt.oos_page_free_list_head);
1479 
1480         return 0;
1481 }
1482 
1483 static int attach_oos_page(struct intel_vgpu_oos_page *oos_page,
1484                 struct intel_vgpu_ppgtt_spt *spt)
1485 {
1486         struct intel_gvt *gvt = spt->vgpu->gvt;
1487         int ret;
1488 
1489         ret = intel_gvt_hypervisor_read_gpa(spt->vgpu,
1490                         spt->guest_page.gfn << I915_GTT_PAGE_SHIFT,
1491                         oos_page->mem, I915_GTT_PAGE_SIZE);
1492         if (ret)
1493                 return ret;
1494 
1495         oos_page->spt = spt;
1496         spt->guest_page.oos_page = oos_page;
1497 
1498         list_move_tail(&oos_page->list, &gvt->gtt.oos_page_use_list_head);
1499 
1500         trace_oos_change(spt->vgpu->id, "attach", oos_page->id,
1501                          spt, spt->guest_page.type);
1502         return 0;
1503 }
1504 
1505 static int ppgtt_set_guest_page_sync(struct intel_vgpu_ppgtt_spt *spt)
1506 {
1507         struct intel_vgpu_oos_page *oos_page = spt->guest_page.oos_page;
1508         int ret;
1509 
1510         ret = intel_vgpu_enable_page_track(spt->vgpu, spt->guest_page.gfn);
1511         if (ret)
1512                 return ret;
1513 
1514         trace_oos_change(spt->vgpu->id, "set page sync", oos_page->id,
1515                          spt, spt->guest_page.type);
1516 
1517         list_del_init(&oos_page->vm_list);
1518         return sync_oos_page(spt->vgpu, oos_page);
1519 }
1520 
1521 static int ppgtt_allocate_oos_page(struct intel_vgpu_ppgtt_spt *spt)
1522 {
1523         struct intel_gvt *gvt = spt->vgpu->gvt;
1524         struct intel_gvt_gtt *gtt = &gvt->gtt;
1525         struct intel_vgpu_oos_page *oos_page = spt->guest_page.oos_page;
1526         int ret;
1527 
1528         WARN(oos_page, "shadow PPGTT page already has an oos page\n");
1529 
1530         if (list_empty(&gtt->oos_page_free_list_head)) {
1531                 oos_page = container_of(gtt->oos_page_use_list_head.next,
1532                         struct intel_vgpu_oos_page, list);
1533                 ret = ppgtt_set_guest_page_sync(oos_page->spt);
1534                 if (ret)
1535                         return ret;
1536                 ret = detach_oos_page(spt->vgpu, oos_page);
1537                 if (ret)
1538                         return ret;
1539         } else
1540                 oos_page = container_of(gtt->oos_page_free_list_head.next,
1541                         struct intel_vgpu_oos_page, list);
1542         return attach_oos_page(oos_page, spt);
1543 }
1544 
1545 static int ppgtt_set_guest_page_oos(struct intel_vgpu_ppgtt_spt *spt)
1546 {
1547         struct intel_vgpu_oos_page *oos_page = spt->guest_page.oos_page;
1548 
1549         if (WARN(!oos_page, "shadow PPGTT page should have an oos page\n"))
1550                 return -EINVAL;
1551 
1552         trace_oos_change(spt->vgpu->id, "set page out of sync", oos_page->id,
1553                          spt, spt->guest_page.type);
1554 
1555         list_add_tail(&oos_page->vm_list, &spt->vgpu->gtt.oos_page_list_head);
1556         return intel_vgpu_disable_page_track(spt->vgpu, spt->guest_page.gfn);
1557 }
1558 
1559 /**
1560  * intel_vgpu_sync_oos_pages - sync all the out-of-sync shadow pages for a vGPU
1561  * @vgpu: a vGPU
1562  *
1563  * This function is called before submitting a guest workload to host,
1564  * to sync all the out-of-sync shadow pages for the vGPU.
1565  *
1566  * Returns:
1567  * Zero on success, negative error code if failed.
1568  */
1569 int intel_vgpu_sync_oos_pages(struct intel_vgpu *vgpu)
1570 {
1571         struct list_head *pos, *n;
1572         struct intel_vgpu_oos_page *oos_page;
1573         int ret;
1574 
1575         if (!enable_out_of_sync)
1576                 return 0;
1577 
1578         list_for_each_safe(pos, n, &vgpu->gtt.oos_page_list_head) {
1579                 oos_page = container_of(pos,
1580                                 struct intel_vgpu_oos_page, vm_list);
1581                 ret = ppgtt_set_guest_page_sync(oos_page->spt);
1582                 if (ret)
1583                         return ret;
1584         }
1585         return 0;
1586 }
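/*
 * Illustrative sketch (not part of the original gtt.c) of the out-of-sync
 * (OOS) life cycle implemented by the functions above, assuming
 * enable_out_of_sync is set:
 *
 *   in sync:   the guest PTE page is write-protected and every guest write
 *              is trapped and shadowed immediately.
 *   -> oos:    once a PTE page has seen enough trapped writes
 *              (can_do_out_of_sync()), an oos_page snapshot of its contents
 *              is attached and the write protection is dropped.
 *   oos:       guest writes land on the page directly and are not trapped.
 *   -> sync:   before a workload is submitted, intel_vgpu_sync_oos_pages()
 *              re-enables write protection and replays only the entries
 *              that differ from the snapshot (sync_oos_page()).
 */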
1587 
1588 /*
1589  * The heart of PPGTT shadow page table.
1590  */
1591 static int ppgtt_handle_guest_write_page_table(
1592                 struct intel_vgpu_ppgtt_spt *spt,
1593                 struct intel_gvt_gtt_entry *we, unsigned long index)
1594 {
1595         struct intel_vgpu *vgpu = spt->vgpu;
1596         int type = spt->shadow_page.type;
1597         struct intel_gvt_gtt_pte_ops *ops = vgpu->gvt->gtt.pte_ops;
1598         struct intel_gvt_gtt_entry old_se;
1599         int new_present;
1600         int i, ret;
1601 
1602         new_present = ops->test_present(we);
1603 
1604         /*
1605          * Add the new entry first and then remove the old one, which
1606          * guarantees that the ppgtt table stays valid during the window
1607          * between the add and the removal.
1608          */
1609         ppgtt_get_shadow_entry(spt, &old_se, index);
1610 
1611         if (new_present) {
1612                 ret = ppgtt_handle_guest_entry_add(spt, we, index);
1613                 if (ret)
1614                         goto fail;
1615         }
1616 
1617         ret = ppgtt_handle_guest_entry_removal(spt, &old_se, index);
1618         if (ret)
1619                 goto fail;
1620 
1621         if (!new_present) {
1622                 /* For split 64KB entries, we need to clear them all. */
1623                 if (ops->test_64k_splited(&old_se) &&
1624                     !(index % GTT_64K_PTE_STRIDE)) {
1625                         gvt_vdbg_mm("remove split 64K shadow entries\n");
1626                         for (i = 0; i < GTT_64K_PTE_STRIDE; i++) {
1627                                 ops->clear_64k_splited(&old_se);
1628                                 ops->set_pfn(&old_se,
1629                                         vgpu->gtt.scratch_pt[type].page_mfn);
1630                                 ppgtt_set_shadow_entry(spt, &old_se, index + i);
1631                         }
1632                 } else if (old_se.type == GTT_TYPE_PPGTT_PTE_2M_ENTRY ||
1633                            old_se.type == GTT_TYPE_PPGTT_PTE_1G_ENTRY) {
1634                         ops->clear_pse(&old_se);
1635                         ops->set_pfn(&old_se,
1636                                      vgpu->gtt.scratch_pt[type].page_mfn);
1637                         ppgtt_set_shadow_entry(spt, &old_se, index);
1638                 } else {
1639                         ops->set_pfn(&old_se,
1640                                      vgpu->gtt.scratch_pt[type].page_mfn);
1641                         ppgtt_set_shadow_entry(spt, &old_se, index);
1642                 }
1643         }
1644 
1645         return 0;
1646 fail:
1647         gvt_vgpu_err("fail: shadow page %p guest entry 0x%llx type %d.\n",
1648                         spt, we->val64, we->type);
1649         return ret;
1650 }
1651 
1652 
1653 
1654 static inline bool can_do_out_of_sync(struct intel_vgpu_ppgtt_spt *spt)
1655 {
1656         return enable_out_of_sync
1657                 && gtt_type_is_pte_pt(spt->guest_page.type)
1658                 && spt->guest_page.write_cnt >= 2;
1659 }
1660 
1661 static void ppgtt_set_post_shadow(struct intel_vgpu_ppgtt_spt *spt,
1662                 unsigned long index)
1663 {
1664         set_bit(index, spt->post_shadow_bitmap);
1665         if (!list_empty(&spt->post_shadow_list))
1666                 return;
1667 
1668         list_add_tail(&spt->post_shadow_list,
1669                         &spt->vgpu->gtt.post_shadow_list_head);
1670 }
1671 
1672 /**
1673  * intel_vgpu_flush_post_shadow - flush the post shadow transactions
1674  * @vgpu: a vGPU
1675  *
1676  * This function is called before submitting a guest workload to host,
1677  * to flush all the post shadows for a vGPU.
1678  *
1679  * Returns:
1680  * Zero on success, negative error code if failed.
1681  */
1682 int intel_vgpu_flush_post_shadow(struct intel_vgpu *vgpu)
1683 {
1684         struct list_head *pos, *n;
1685         struct intel_vgpu_ppgtt_spt *spt;
1686         struct intel_gvt_gtt_entry ge;
1687         unsigned long index;
1688         int ret;
1689 
1690         list_for_each_safe(pos, n, &vgpu->gtt.post_shadow_list_head) {
1691                 spt = container_of(pos, struct intel_vgpu_ppgtt_spt,
1692                                 post_shadow_list);
1693 
1694                 for_each_set_bit(index, spt->post_shadow_bitmap,
1695                                 GTT_ENTRY_NUM_IN_ONE_PAGE) {
1696                         ppgtt_get_guest_entry(spt, &ge, index);
1697 
1698                         ret = ppgtt_handle_guest_write_page_table(spt,
1699                                                         &ge, index);
1700                         if (ret)
1701                                 return ret;
1702                         clear_bit(index, spt->post_shadow_bitmap);
1703                 }
1704                 list_del_init(&spt->post_shadow_list);
1705         }
1706         return 0;
1707 }
1708 
1709 static int ppgtt_handle_guest_write_page_table_bytes(
1710                 struct intel_vgpu_ppgtt_spt *spt,
1711                 u64 pa, void *p_data, int bytes)
1712 {
1713         struct intel_vgpu *vgpu = spt->vgpu;
1714         struct intel_gvt_gtt_pte_ops *ops = vgpu->gvt->gtt.pte_ops;
1715         const struct intel_gvt_device_info *info = &vgpu->gvt->device_info;
1716         struct intel_gvt_gtt_entry we, se;
1717         unsigned long index;
1718         int ret;
1719 
1720         index = (pa & (PAGE_SIZE - 1)) >> info->gtt_entry_size_shift;
1721 
1722         ppgtt_get_guest_entry(spt, &we, index);
1723 
1724         /*
1725          * For a page table with 64K gtt entries, only PTE#0, PTE#16,
1726          * PTE#32, ... PTE#496 are used. Updates to the unused PTEs should
1727          * be ignored.
1728          */
1729         if (we.type == GTT_TYPE_PPGTT_PTE_64K_ENTRY &&
1730             (index % GTT_64K_PTE_STRIDE)) {
1731                 gvt_vdbg_mm("Ignore write to unused PTE entry, index %lu\n",
1732                             index);
1733                 return 0;
1734         }
1735 
1736         if (bytes == info->gtt_entry_size) {
1737                 ret = ppgtt_handle_guest_write_page_table(spt, &we, index);
1738                 if (ret)
1739                         return ret;
1740         } else {
1741                 if (!test_bit(index, spt->post_shadow_bitmap)) {
1742                         int type = spt->shadow_page.type;
1743 
1744                         ppgtt_get_shadow_entry(spt, &se, index);
1745                         ret = ppgtt_handle_guest_entry_removal(spt, &se, index);
1746                         if (ret)
1747                                 return ret;
1748                         ops->set_pfn(&se, vgpu->gtt.scratch_pt[type].page_mfn);
1749                         ppgtt_set_shadow_entry(spt, &se, index);
1750                 }
1751                 ppgtt_set_post_shadow(spt, index);
1752         }
1753 
1754         if (!enable_out_of_sync)
1755                 return 0;
1756 
1757         spt->guest_page.write_cnt++;
1758 
1759         if (spt->guest_page.oos_page)
1760                 ops->set_entry(spt->guest_page.oos_page->mem, &we, index,
1761                                 false, 0, vgpu);
1762 
1763         if (can_do_out_of_sync(spt)) {
1764                 if (!spt->guest_page.oos_page)
1765                         ppgtt_allocate_oos_page(spt);
1766 
1767                 ret = ppgtt_set_guest_page_oos(spt);
1768                 if (ret < 0)
1769                         return ret;
1770         }
1771         return 0;
1772 }
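/*
 * Illustrative sketch (not part of the original gtt.c): how a trapped guest
 * write address is turned into a PTE index above, and why most indices are
 * skipped on a 64K page table.  Assumes a 4K page, 8-byte GTT entries
 * (gtt_entry_size_shift == 3) and GTT_64K_PTE_STRIDE == 16.
 */
static inline int example_write_to_pte_index(unsigned long long pa,
                                             unsigned long *index)
{
        *index = (pa & 0xfffULL) >> 3;  /* byte offset in page / entry size */

        /*
         * On a 64K page table only PTE#0, PTE#16, ..., PTE#496 are real
         * entries, e.g. pa offset 0x088 gives index 17, which is unused.
         */
        return (*index % 16) == 0;      /* 1 if the slot is actually used */
}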
1773 
1774 static void invalidate_ppgtt_mm(struct intel_vgpu_mm *mm)
1775 {
1776         struct intel_vgpu *vgpu = mm->vgpu;
1777         struct intel_gvt *gvt = vgpu->gvt;
1778         struct intel_gvt_gtt *gtt = &gvt->gtt;
1779         struct intel_gvt_gtt_pte_ops *ops = gtt->pte_ops;
1780         struct intel_gvt_gtt_entry se;
1781         int index;
1782 
1783         if (!mm->ppgtt_mm.shadowed)
1784                 return;
1785 
1786         for (index = 0; index < ARRAY_SIZE(mm->ppgtt_mm.shadow_pdps); index++) {
1787                 ppgtt_get_shadow_root_entry(mm, &se, index);
1788 
1789                 if (!ops->test_present(&se))
1790                         continue;
1791 
1792                 ppgtt_invalidate_spt_by_shadow_entry(vgpu, &se);
1793                 se.val64 = 0;
1794                 ppgtt_set_shadow_root_entry(mm, &se, index);
1795 
1796                 trace_spt_guest_change(vgpu->id, "destroy root pointer",
1797                                        NULL, se.type, se.val64, index);
1798         }
1799 
1800         mm->ppgtt_mm.shadowed = false;
1801 }
1802 
1803 
1804 static int shadow_ppgtt_mm(struct intel_vgpu_mm *mm)
1805 {
1806         struct intel_vgpu *vgpu = mm->vgpu;
1807         struct intel_gvt *gvt = vgpu->gvt;
1808         struct intel_gvt_gtt *gtt = &gvt->gtt;
1809         struct intel_gvt_gtt_pte_ops *ops = gtt->pte_ops;
1810         struct intel_vgpu_ppgtt_spt *spt;
1811         struct intel_gvt_gtt_entry ge, se;
1812         int index, ret;
1813 
1814         if (mm->ppgtt_mm.shadowed)
1815                 return 0;
1816 
1817         mm->ppgtt_mm.shadowed = true;
1818 
1819         for (index = 0; index < ARRAY_SIZE(mm->ppgtt_mm.guest_pdps); index++) {
1820                 ppgtt_get_guest_root_entry(mm, &ge, index);
1821 
1822                 if (!ops->test_present(&ge))
1823                         continue;
1824 
1825                 trace_spt_guest_change(vgpu->id, __func__, NULL,
1826                                        ge.type, ge.val64, index);
1827 
1828                 spt = ppgtt_populate_spt_by_guest_entry(vgpu, &ge);
1829                 if (IS_ERR(spt)) {
1830                         gvt_vgpu_err("fail to populate guest root pointer\n");
1831                         ret = PTR_ERR(spt);
1832                         goto fail;
1833                 }
1834                 ppgtt_generate_shadow_entry(&se, spt, &ge);
1835                 ppgtt_set_shadow_root_entry(mm, &se, index);
1836 
1837                 trace_spt_guest_change(vgpu->id, "populate root pointer",
1838                                        NULL, se.type, se.val64, index);
1839         }
1840 
1841         return 0;
1842 fail:
1843         invalidate_ppgtt_mm(mm);
1844         return ret;
1845 }
1846 
1847 static struct intel_vgpu_mm *vgpu_alloc_mm(struct intel_vgpu *vgpu)
1848 {
1849         struct intel_vgpu_mm *mm;
1850 
1851         mm = kzalloc(sizeof(*mm), GFP_KERNEL);
1852         if (!mm)
1853                 return NULL;
1854 
1855         mm->vgpu = vgpu;
1856         kref_init(&mm->ref);
1857         atomic_set(&mm->pincount, 0);
1858 
1859         return mm;
1860 }
1861 
1862 static void vgpu_free_mm(struct intel_vgpu_mm *mm)
1863 {
1864         kfree(mm);
1865 }
1866 
1867 /**
1868  * intel_vgpu_create_ppgtt_mm - create a ppgtt mm object for a vGPU
1869  * @vgpu: a vGPU
1870  * @root_entry_type: ppgtt root entry type
1871  * @pdps: guest pdps.
1872  *
1873  * This function is used to create a ppgtt mm object for a vGPU.
1874  *
1875  * Returns:
1876  * The created mm object on success, ERR_PTR() encoded error code if failed.
1877  */
1878 struct intel_vgpu_mm *intel_vgpu_create_ppgtt_mm(struct intel_vgpu *vgpu,
1879                 enum intel_gvt_gtt_type root_entry_type, u64 pdps[])
1880 {
1881         struct intel_gvt *gvt = vgpu->gvt;
1882         struct intel_vgpu_mm *mm;
1883         int ret;
1884 
1885         mm = vgpu_alloc_mm(vgpu);
1886         if (!mm)
1887                 return ERR_PTR(-ENOMEM);
1888 
1889         mm->type = INTEL_GVT_MM_PPGTT;
1890 
1891         GEM_BUG_ON(root_entry_type != GTT_TYPE_PPGTT_ROOT_L3_ENTRY &&
1892                    root_entry_type != GTT_TYPE_PPGTT_ROOT_L4_ENTRY);
1893         mm->ppgtt_mm.root_entry_type = root_entry_type;
1894 
1895         INIT_LIST_HEAD(&mm->ppgtt_mm.list);
1896         INIT_LIST_HEAD(&mm->ppgtt_mm.lru_list);
1897 
1898         if (root_entry_type == GTT_TYPE_PPGTT_ROOT_L4_ENTRY)
1899                 mm->ppgtt_mm.guest_pdps[0] = pdps[0];
1900         else
1901                 memcpy(mm->ppgtt_mm.guest_pdps, pdps,
1902                        sizeof(mm->ppgtt_mm.guest_pdps));
1903 
1904         ret = shadow_ppgtt_mm(mm);
1905         if (ret) {
1906                 gvt_vgpu_err("failed to shadow ppgtt mm\n");
1907                 vgpu_free_mm(mm);
1908                 return ERR_PTR(ret);
1909         }
1910 
1911         list_add_tail(&mm->ppgtt_mm.list, &vgpu->gtt.ppgtt_mm_list_head);
1912 
1913         mutex_lock(&gvt->gtt.ppgtt_mm_lock);
1914         list_add_tail(&mm->ppgtt_mm.lru_list, &gvt->gtt.ppgtt_mm_lru_list_head);
1915         mutex_unlock(&gvt->gtt.ppgtt_mm_lock);
1916 
1917         return mm;
1918 }
1919 
1920 static struct intel_vgpu_mm *intel_vgpu_create_ggtt_mm(struct intel_vgpu *vgpu)
1921 {
1922         struct intel_vgpu_mm *mm;
1923         unsigned long nr_entries;
1924 
1925         mm = vgpu_alloc_mm(vgpu);
1926         if (!mm)
1927                 return ERR_PTR(-ENOMEM);
1928 
1929         mm->type = INTEL_GVT_MM_GGTT;
1930 
1931         nr_entries = gvt_ggtt_gm_sz(vgpu->gvt) >> I915_GTT_PAGE_SHIFT;
1932         mm->ggtt_mm.virtual_ggtt =
1933                 vzalloc(array_size(nr_entries,
1934                                    vgpu->gvt->device_info.gtt_entry_size));
1935         if (!mm->ggtt_mm.virtual_ggtt) {
1936                 vgpu_free_mm(mm);
1937                 return ERR_PTR(-ENOMEM);
1938         }
1939 
1940         return mm;
1941 }
1942 
1943 /**
1944  * _intel_vgpu_mm_release - destroy a mm object
1945  * @mm_ref: a kref object
1946  *
1947  * This function is used to destroy a mm object for vGPU
1948  *
1949  */
1950 void _intel_vgpu_mm_release(struct kref *mm_ref)
1951 {
1952         struct intel_vgpu_mm *mm = container_of(mm_ref, typeof(*mm), ref);
1953 
1954         if (GEM_WARN_ON(atomic_read(&mm->pincount)))
1955                 gvt_err("vgpu mm pin count bug detected\n");
1956 
1957         if (mm->type == INTEL_GVT_MM_PPGTT) {
1958                 list_del(&mm->ppgtt_mm.list);
1959 
1960                 mutex_lock(&mm->vgpu->gvt->gtt.ppgtt_mm_lock);
1961                 list_del(&mm->ppgtt_mm.lru_list);
1962                 mutex_unlock(&mm->vgpu->gvt->gtt.ppgtt_mm_lock);
1963 
1964                 invalidate_ppgtt_mm(mm);
1965         } else {
1966                 vfree(mm->ggtt_mm.virtual_ggtt);
1967         }
1968 
1969         vgpu_free_mm(mm);
1970 }
1971 
1972 /**
1973  * intel_vgpu_unpin_mm - decrease the pin count of a vGPU mm object
1974  * @mm: a vGPU mm object
1975  *
1976  * This function is called when a user no longer wants to use a vGPU mm object
1977  */
1978 void intel_vgpu_unpin_mm(struct intel_vgpu_mm *mm)
1979 {
1980         atomic_dec_if_positive(&mm->pincount);
1981 }
1982 
1983 /**
1984  * intel_vgpu_pin_mm - increase the pin count of a vGPU mm object
1985  * @mm: target vgpu mm
1986  *
1987  * This function is called when a user wants to use a vGPU mm object. If this
1988  * mm object hasn't been shadowed yet, the shadow will be populated at this
1989  * time.
1990  *
1991  * Returns:
1992  * Zero on success, negative error code if failed.
1993  */
1994 int intel_vgpu_pin_mm(struct intel_vgpu_mm *mm)
1995 {
1996         int ret;
1997 
1998         atomic_inc(&mm->pincount);
1999 
2000         if (mm->type == INTEL_GVT_MM_PPGTT) {
2001                 ret = shadow_ppgtt_mm(mm);
2002                 if (ret)
2003                         return ret;
2004 
2005                 mutex_lock(&mm->vgpu->gvt->gtt.ppgtt_mm_lock);
2006                 list_move_tail(&mm->ppgtt_mm.lru_list,
2007                                &mm->vgpu->gvt->gtt.ppgtt_mm_lru_list_head);
2008                 mutex_unlock(&mm->vgpu->gvt->gtt.ppgtt_mm_lock);
2009         }
2010 
2011         return 0;
2012 }
2013 
2014 static int reclaim_one_ppgtt_mm(struct intel_gvt *gvt)
2015 {
2016         struct intel_vgpu_mm *mm;
2017         struct list_head *pos, *n;
2018 
2019         mutex_lock(&gvt->gtt.ppgtt_mm_lock);
2020 
2021         list_for_each_safe(pos, n, &gvt->gtt.ppgtt_mm_lru_list_head) {
2022                 mm = container_of(pos, struct intel_vgpu_mm, ppgtt_mm.lru_list);
2023 
2024                 if (atomic_read(&mm->pincount))
2025                         continue;
2026 
2027                 list_del_init(&mm->ppgtt_mm.lru_list);
2028                 mutex_unlock(&gvt->gtt.ppgtt_mm_lock);
2029                 invalidate_ppgtt_mm(mm);
2030                 return 1;
2031         }
2032         mutex_unlock(&gvt->gtt.ppgtt_mm_lock);
2033         return 0;
2034 }
2035 
2036 /*
2037  * GMA translation APIs.
2038  */
2039 static inline int ppgtt_get_next_level_entry(struct intel_vgpu_mm *mm,
2040                 struct intel_gvt_gtt_entry *e, unsigned long index, bool guest)
2041 {
2042         struct intel_vgpu *vgpu = mm->vgpu;
2043         struct intel_gvt_gtt_pte_ops *ops = vgpu->gvt->gtt.pte_ops;
2044         struct intel_vgpu_ppgtt_spt *s;
2045 
2046         s = intel_vgpu_find_spt_by_mfn(vgpu, ops->get_pfn(e));
2047         if (!s)
2048                 return -ENXIO;
2049 
2050         if (!guest)
2051                 ppgtt_get_shadow_entry(s, e, index);
2052         else
2053                 ppgtt_get_guest_entry(s, e, index);
2054         return 0;
2055 }
2056 
2057 /**
2058  * intel_vgpu_gma_to_gpa - translate a gma to GPA
2059  * @mm: mm object. could be a PPGTT or GGTT mm object
2060  * @gma: graphics memory address in this mm object
2061  *
2062  * This function is used to translate a graphics memory address in a specific
2063  * graphics memory space to a guest physical address.
2064  *
2065  * Returns:
2066  * Guest physical address on success, INTEL_GVT_INVALID_ADDR if failed.
2067  */
2068 unsigned long intel_vgpu_gma_to_gpa(struct intel_vgpu_mm *mm, unsigned long gma)
2069 {
2070         struct intel_vgpu *vgpu = mm->vgpu;
2071         struct intel_gvt *gvt = vgpu->gvt;
2072         struct intel_gvt_gtt_pte_ops *pte_ops = gvt->gtt.pte_ops;
2073         struct intel_gvt_gtt_gma_ops *gma_ops = gvt->gtt.gma_ops;
2074         unsigned long gpa = INTEL_GVT_INVALID_ADDR;
2075         unsigned long gma_index[4];
2076         struct intel_gvt_gtt_entry e;
2077         int i, levels = 0;
2078         int ret;
2079 
2080         GEM_BUG_ON(mm->type != INTEL_GVT_MM_GGTT &&
2081                    mm->type != INTEL_GVT_MM_PPGTT);
2082 
2083         if (mm->type == INTEL_GVT_MM_GGTT) {
2084                 if (!vgpu_gmadr_is_valid(vgpu, gma))
2085                         goto err;
2086 
2087                 ggtt_get_guest_entry(mm, &e,
2088                         gma_ops->gma_to_ggtt_pte_index(gma));
2089 
2090                 gpa = (pte_ops->get_pfn(&e) << I915_GTT_PAGE_SHIFT)
2091                         + (gma & ~I915_GTT_PAGE_MASK);
2092 
2093                 trace_gma_translate(vgpu->id, "ggtt", 0, 0, gma, gpa);
2094         } else {
2095                 switch (mm->ppgtt_mm.root_entry_type) {
2096                 case GTT_TYPE_PPGTT_ROOT_L4_ENTRY:
2097                         ppgtt_get_shadow_root_entry(mm, &e, 0);
2098 
2099                         gma_index[0] = gma_ops->gma_to_pml4_index(gma);
2100                         gma_index[1] = gma_ops->gma_to_l4_pdp_index(gma);
2101                         gma_index[2] = gma_ops->gma_to_pde_index(gma);
2102                         gma_index[3] = gma_ops->gma_to_pte_index(gma);
2103                         levels = 4;
2104                         break;
2105                 case GTT_TYPE_PPGTT_ROOT_L3_ENTRY:
2106                         ppgtt_get_shadow_root_entry(mm, &e,
2107                                         gma_ops->gma_to_l3_pdp_index(gma));
2108 
2109                         gma_index[0] = gma_ops->gma_to_pde_index(gma);
2110                         gma_index[1] = gma_ops->gma_to_pte_index(gma);
2111                         levels = 2;
2112                         break;
2113                 default:
2114                         GEM_BUG_ON(1);
2115                 }
2116 
2117                 /* walk the shadow page table and get gpa from guest entry */
2118                 for (i = 0; i < levels; i++) {
2119                         ret = ppgtt_get_next_level_entry(mm, &e, gma_index[i],
2120                                 (i == levels - 1));
2121                         if (ret)
2122                                 goto err;
2123 
2124                         if (!pte_ops->test_present(&e)) {
2125                                 gvt_dbg_core("GMA 0x%lx is not present\n", gma);
2126                                 goto err;
2127                         }
2128                 }
2129 
2130                 gpa = (pte_ops->get_pfn(&e) << I915_GTT_PAGE_SHIFT) +
2131                                         (gma & ~I915_GTT_PAGE_MASK);
2132                 trace_gma_translate(vgpu->id, "ppgtt", 0,
2133                                     mm->ppgtt_mm.root_entry_type, gma, gpa);
2134         }
2135 
2136         return gpa;
2137 err:
2138         gvt_vgpu_err("invalid mm type: %d gma %lx\n", mm->type, gma);
2139         return INTEL_GVT_INVALID_ADDR;
2140 }
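/*
 * Illustrative sketch (not part of the original gtt.c): how a graphics
 * memory address is decomposed for the 4-level (L4) walk above.  Assumes
 * the usual gen8 layout of 9 index bits per level above a 12-bit page
 * offset, which is what the gma_ops->gma_to_*_index() helpers are expected
 * to compute.
 */
static inline void example_gma_decompose_l4(unsigned long long gma,
                                            unsigned long idx[4])
{
        idx[0] = (gma >> 39) & 0x1ff;   /* PML4 index */
        idx[1] = (gma >> 30) & 0x1ff;   /* PDP index  */
        idx[2] = (gma >> 21) & 0x1ff;   /* PDE index  */
        idx[3] = (gma >> 12) & 0x1ff;   /* PTE index  */
        /* GPA = (pfn from the leaf entry << 12) + (gma & 0xfff) */
}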
2141 
2142 static int emulate_ggtt_mmio_read(struct intel_vgpu *vgpu,
2143         unsigned int off, void *p_data, unsigned int bytes)
2144 {
2145         struct intel_vgpu_mm *ggtt_mm = vgpu->gtt.ggtt_mm;
2146         const struct intel_gvt_device_info *info = &vgpu->gvt->device_info;
2147         unsigned long index = off >> info->gtt_entry_size_shift;
2148         unsigned long gma;
2149         struct intel_gvt_gtt_entry e;
2150 
2151         if (bytes != 4 && bytes != 8)
2152                 return -EINVAL;
2153 
2154         gma = index << I915_GTT_PAGE_SHIFT;
2155         if (!intel_gvt_ggtt_validate_range(vgpu,
2156                                            gma, 1 << I915_GTT_PAGE_SHIFT)) {
2157                 gvt_dbg_mm("read invalid ggtt at 0x%lx\n", gma);
2158                 memset(p_data, 0, bytes);
2159                 return 0;
2160         }
2161 
2162         ggtt_get_guest_entry(ggtt_mm, &e, index);
2163         memcpy(p_data, (void *)&e.val64 + (off & (info->gtt_entry_size - 1)),
2164                         bytes);
2165         return 0;
2166 }
2167 
2168 /**
2169  * intel_vgpu_emulate_ggtt_mmio_read - emulate GTT MMIO register read
2170  * @vgpu: a vGPU
2171  * @off: register offset
2172  * @p_data: data will be returned to guest
2173  * @bytes: data length
2174  *
2175  * This function is used to emulate the GTT MMIO register read
2176  *
2177  * Returns:
2178  * Zero on success, error code if failed.
2179  */
2180 int intel_vgpu_emulate_ggtt_mmio_read(struct intel_vgpu *vgpu, unsigned int off,
2181         void *p_data, unsigned int bytes)
2182 {
2183         const struct intel_gvt_device_info *info = &vgpu->gvt->device_info;
2184         int ret;
2185 
2186         if (bytes != 4 && bytes != 8)
2187                 return -EINVAL;
2188 
2189         off -= info->gtt_start_offset;
2190         ret = emulate_ggtt_mmio_read(vgpu, off, p_data, bytes);
2191         return ret;
2192 }
2193 
2194 static void ggtt_invalidate_pte(struct intel_vgpu *vgpu,
2195                 struct intel_gvt_gtt_entry *entry)
2196 {
2197         struct intel_gvt_gtt_pte_ops *pte_ops = vgpu->gvt->gtt.pte_ops;
2198         unsigned long pfn;
2199 
2200         pfn = pte_ops->get_pfn(entry);
2201         if (pfn != vgpu->gvt->gtt.scratch_mfn)
2202                 intel_gvt_hypervisor_dma_unmap_guest_page(vgpu,
2203                                                 pfn << PAGE_SHIFT);
2204 }
2205 
2206 static int emulate_ggtt_mmio_write(struct intel_vgpu *vgpu, unsigned int off,
2207         void *p_data, unsigned int bytes)
2208 {
2209         struct intel_gvt *gvt = vgpu->gvt;
2210         const struct intel_gvt_device_info *info = &gvt->device_info;
2211         struct intel_vgpu_mm *ggtt_mm = vgpu->gtt.ggtt_mm;
2212         struct intel_gvt_gtt_pte_ops *ops = gvt->gtt.pte_ops;
2213         unsigned long g_gtt_index = off >> info->gtt_entry_size_shift;
2214         unsigned long gma, gfn;
2215         struct intel_gvt_gtt_entry e = {.val64 = 0, .type = GTT_TYPE_GGTT_PTE};
2216         struct intel_gvt_gtt_entry m = {.val64 = 0, .type = GTT_TYPE_GGTT_PTE};
2217         dma_addr_t dma_addr;
2218         int ret;
2219         struct intel_gvt_partial_pte *partial_pte, *pos, *n;
2220         bool partial_update = false;
2221 
2222         if (bytes != 4 && bytes != 8)
2223                 return -EINVAL;
2224 
2225         gma = g_gtt_index << I915_GTT_PAGE_SHIFT;
2226 
2227         /* the VM may configure the whole GM space when ballooning is used */
2228         if (!vgpu_gmadr_is_valid(vgpu, gma))
2229                 return 0;
2230 
2231         e.type = GTT_TYPE_GGTT_PTE;
2232         memcpy((void *)&e.val64 + (off & (info->gtt_entry_size - 1)), p_data,
2233                         bytes);
2234 
2235         /* If the ggtt entry size is 8 bytes and it is split into two 4-byte
2236          * writes, save the first 4 bytes in a list and update the virtual
2237          * PTE. Only update the shadow PTE when the second 4-byte write arrives.
2238          */
2239         if (bytes < info->gtt_entry_size) {
2240                 bool found = false;
2241 
2242                 list_for_each_entry_safe(pos, n,
2243                                 &ggtt_mm->ggtt_mm.partial_pte_list, list) {
2244                         if (g_gtt_index == pos->offset >>
2245                                         info->gtt_entry_size_shift) {
2246                                 if (off != pos->offset) {
2247                                         /* the second partial part*/
2248                                         int last_off = pos->offset &
2249                                                 (info->gtt_entry_size - 1);
2250 
2251                                         memcpy((void *)&e.val64 + last_off,
2252                                                 (void *)&pos->data + last_off,
2253                                                 bytes);
2254 
2255                                         list_del(&pos->list);
2256                                         kfree(pos);
2257                                         found = true;
2258                                         break;
2259                                 }
2260 
2261                                 /* update of the first partial part */
2262                                 pos->data = e.val64;
2263                                 ggtt_set_guest_entry(ggtt_mm, &e, g_gtt_index);
2264                                 return 0;
2265                         }
2266                 }
2267 
2268                 if (!found) {
2269                         /* the first partial part */
2270                         partial_pte = kzalloc(sizeof(*partial_pte), GFP_KERNEL);
2271                         if (!partial_pte)
2272                                 return -ENOMEM;
2273                         partial_pte->offset = off;
2274                         partial_pte->data = e.val64;
2275                         list_add_tail(&partial_pte->list,
2276                                 &ggtt_mm->ggtt_mm.partial_pte_list);
2277                         partial_update = true;
2278                 }
2279         }
2280 
2281         if (!partial_update && (ops->test_present(&e))) {
2282                 gfn = ops->get_pfn(&e);
2283                 m.val64 = e.val64;
2284                 m.type = e.type;
2285 
2286                 /* one PTE update may be issued in multiple writes and the
2287                  * first write may not construct a valid gfn
2288                  */
2289                 if (!intel_gvt_hypervisor_is_valid_gfn(vgpu, gfn)) {
2290                         ops->set_pfn(&m, gvt->gtt.scratch_mfn);
2291                         goto out;
2292                 }
2293 
2294                 ret = intel_gvt_hypervisor_dma_map_guest_page(vgpu, gfn,
2295                                                         PAGE_SIZE, &dma_addr);
2296                 if (ret) {
2297                         gvt_vgpu_err("fail to populate guest ggtt entry\n");
2298                         /* The guest driver may read/write the entry while it is
2299                          * only partially updated; the p2m mapping will fail in
2300                          * that case, so set the shadow entry to a scratch page.
2301                          */
2302                         ops->set_pfn(&m, gvt->gtt.scratch_mfn);
2303                 } else
2304                         ops->set_pfn(&m, dma_addr >> PAGE_SHIFT);
2305         } else {
2306                 ops->set_pfn(&m, gvt->gtt.scratch_mfn);
2307                 ops->clear_present(&m);
2308         }
2309 
2310 out:
2311         ggtt_set_guest_entry(ggtt_mm, &e, g_gtt_index);
2312 
2313         ggtt_get_host_entry(ggtt_mm, &e, g_gtt_index);
2314         ggtt_invalidate_pte(vgpu, &e);
2315 
2316         ggtt_set_host_entry(ggtt_mm, &m, g_gtt_index);
2317         ggtt_invalidate(gvt->dev_priv);
2318         return 0;
2319 }
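/*
 * Illustrative sketch (not part of the original gtt.c): merging an 8-byte
 * GGTT PTE that the guest writes as two 4-byte MMIO accesses.  The first
 * half is parked on partial_pte_list above; when the other half arrives the
 * full 64-bit value is assembled (the memcpy-based merge above) and only
 * then is the shadow PTE updated.  Assumes 8-byte, little-endian entries.
 */
static inline unsigned long long example_merge_partial_pte(
                unsigned long long cached_half, /* pos->data from 1st write */
                unsigned int new_off,           /* (off & 7) of the 2nd write */
                unsigned int new_data)          /* 4 bytes from the 2nd write */
{
        unsigned long long e = cached_half;

        /* splice the newly written 4 bytes over the matching half */
        e &= ~(0xffffffffULL << (new_off * 8));
        e |= (unsigned long long)new_data << (new_off * 8);
        return e;
}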
2320 
2321 /**
2322  * intel_vgpu_emulate_ggtt_mmio_write - emulate GTT MMIO register write
2323  * @vgpu: a vGPU
2324  * @off: register offset
2325  * @p_data: data from guest write
2326  * @bytes: data length
2327  *
2328  * This function is used to emulate the GTT MMIO register write
2329  *
2330  * Returns:
2331  * Zero on success, error code if failed.
2332  */
2333 int intel_vgpu_emulate_ggtt_mmio_write(struct intel_vgpu *vgpu,
2334                 unsigned int off, void *p_data, unsigned int bytes)
2335 {
2336         const struct intel_gvt_device_info *info = &vgpu->gvt->device_info;
2337         int ret;
2338 
2339         if (bytes != 4 && bytes != 8)
2340                 return -EINVAL;
2341 
2342         off -= info->gtt_start_offset;
2343         ret = emulate_ggtt_mmio_write(vgpu, off, p_data, bytes);
2344         return ret;
2345 }
2346 
2347 static int alloc_scratch_pages(struct intel_vgpu *vgpu,
2348                 enum intel_gvt_gtt_type type)
2349 {
2350         struct intel_vgpu_gtt *gtt = &vgpu->gtt;
2351         struct intel_gvt_gtt_pte_ops *ops = vgpu->gvt->gtt.pte_ops;
2352         int page_entry_num = I915_GTT_PAGE_SIZE >>
2353                                 vgpu->gvt->device_info.gtt_entry_size_shift;
2354         void *scratch_pt;
2355         int i;
2356         struct device *dev = &vgpu->gvt->dev_priv->drm.pdev->dev;
2357         dma_addr_t daddr;
2358 
2359         if (WARN_ON(type < GTT_TYPE_PPGTT_PTE_PT || type >= GTT_TYPE_MAX))
2360                 return -EINVAL;
2361 
2362         scratch_pt = (void *)get_zeroed_page(GFP_KERNEL);
2363         if (!scratch_pt) {
2364                 gvt_vgpu_err("fail to allocate scratch page\n");
2365                 return -ENOMEM;
2366         }
2367 
2368         daddr = dma_map_page(dev, virt_to_page(scratch_pt), 0,
2369                         4096, PCI_DMA_BIDIRECTIONAL);
2370         if (dma_mapping_error(dev, daddr)) {
2371                 gvt_vgpu_err("fail to dmamap scratch_pt\n");
2372                 __free_page(virt_to_page(scratch_pt));
2373                 return -ENOMEM;
2374         }
2375         gtt->scratch_pt[type].page_mfn =
2376                 (unsigned long)(daddr >> I915_GTT_PAGE_SHIFT);
2377         gtt->scratch_pt[type].page = virt_to_page(scratch_pt);
2378         gvt_dbg_mm("vgpu%d create scratch_pt: type %d mfn=0x%lx\n",
2379                         vgpu->id, type, gtt->scratch_pt[type].page_mfn);
2380 
2381         /* Build the tree by completely filling each scratch pt with entries
2382          * which point to the next level scratch pt or scratch page. The
2383          * scratch_pt[type] indicates the scratch pt/scratch page used by the
2384          * 'type' pt.
2385          * e.g. scratch_pt[GTT_TYPE_PPGTT_PDE_PT] is used by a GTT_TYPE_PPGTT_PDE_PT
2386          * level pt, which means this scratch_pt itself acts as a GTT_TYPE_PPGTT_PTE_PT
2387          * and is completely filled with the scratch page mfn.
2388          */
2389         if (type > GTT_TYPE_PPGTT_PTE_PT) {
2390                 struct intel_gvt_gtt_entry se;
2391 
2392                 memset(&se, 0, sizeof(struct intel_gvt_gtt_entry));
2393                 se.type = get_entry_type(type - 1);
2394                 ops->set_pfn(&se, gtt->scratch_pt[type - 1].page_mfn);
2395 
2396                 /* The entry parameters like present/writeable/cache type are
2397                  * set to the same values as i915's scratch page tree.
2398                  */
2399                 se.val64 |= _PAGE_PRESENT | _PAGE_RW;
2400                 if (type == GTT_TYPE_PPGTT_PDE_PT)
2401                         se.val64 |= PPAT_CACHED;
2402 
2403                 for (i = 0; i < page_entry_num; i++)
2404                         ops->set_entry(scratch_pt, &se, i, false, 0, vgpu);
2405         }
2406 
2407         return 0;
2408 }
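/*
 * Illustrative sketch (not part of the original gtt.c) of the scratch tree
 * built by alloc_scratch_pages()/create_scratch_page_tree().  A cleared
 * entry in a shadow table of level N is pointed at scratch_pt[N], which is
 * pre-filled with entries that point one level further down, bottoming out
 * at a zeroed scratch data page:
 *
 *   cleared PML4E -> scratch_pt[PML4_PT]  (acts as a PDP table)
 *                     -> scratch_pt[PDP_PT]  (acts as a page directory)
 *                        -> scratch_pt[PDE_PT]  (acts as a page table)
 *                           -> scratch_pt[PTE_PT]  (zeroed scratch page)
 *
 * so a hardware walk through a cleared shadow entry always terminates on
 * harmless scratch pages.
 */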
2409 
2410 static int release_scratch_page_tree(struct intel_vgpu *vgpu)
2411 {
2412         int i;
2413         struct device *dev = &vgpu->gvt->dev_priv->drm.pdev->dev;
2414         dma_addr_t daddr;
2415 
2416         for (i = GTT_TYPE_PPGTT_PTE_PT; i < GTT_TYPE_MAX; i++) {
2417                 if (vgpu->gtt.scratch_pt[i].page != NULL) {
2418                         daddr = (dma_addr_t)(vgpu->gtt.scratch_pt[i].page_mfn <<
2419                                         I915_GTT_PAGE_SHIFT);
2420                         dma_unmap_page(dev, daddr, 4096, PCI_DMA_BIDIRECTIONAL);
2421                         __free_page(vgpu->gtt.scratch_pt[i].page);
2422                         vgpu->gtt.scratch_pt[i].page = NULL;
2423                         vgpu->gtt.scratch_pt[i].page_mfn = 0;
2424                 }
2425         }
2426 
2427         return 0;
2428 }
2429 
2430 static int create_scratch_page_tree(struct intel_vgpu *vgpu)
2431 {
2432         int i, ret;
2433 
2434         for (i = GTT_TYPE_PPGTT_PTE_PT; i < GTT_TYPE_MAX; i++) {
2435                 ret = alloc_scratch_pages(vgpu, i);
2436                 if (ret)
2437                         goto err;
2438         }
2439 
2440         return 0;
2441 
2442 err:
2443         release_scratch_page_tree(vgpu);
2444         return ret;
2445 }
2446 
2447 /**
2448  * intel_vgpu_init_gtt - initialize per-vGPU graphics memory virtualization
2449  * @vgpu: a vGPU
2450  *
2451  * This function is used to initialize per-vGPU graphics memory virtualization
2452  * components.
2453  *
2454  * Returns:
2455  * Zero on success, error code if failed.
2456  */
2457 int intel_vgpu_init_gtt(struct intel_vgpu *vgpu)
2458 {
2459         struct intel_vgpu_gtt *gtt = &vgpu->gtt;
2460 
2461         INIT_RADIX_TREE(&gtt->spt_tree, GFP_KERNEL);
2462 
2463         INIT_LIST_HEAD(&gtt->ppgtt_mm_list_head);
2464         INIT_LIST_HEAD(&gtt->oos_page_list_head);
2465         INIT_LIST_HEAD(&gtt->post_shadow_list_head);
2466 
2467         gtt->ggtt_mm = intel_vgpu_create_ggtt_mm(vgpu);
2468         if (IS_ERR(gtt->ggtt_mm)) {
2469                 gvt_vgpu_err("fail to create mm for ggtt.\n");
2470                 return PTR_ERR(gtt->ggtt_mm);
2471         }
2472 
2473         intel_vgpu_reset_ggtt(vgpu, false);
2474 
2475         INIT_LIST_HEAD(&gtt->ggtt_mm->ggtt_mm.partial_pte_list);
2476 
2477         return create_scratch_page_tree(vgpu);
2478 }
2479 
2480 static void intel_vgpu_destroy_all_ppgtt_mm(struct intel_vgpu *vgpu)
2481 {
2482         struct list_head *pos, *n;
2483         struct intel_vgpu_mm *mm;
2484 
2485         list_for_each_safe(pos, n, &vgpu->gtt.ppgtt_mm_list_head) {
2486                 mm = container_of(pos, struct intel_vgpu_mm, ppgtt_mm.list);
2487                 intel_vgpu_destroy_mm(mm);
2488         }
2489 
2490         if (GEM_WARN_ON(!list_empty(&vgpu->gtt.ppgtt_mm_list_head)))
2491                 gvt_err("vgpu ppgtt mm is not fully destroyed\n");
2492 
2493         if (GEM_WARN_ON(!radix_tree_empty(&vgpu->gtt.spt_tree))) {
2494                 gvt_err("Why do we still have spt not freed?\n");
2495                 ppgtt_free_all_spt(vgpu);
2496         }
2497 }
2498 
2499 static void intel_vgpu_destroy_ggtt_mm(struct intel_vgpu *vgpu)
2500 {
2501         struct intel_gvt_partial_pte *pos, *next;
2502 
2503         list_for_each_entry_safe(pos, next,
2504                                  &vgpu->gtt.ggtt_mm->ggtt_mm.partial_pte_list,
2505                                  list) {
2506                 gvt_dbg_mm("partial PTE update on hold 0x%lx : 0x%llx\n",
2507                         pos->offset, pos->data);
2508                 kfree(pos);
2509         }
2510         intel_vgpu_destroy_mm(vgpu->gtt.ggtt_mm);
2511         vgpu->gtt.ggtt_mm = NULL;
2512 }
2513 
2514 /**
2515  * intel_vgpu_clean_gtt - clean up per-vGPU graphics memory virtualization
2516  * @vgpu: a vGPU
2517  *
2518  * This function is used to clean up per-vGPU graphics memory virtualization
2519  * components.
2520  *
2521  * Returns:
2522  * Zero on success, error code if failed.
2523  */
2524 void intel_vgpu_clean_gtt(struct intel_vgpu *vgpu)
2525 {
2526         intel_vgpu_destroy_all_ppgtt_mm(vgpu);
2527         intel_vgpu_destroy_ggtt_mm(vgpu);
2528         release_scratch_page_tree(vgpu);
2529 }
2530 
2531 static void clean_spt_oos(struct intel_gvt *gvt)
2532 {
2533         struct intel_gvt_gtt *gtt = &gvt->gtt;
2534         struct list_head *pos, *n;
2535         struct intel_vgpu_oos_page *oos_page;
2536 
2537         WARN(!list_empty(&gtt->oos_page_use_list_head),
2538                 "someone is still using oos page\n");
2539 
2540         list_for_each_safe(pos, n, &gtt->oos_page_free_list_head) {
2541                 oos_page = container_of(pos, struct intel_vgpu_oos_page, list);
2542                 list_del(&oos_page->list);
2543                 free_page((unsigned long)oos_page->mem);
2544                 kfree(oos_page);
2545         }
2546 }
2547 
2548 static int setup_spt_oos(struct intel_gvt *gvt)
2549 {
2550         struct intel_gvt_gtt *gtt = &gvt->gtt;
2551         struct intel_vgpu_oos_page *oos_page;
2552         int i;
2553         int ret;
2554 
2555         INIT_LIST_HEAD(&gtt->oos_page_free_list_head);
2556         INIT_LIST_HEAD(&gtt->oos_page_use_list_head);
2557 
2558         for (i = 0; i < preallocated_oos_pages; i++) {
2559                 oos_page = kzalloc(sizeof(*oos_page), GFP_KERNEL);
2560                 if (!oos_page) {
2561                         ret = -ENOMEM;
2562                         goto fail;
2563                 }
2564                 oos_page->mem = (void *)__get_free_pages(GFP_KERNEL, 0);
2565                 if (!oos_page->mem) {
2566                         ret = -ENOMEM;
2567                         kfree(oos_page);
2568                         goto fail;
2569                 }
2570 
2571                 INIT_LIST_HEAD(&oos_page->list);
2572                 INIT_LIST_HEAD(&oos_page->vm_list);
2573                 oos_page->id = i;
2574                 list_add_tail(&oos_page->list, &gtt->oos_page_free_list_head);
2575         }
2576 
2577         gvt_dbg_mm("%d oos pages preallocated\n", i);
2578 
2579         return 0;
2580 fail:
2581         clean_spt_oos(gvt);
2582         return ret;
2583 }
2584 
2585 /**
2586  * intel_vgpu_find_ppgtt_mm - find a PPGTT mm object
2587  * @vgpu: a vGPU
2588  * @pdps: pdp root array
2589  *
2590  * This function is used to find a PPGTT mm object from mm object pool
2591  *
2592  * Returns:
2593  * pointer to mm object on success, NULL if failed.
2594  */
2595 struct intel_vgpu_mm *intel_vgpu_find_ppgtt_mm(struct intel_vgpu *vgpu,
2596                 u64 pdps[])
2597 {
2598         struct intel_vgpu_mm *mm;
2599         struct list_head *pos;
2600 
2601         list_for_each(pos, &vgpu->gtt.ppgtt_mm_list_head) {
2602                 mm = container_of(pos, struct intel_vgpu_mm, ppgtt_mm.list);
2603 
2604                 switch (mm->ppgtt_mm.root_entry_type) {
2605                 case GTT_TYPE_PPGTT_ROOT_L4_ENTRY:
2606                         if (pdps[0] == mm->ppgtt_mm.guest_pdps[0])
2607                                 return mm;
2608                         break;
2609                 case GTT_TYPE_PPGTT_ROOT_L3_ENTRY:
2610                         if (!memcmp(pdps, mm->ppgtt_mm.guest_pdps,
2611                                     sizeof(mm->ppgtt_mm.guest_pdps)))
2612                                 return mm;
2613                         break;
2614                 default:
2615                         GEM_BUG_ON(1);
2616                 }
2617         }
2618         return NULL;
2619 }
2620 
2621 /**
2622  * intel_vgpu_get_ppgtt_mm - get or create a PPGTT mm object.
2623  * @vgpu: a vGPU
2624  * @root_entry_type: ppgtt root entry type
2625  * @pdps: guest pdps
2626  *
2627  * This function is used to find or create a PPGTT mm object from a guest.
2628  *
2629  * Returns:
2630  * The PPGTT mm object on success, ERR_PTR() encoded error code if failed.
2631  */
2632 struct intel_vgpu_mm *intel_vgpu_get_ppgtt_mm(struct intel_vgpu *vgpu,
2633                 enum intel_gvt_gtt_type root_entry_type, u64 pdps[])
2634 {
2635         struct intel_vgpu_mm *mm;
2636 
2637         mm = intel_vgpu_find_ppgtt_mm(vgpu, pdps);
2638         if (mm) {
2639                 intel_vgpu_mm_get(mm);
2640         } else {
2641                 mm = intel_vgpu_create_ppgtt_mm(vgpu, root_entry_type, pdps);
2642                 if (IS_ERR(mm))
2643                         gvt_vgpu_err("fail to create mm\n");
2644         }
2645         return mm;
2646 }
2647 
2648 /**
2649  * intel_vgpu_put_ppgtt_mm - find and put a PPGTT mm object.
2650  * @vgpu: a vGPU
2651  * @pdps: guest pdps
2652  *
2653  * This function is used to find a PPGTT mm object from a guest and destroy it.
2654  *
2655  * Returns:
2656  * Zero on success, negative error code if failed.
2657  */
2658 int intel_vgpu_put_ppgtt_mm(struct intel_vgpu *vgpu, u64 pdps[])
2659 {
2660         struct intel_vgpu_mm *mm;
2661 
2662         mm = intel_vgpu_find_ppgtt_mm(vgpu, pdps);
2663         if (!mm) {
2664                 gvt_vgpu_err("fail to find ppgtt instance.\n");
2665                 return -EINVAL;
2666         }
2667         intel_vgpu_mm_put(mm);
2668         return 0;
2669 }
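/*
 * Illustrative sketch (not part of the original gtt.c): how a hypothetical
 * caller that handles guest PPGTT create/destroy notifications would pair
 * the two helpers above.  The root entry type and pdps are taken from the
 * guest notification; error handling is reduced to the minimum.
 */
static int example_handle_guest_ppgtt_notification(struct intel_vgpu *vgpu,
                                                   u64 pdps[], bool create)
{
        struct intel_vgpu_mm *mm;

        if (create) {
                /* reuses an existing mm for these pdps or shadows a new one */
                mm = intel_vgpu_get_ppgtt_mm(vgpu,
                                             GTT_TYPE_PPGTT_ROOT_L4_ENTRY, pdps);
                return IS_ERR(mm) ? PTR_ERR(mm) : 0;
        }

        /* drops the reference taken above; the mm is freed on the last put */
        return intel_vgpu_put_ppgtt_mm(vgpu, pdps);
}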
2670 
2671 /**
2672  * intel_gvt_init_gtt - initialize mm components of a GVT device
2673  * @gvt: GVT device
2674  *
2675  * This function is called at the initialization stage, to initialize
2676  * the mm components of a GVT device.
2677  *
2678  * Returns:
2679  * zero on success, negative error code if failed.
2680  */
2681 int intel_gvt_init_gtt(struct intel_gvt *gvt)
2682 {
2683         int ret;
2684         void *page;
2685         struct device *dev = &gvt->dev_priv->drm.pdev->dev;
2686         dma_addr_t daddr;
2687 
2688         gvt_dbg_core("init gtt\n");
2689 
2690         gvt->gtt.pte_ops = &gen8_gtt_pte_ops;
2691         gvt->gtt.gma_ops = &gen8_gtt_gma_ops;
2692 
2693         page = (void *)get_zeroed_page(GFP_KERNEL);
2694         if (!page) {
2695                 gvt_err("fail to allocate scratch ggtt page\n");
2696                 return -ENOMEM;
2697         }
2698 
2699         daddr = dma_map_page(dev, virt_to_page(page), 0,
2700                         4096, PCI_DMA_BIDIRECTIONAL);
2701         if (dma_mapping_error(dev, daddr)) {
2702                 gvt_err("fail to dmamap scratch ggtt page\n");
2703                 __free_page(virt_to_page(page));
2704                 return -ENOMEM;
2705         }
2706 
2707         gvt->gtt.scratch_page = virt_to_page(page);
2708         gvt->gtt.scratch_mfn = (unsigned long)(daddr >> I915_GTT_PAGE_SHIFT);
2709 
2710         if (enable_out_of_sync) {
2711                 ret = setup_spt_oos(gvt);
2712                 if (ret) {
2713                         gvt_err("fail to initialize SPT oos\n");
2714                         dma_unmap_page(dev, daddr, 4096, PCI_DMA_BIDIRECTIONAL);
2715                         __free_page(gvt->gtt.scratch_page);
2716                         return ret;
2717                 }
2718         }
2719         INIT_LIST_HEAD(&gvt->gtt.ppgtt_mm_lru_list_head);
2720         mutex_init(&gvt->gtt.ppgtt_mm_lock);
2721         return 0;
2722 }
2723 
2724 /**
2725  * intel_gvt_clean_gtt - clean up mm components of a GVT device
2726  * @gvt: GVT device
2727  *
2728  * This function is called at the driver unloading stage, to clean up
2729  * the mm components of a GVT device.
2730  *
2731  */
2732 void intel_gvt_clean_gtt(struct intel_gvt *gvt)
2733 {
2734         struct device *dev = &gvt->dev_priv->drm.pdev->dev;
2735         dma_addr_t daddr = (dma_addr_t)(gvt->gtt.scratch_mfn <<
2736                                         I915_GTT_PAGE_SHIFT);
2737 
2738         dma_unmap_page(dev, daddr, 4096, PCI_DMA_BIDIRECTIONAL);
2739 
2740         __free_page(gvt->gtt.scratch_page);
2741 
2742         if (enable_out_of_sync)
2743                 clean_spt_oos(gvt);
2744 }
2745 
2746 /**
2747  * intel_vgpu_invalidate_ppgtt - invalidate PPGTT instances
2748  * @vgpu: a vGPU
2749  *
2750  * This function is called to invalidate all PPGTT instances of a vGPU.
2751  *
2752  */
2753 void intel_vgpu_invalidate_ppgtt(struct intel_vgpu *vgpu)
2754 {
2755         struct list_head *pos, *n;
2756         struct intel_vgpu_mm *mm;
2757 
2758         list_for_each_safe(pos, n, &vgpu->gtt.ppgtt_mm_list_head) {
2759                 mm = container_of(pos, struct intel_vgpu_mm, ppgtt_mm.list);
2760                 if (mm->type == INTEL_GVT_MM_PPGTT) {
2761                         mutex_lock(&vgpu->gvt->gtt.ppgtt_mm_lock);
2762                         list_del_init(&mm->ppgtt_mm.lru_list);
2763                         mutex_unlock(&vgpu->gvt->gtt.ppgtt_mm_lock);
2764                         if (mm->ppgtt_mm.shadowed)
2765                                 invalidate_ppgtt_mm(mm);
2766                 }
2767         }
2768 }
2769 
2770 /**
2771  * intel_vgpu_reset_ggtt - reset the GGTT entry
2772  * @vgpu: a vGPU
2773  * @invalidate_old: invalidate old entries
2774  *
2775  * This function is called at the vGPU create stage
2776  * to reset all the GGTT entries.
2777  *
2778  */
2779 void intel_vgpu_reset_ggtt(struct intel_vgpu *vgpu, bool invalidate_old)
2780 {
2781         struct intel_gvt *gvt = vgpu->gvt;
2782         struct drm_i915_private *dev_priv = gvt->dev_priv;
2783         struct intel_gvt_gtt_pte_ops *pte_ops = vgpu->gvt->gtt.pte_ops;
2784         struct intel_gvt_gtt_entry entry = {.type = GTT_TYPE_GGTT_PTE};
2785         struct intel_gvt_gtt_entry old_entry;
2786         u32 index;
2787         u32 num_entries;
2788 
2789         pte_ops->set_pfn(&entry, gvt->gtt.scratch_mfn);
2790         pte_ops->set_present(&entry);
2791 
2792         index = vgpu_aperture_gmadr_base(vgpu) >> PAGE_SHIFT;
2793         num_entries = vgpu_aperture_sz(vgpu) >> PAGE_SHIFT;
2794         while (num_entries--) {
2795                 if (invalidate_old) {
2796                         ggtt_get_host_entry(vgpu->gtt.ggtt_mm, &old_entry, index);
2797                         ggtt_invalidate_pte(vgpu, &old_entry);
2798                 }
2799                 ggtt_set_host_entry(vgpu->gtt.ggtt_mm, &entry, index++);
2800         }
2801 
2802         index = vgpu_hidden_gmadr_base(vgpu) >> PAGE_SHIFT;
2803         num_entries = vgpu_hidden_sz(vgpu) >> PAGE_SHIFT;
2804         while (num_entries--) {
2805                 if (invalidate_old) {
2806                         ggtt_get_host_entry(vgpu->gtt.ggtt_mm, &old_entry, index);
2807                         ggtt_invalidate_pte(vgpu, &old_entry);
2808                 }
2809                 ggtt_set_host_entry(vgpu->gtt.ggtt_mm, &entry, index++);
2810         }
2811 
2812         ggtt_invalidate(dev_priv);
2813 }
2814 
2815 /**
2816  * intel_vgpu_reset_gtt - reset all GTT related status
2817  * @vgpu: a vGPU
2818  *
2819  * This function is called from the vfio core to reset all
2820  * GTT related status, including GGTT, PPGTT, scratch page.
2821  *
2822  */
2823 void intel_vgpu_reset_gtt(struct intel_vgpu *vgpu)
2824 {
2825         /* Shadow pages are only created when there is no page
2826          * table tracking data, so remove page tracking data after
2827          * removing the shadow pages.
2828          */
2829         intel_vgpu_destroy_all_ppgtt_mm(vgpu);
2830         intel_vgpu_reset_ggtt(vgpu, true);
2831 }

/* [<][>][^][v][top][bottom][index][help] */