/*
 * Xen implementation for transcendent memory (tmem)
 *
 * Copyright (C) 2009-2011 Oracle Corp. All rights reserved.
 * Author: Dan Magenheimer
 */

#define pr_fmt(fmt) "xen:" KBUILD_MODNAME ": " fmt

#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/types.h>
#include <linux/init.h>
#include <linux/pagemap.h>
#include <linux/cleancache.h>
#include <linux/frontswap.h>

#include <xen/xen.h>
#include <xen/interface/xen.h>
#include <asm/xen/hypercall.h>
#include <asm/xen/page.h>
#include <asm/xen/hypervisor.h>
#include <xen/tmem.h>

#ifndef CONFIG_XEN_TMEM_MODULE
bool __read_mostly tmem_enabled = false;

static int __init enable_tmem(char *s)
{
	tmem_enabled = true;
	return 1;
}
__setup("tmem", enable_tmem);
#endif

#ifdef CONFIG_CLEANCACHE
static bool cleancache __read_mostly = true;
module_param(cleancache, bool, S_IRUGO);
static bool selfballooning __read_mostly = true;
module_param(selfballooning, bool, S_IRUGO);
#endif /* CONFIG_CLEANCACHE */

#ifdef CONFIG_FRONTSWAP
static bool frontswap __read_mostly = true;
module_param(frontswap, bool, S_IRUGO);
#else /* CONFIG_FRONTSWAP */
#define frontswap (0)
#endif /* CONFIG_FRONTSWAP */

#ifdef CONFIG_XEN_SELFBALLOONING
static bool selfshrinking __read_mostly = true;
module_param(selfshrinking, bool, S_IRUGO);
#endif /* CONFIG_XEN_SELFBALLOONING */
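
/*
 * Usage sketch (an assumption about the build, not taken from this file):
 * when the shim is built in, KBUILD_MODNAME is normally "tmem", so tmem
 * support is switched on by booting with "tmem" on the kernel command line,
 * and the knobs above can then be tuned with e.g. "tmem.cleancache=0" or
 * "tmem.frontswap=0".  Being declared S_IRUGO, their current values are
 * readable under /sys/module/tmem/parameters/.
 */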

#define TMEM_CONTROL 0
#define TMEM_NEW_POOL 1
#define TMEM_DESTROY_POOL 2
#define TMEM_NEW_PAGE 3
#define TMEM_PUT_PAGE 4
#define TMEM_GET_PAGE 5
#define TMEM_FLUSH_PAGE 6
#define TMEM_FLUSH_OBJECT 7
#define TMEM_READ 8
#define TMEM_WRITE 9
#define TMEM_XCHG 10

/* Bits for HYPERVISOR_tmem_op(TMEM_NEW_POOL) */
#define TMEM_POOL_PERSIST 1
#define TMEM_POOL_SHARED 2
#define TMEM_POOL_PAGESIZE_SHIFT 4
#define TMEM_VERSION_SHIFT 24

struct tmem_pool_uuid {
	u64 uuid_lo;
	u64 uuid_hi;
};

struct tmem_oid {
	u64 oid[3];
};

#define TMEM_POOL_PRIVATE_UUID	{ 0, 0 }

/* flags for tmem_ops.new_pool */
#define TMEM_POOL_PERSIST 1
#define TMEM_POOL_SHARED 2

/* xen tmem foundation ops/hypercalls */

static inline int xen_tmem_op(u32 tmem_cmd, u32 tmem_pool, struct tmem_oid oid,
	u32 index, unsigned long gmfn, u32 tmem_offset, u32 pfn_offset, u32 len)
{
	struct tmem_op op;
	int rc = 0;

	op.cmd = tmem_cmd;
	op.pool_id = tmem_pool;
	op.u.gen.oid[0] = oid.oid[0];
	op.u.gen.oid[1] = oid.oid[1];
	op.u.gen.oid[2] = oid.oid[2];
	op.u.gen.index = index;
	op.u.gen.tmem_offset = tmem_offset;
	op.u.gen.pfn_offset = pfn_offset;
	op.u.gen.len = len;
	set_xen_guest_handle(op.u.gen.gmfn, (void *)gmfn);
	rc = HYPERVISOR_tmem_op(&op);
	return rc;
}

static int xen_tmem_new_pool(struct tmem_pool_uuid uuid,
				u32 flags, unsigned long pagesize)
{
	struct tmem_op op;
	int rc = 0, pageshift;

	for (pageshift = 0; pagesize != 1; pageshift++)
		pagesize >>= 1;
	flags |= (pageshift - 12) << TMEM_POOL_PAGESIZE_SHIFT;
	flags |= TMEM_SPEC_VERSION << TMEM_VERSION_SHIFT;
	op.cmd = TMEM_NEW_POOL;
	op.u.new.uuid[0] = uuid.uuid_lo;
	op.u.new.uuid[1] = uuid.uuid_hi;
	op.u.new.flags = flags;
	rc = HYPERVISOR_tmem_op(&op);
	return rc;
}
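
/*
 * Worked example for the flag packing above (assuming PAGE_SIZE == 4096 and
 * TMEM_SPEC_VERSION coming from a shared tmem header, not this file): the
 * loop reduces pagesize 4096 to 1 in 12 shifts, so pageshift == 12 and
 * (pageshift - 12) << TMEM_POOL_PAGESIZE_SHIFT contributes 0.
 * TMEM_POOL_PERSIST and TMEM_POOL_SHARED occupy bits 0-1, the page-size
 * field starts at bit 4, and the spec version is placed at bit 24 and up.
 */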

/* xen generic tmem ops */

static int xen_tmem_put_page(u32 pool_id, struct tmem_oid oid,
			     u32 index, unsigned long pfn)
{
	unsigned long gmfn = xen_pv_domain() ? pfn_to_mfn(pfn) : pfn;

	return xen_tmem_op(TMEM_PUT_PAGE, pool_id, oid, index,
		gmfn, 0, 0, 0);
}

static int xen_tmem_get_page(u32 pool_id, struct tmem_oid oid,
			     u32 index, unsigned long pfn)
{
	unsigned long gmfn = xen_pv_domain() ? pfn_to_mfn(pfn) : pfn;

	return xen_tmem_op(TMEM_GET_PAGE, pool_id, oid, index,
		gmfn, 0, 0, 0);
}

static int xen_tmem_flush_page(u32 pool_id, struct tmem_oid oid, u32 index)
{
	return xen_tmem_op(TMEM_FLUSH_PAGE, pool_id, oid, index,
		0, 0, 0, 0);
}

static int xen_tmem_flush_object(u32 pool_id, struct tmem_oid oid)
{
	return xen_tmem_op(TMEM_FLUSH_OBJECT, pool_id, oid, 0, 0, 0, 0, 0);
}

#ifdef CONFIG_CLEANCACHE
static int xen_tmem_destroy_pool(u32 pool_id)
{
	struct tmem_oid oid = { { 0 } };

	return xen_tmem_op(TMEM_DESTROY_POOL, pool_id, oid, 0, 0, 0, 0, 0);
}

/* cleancache ops */

static void tmem_cleancache_put_page(int pool, struct cleancache_filekey key,
				     pgoff_t index, struct page *page)
{
	u32 ind = (u32) index;
	struct tmem_oid oid = *(struct tmem_oid *)&key;
	unsigned long pfn = page_to_pfn(page);

	if (pool < 0)
		return;
	if (ind != index)
		return;
	mb(); /* ensure page is quiescent; tmem may address it with an alias */
	(void)xen_tmem_put_page((u32)pool, oid, ind, pfn);
}

static int tmem_cleancache_get_page(int pool, struct cleancache_filekey key,
				    pgoff_t index, struct page *page)
{
	u32 ind = (u32) index;
	struct tmem_oid oid = *(struct tmem_oid *)&key;
	unsigned long pfn = page_to_pfn(page);
	int ret;

	/* translate return values to linux semantics */
	if (pool < 0)
		return -1;
	if (ind != index)
		return -1;
	ret = xen_tmem_get_page((u32)pool, oid, ind, pfn);
	if (ret == 1)
		return 0;
	else
		return -1;
}

static void tmem_cleancache_flush_page(int pool, struct cleancache_filekey key,
				       pgoff_t index)
{
	u32 ind = (u32) index;
	struct tmem_oid oid = *(struct tmem_oid *)&key;

	if (pool < 0)
		return;
	if (ind != index)
		return;
	(void)xen_tmem_flush_page((u32)pool, oid, ind);
}

static void tmem_cleancache_flush_inode(int pool, struct cleancache_filekey key)
{
	struct tmem_oid oid = *(struct tmem_oid *)&key;

	if (pool < 0)
		return;
	(void)xen_tmem_flush_object((u32)pool, oid);
}

static void tmem_cleancache_flush_fs(int pool)
{
	if (pool < 0)
		return;
	(void)xen_tmem_destroy_pool((u32)pool);
}

static int tmem_cleancache_init_fs(size_t pagesize)
{
	struct tmem_pool_uuid uuid_private = TMEM_POOL_PRIVATE_UUID;

	return xen_tmem_new_pool(uuid_private, 0, pagesize);
}

static int tmem_cleancache_init_shared_fs(char *uuid, size_t pagesize)
{
	struct tmem_pool_uuid shared_uuid;

	shared_uuid.uuid_lo = *(u64 *)uuid;
	shared_uuid.uuid_hi = *(u64 *)(&uuid[8]);
	return xen_tmem_new_pool(shared_uuid, TMEM_POOL_SHARED, pagesize);
}

static struct cleancache_ops tmem_cleancache_ops = {
	.put_page = tmem_cleancache_put_page,
	.get_page = tmem_cleancache_get_page,
	.invalidate_page = tmem_cleancache_flush_page,
	.invalidate_inode = tmem_cleancache_flush_inode,
	.invalidate_fs = tmem_cleancache_flush_fs,
	.init_shared_fs = tmem_cleancache_init_shared_fs,
	.init_fs = tmem_cleancache_init_fs
};
#endif

#ifdef CONFIG_FRONTSWAP
/* frontswap tmem operations */

/* a single tmem poolid is used for all frontswap "types" (swapfiles) */
static int tmem_frontswap_poolid;

/*
 * Swizzling increases objects per swaptype, increasing tmem concurrency
 * for heavy swaploads.  Later, larger nr_cpus -> larger SWIZ_BITS
 */
#define SWIZ_BITS 4
#define SWIZ_MASK ((1 << SWIZ_BITS) - 1)
#define _oswiz(_type, _ind) ((_type << SWIZ_BITS) | (_ind & SWIZ_MASK))
#define iswiz(_ind) (_ind >> SWIZ_BITS)

static inline struct tmem_oid oswiz(unsigned type, u32 ind)
{
	struct tmem_oid oid = { .oid = { 0 } };

	oid.oid[0] = _oswiz(type, ind);
	return oid;
}
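
/*
 * Worked example of the swizzling above (illustrative values): with
 * SWIZ_BITS == 4, swap type 2 and page offset 0x1234 give
 * oswiz(2, 0x1234).oid[0] == (2 << 4) | (0x1234 & 0xf) == 0x24 and
 * iswiz(0x1234) == 0x123, so consecutive offsets within one swapfile are
 * spread across 16 distinct tmem objects.
 */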

/* returns 0 if the page was successfully put into frontswap, -1 if not */
static int tmem_frontswap_store(unsigned type, pgoff_t offset,
				struct page *page)
{
	u64 ind64 = (u64)offset;
	u32 ind = (u32)offset;
	unsigned long pfn = page_to_pfn(page);
	int pool = tmem_frontswap_poolid;
	int ret;

	if (pool < 0)
		return -1;
	if (ind64 != ind)
		return -1;
	mb(); /* ensure page is quiescent; tmem may address it with an alias */
	ret = xen_tmem_put_page(pool, oswiz(type, ind), iswiz(ind), pfn);
	/* translate Xen tmem return values to linux semantics */
	if (ret == 1)
		return 0;
	else
		return -1;
}

/*
 * returns 0 if the page was successfully gotten from frontswap, -1 if
 * it was not present (should never happen!)
 */
static int tmem_frontswap_load(unsigned type, pgoff_t offset,
			       struct page *page)
{
	u64 ind64 = (u64)offset;
	u32 ind = (u32)offset;
	unsigned long pfn = page_to_pfn(page);
	int pool = tmem_frontswap_poolid;
	int ret;

	if (pool < 0)
		return -1;
	if (ind64 != ind)
		return -1;
	ret = xen_tmem_get_page(pool, oswiz(type, ind), iswiz(ind), pfn);
	/* translate Xen tmem return values to linux semantics */
	if (ret == 1)
		return 0;
	else
		return -1;
}

/* flush a single page from frontswap */
static void tmem_frontswap_flush_page(unsigned type, pgoff_t offset)
{
	u64 ind64 = (u64)offset;
	u32 ind = (u32)offset;
	int pool = tmem_frontswap_poolid;

	if (pool < 0)
		return;
	if (ind64 != ind)
		return;
	(void) xen_tmem_flush_page(pool, oswiz(type, ind), iswiz(ind));
}

/* flush all pages from the passed swaptype */
static void tmem_frontswap_flush_area(unsigned type)
{
	int pool = tmem_frontswap_poolid;
	int ind;

	if (pool < 0)
		return;
	for (ind = SWIZ_MASK; ind >= 0; ind--)
		(void)xen_tmem_flush_object(pool, oswiz(type, ind));
}

static void tmem_frontswap_init(unsigned ignored)
{
	struct tmem_pool_uuid private = TMEM_POOL_PRIVATE_UUID;

	/* a single tmem poolid is used for all frontswap "types" (swapfiles) */
	if (tmem_frontswap_poolid < 0)
		tmem_frontswap_poolid =
		    xen_tmem_new_pool(private, TMEM_POOL_PERSIST, PAGE_SIZE);
}

static struct frontswap_ops tmem_frontswap_ops = {
	.store = tmem_frontswap_store,
	.load = tmem_frontswap_load,
	.invalidate_page = tmem_frontswap_flush_page,
	.invalidate_area = tmem_frontswap_flush_area,
	.init = tmem_frontswap_init
};
#endif

static int __init xen_tmem_init(void)
{
	if (!xen_domain())
		return 0;
#ifdef CONFIG_FRONTSWAP
	if (tmem_enabled && frontswap) {
		char *s = "";
		struct frontswap_ops *old_ops;

		tmem_frontswap_poolid = -1;
		old_ops = frontswap_register_ops(&tmem_frontswap_ops);
		if (IS_ERR(old_ops) || old_ops) {
			if (IS_ERR(old_ops))
				return PTR_ERR(old_ops);
			s = " (WARNING: frontswap_ops overridden)";
		}
		pr_info("frontswap enabled, RAM provided by Xen Transcendent Memory%s\n",
			s);
	}
#endif
#ifdef CONFIG_CLEANCACHE
	BUG_ON(sizeof(struct cleancache_filekey) != sizeof(struct tmem_oid));
	if (tmem_enabled && cleancache) {
		int err;

		err = cleancache_register_ops(&tmem_cleancache_ops);
		if (err)
			pr_warn("failed to enable cleancache: %d\n", err);
		else
			pr_info("cleancache enabled, RAM provided by Xen Transcendent Memory\n");
	}
#endif
#ifdef CONFIG_XEN_SELFBALLOONING
	/*
	 * There is no point in driving pages to the swap system if they
	 * aren't going anywhere in the tmem universe.
	 */
	if (!frontswap) {
		selfshrinking = false;
		selfballooning = false;
	}
	xen_selfballoon_init(selfballooning, selfshrinking);
#endif
	return 0;
}

module_init(xen_tmem_init)
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Dan Magenheimer <dan.magenheimer@oracle.com>");
MODULE_DESCRIPTION("Shim to Xen transcendent memory");