1/*P:200 This contains all the /dev/lguest code, whereby the userspace 2 * launcher controls and communicates with the Guest. For example, 3 * the first write will tell us the Guest's memory layout and entry 4 * point. A read will run the Guest until something happens, such as 5 * a signal or the Guest accessing a device. 6:*/ 7#include <linux/uaccess.h> 8#include <linux/miscdevice.h> 9#include <linux/fs.h> 10#include <linux/sched.h> 11#include <linux/file.h> 12#include <linux/slab.h> 13#include <linux/export.h> 14#include "lg.h" 15 16/*L:052 17 The Launcher can get the registers, and also set some of them. 18*/ 19static int getreg_setup(struct lg_cpu *cpu, const unsigned long __user *input) 20{ 21 unsigned long which; 22 23 /* We re-use the ptrace structure to specify which register to read. */ 24 if (get_user(which, input) != 0) 25 return -EFAULT; 26 27 /* 28 * We set up the cpu register pointer, and their next read will 29 * actually get the value (instead of running the guest). 30 * 31 * The last argument 'true' says we can access any register. 32 */ 33 cpu->reg_read = lguest_arch_regptr(cpu, which, true); 34 if (!cpu->reg_read) 35 return -ENOENT; 36 37 /* And because this is a write() call, we return the length used. */ 38 return sizeof(unsigned long) * 2; 39} 40 41static int setreg(struct lg_cpu *cpu, const unsigned long __user *input) 42{ 43 unsigned long which, value, *reg; 44 45 /* We re-use the ptrace structure to specify which register to read. */ 46 if (get_user(which, input) != 0) 47 return -EFAULT; 48 input++; 49 if (get_user(value, input) != 0) 50 return -EFAULT; 51 52 /* The last argument 'false' means we can't access all registers. */ 53 reg = lguest_arch_regptr(cpu, which, false); 54 if (!reg) 55 return -ENOENT; 56 57 *reg = value; 58 59 /* And because this is a write() call, we return the length used. */ 60 return sizeof(unsigned long) * 3; 61} 62 63/*L:050 64 * Sending an interrupt is done by writing LHREQ_IRQ and an interrupt 65 * number to /dev/lguest. 66 */ 67static int user_send_irq(struct lg_cpu *cpu, const unsigned long __user *input) 68{ 69 unsigned long irq; 70 71 if (get_user(irq, input) != 0) 72 return -EFAULT; 73 if (irq >= LGUEST_IRQS) 74 return -EINVAL; 75 76 /* 77 * Next time the Guest runs, the core code will see if it can deliver 78 * this interrupt. 79 */ 80 set_interrupt(cpu, irq); 81 return 0; 82} 83 84/*L:053 85 * Deliver a trap: this is used by the Launcher if it can't emulate 86 * an instruction. 87 */ 88static int trap(struct lg_cpu *cpu, const unsigned long __user *input) 89{ 90 unsigned long trapnum; 91 92 if (get_user(trapnum, input) != 0) 93 return -EFAULT; 94 95 if (!deliver_trap(cpu, trapnum)) 96 return -EINVAL; 97 98 return 0; 99} 100 101/*L:040 102 * Once our Guest is initialized, the Launcher makes it run by reading 103 * from /dev/lguest. 104 */ 105static ssize_t read(struct file *file, char __user *user, size_t size,loff_t*o) 106{ 107 struct lguest *lg = file->private_data; 108 struct lg_cpu *cpu; 109 unsigned int cpu_id = *o; 110 111 /* You must write LHREQ_INITIALIZE first! */ 112 if (!lg) 113 return -EINVAL; 114 115 /* Watch out for arbitrary vcpu indexes! */ 116 if (cpu_id >= lg->nr_cpus) 117 return -EINVAL; 118 119 cpu = &lg->cpus[cpu_id]; 120 121 /* If you're not the task which owns the Guest, go away. */ 122 if (current != cpu->tsk) 123 return -EPERM; 124 125 /* If the Guest is already dead, we indicate why */ 126 if (lg->dead) { 127 size_t len; 128 129 /* lg->dead either contains an error code, or a string. */ 130 if (IS_ERR(lg->dead)) 131 return PTR_ERR(lg->dead); 132 133 /* We can only return as much as the buffer they read with. */ 134 len = min(size, strlen(lg->dead)+1); 135 if (copy_to_user(user, lg->dead, len) != 0) 136 return -EFAULT; 137 return len; 138 } 139 140 /* 141 * If we returned from read() last time because the Guest sent I/O, 142 * clear the flag. 143 */ 144 if (cpu->pending.trap) 145 cpu->pending.trap = 0; 146 147 /* Run the Guest until something interesting happens. */ 148 return run_guest(cpu, (unsigned long __user *)user); 149} 150 151/*L:025 152 * This actually initializes a CPU. For the moment, a Guest is only 153 * uniprocessor, so "id" is always 0. 154 */ 155static int lg_cpu_start(struct lg_cpu *cpu, unsigned id, unsigned long start_ip) 156{ 157 /* We have a limited number of CPUs in the lguest struct. */ 158 if (id >= ARRAY_SIZE(cpu->lg->cpus)) 159 return -EINVAL; 160 161 /* Set up this CPU's id, and pointer back to the lguest struct. */ 162 cpu->id = id; 163 cpu->lg = container_of(cpu, struct lguest, cpus[id]); 164 cpu->lg->nr_cpus++; 165 166 /* Each CPU has a timer it can set. */ 167 init_clockdev(cpu); 168 169 /* 170 * We need a complete page for the Guest registers: they are accessible 171 * to the Guest and we can only grant it access to whole pages. 172 */ 173 cpu->regs_page = get_zeroed_page(GFP_KERNEL); 174 if (!cpu->regs_page) 175 return -ENOMEM; 176 177 /* We actually put the registers at the end of the page. */ 178 cpu->regs = (void *)cpu->regs_page + PAGE_SIZE - sizeof(*cpu->regs); 179 180 /* 181 * Now we initialize the Guest's registers, handing it the start 182 * address. 183 */ 184 lguest_arch_setup_regs(cpu, start_ip); 185 186 /* 187 * We keep a pointer to the Launcher task (ie. current task) for when 188 * other Guests want to wake this one (eg. console input). 189 */ 190 cpu->tsk = current; 191 192 /* 193 * We need to keep a pointer to the Launcher's memory map, because if 194 * the Launcher dies we need to clean it up. If we don't keep a 195 * reference, it is destroyed before close() is called. 196 */ 197 cpu->mm = get_task_mm(cpu->tsk); 198 199 /* 200 * We remember which CPU's pages this Guest used last, for optimization 201 * when the same Guest runs on the same CPU twice. 202 */ 203 cpu->last_pages = NULL; 204 205 /* No error == success. */ 206 return 0; 207} 208 209/*L:020 210 * The initialization write supplies 3 pointer sized (32 or 64 bit) values (in 211 * addition to the LHREQ_INITIALIZE value). These are: 212 * 213 * base: The start of the Guest-physical memory inside the Launcher memory. 214 * 215 * pfnlimit: The highest (Guest-physical) page number the Guest should be 216 * allowed to access. The Guest memory lives inside the Launcher, so it sets 217 * this to ensure the Guest can only reach its own memory. 218 * 219 * start: The first instruction to execute ("eip" in x86-speak). 220 */ 221static int initialize(struct file *file, const unsigned long __user *input) 222{ 223 /* "struct lguest" contains all we (the Host) know about a Guest. */ 224 struct lguest *lg; 225 int err; 226 unsigned long args[4]; 227 228 /* 229 * We grab the Big Lguest lock, which protects against multiple 230 * simultaneous initializations. 231 */ 232 mutex_lock(&lguest_lock); 233 /* You can't initialize twice! Close the device and start again... */ 234 if (file->private_data) { 235 err = -EBUSY; 236 goto unlock; 237 } 238 239 if (copy_from_user(args, input, sizeof(args)) != 0) { 240 err = -EFAULT; 241 goto unlock; 242 } 243 244 lg = kzalloc(sizeof(*lg), GFP_KERNEL); 245 if (!lg) { 246 err = -ENOMEM; 247 goto unlock; 248 } 249 250 /* Populate the easy fields of our "struct lguest" */ 251 lg->mem_base = (void __user *)args[0]; 252 lg->pfn_limit = args[1]; 253 lg->device_limit = args[3]; 254 255 /* This is the first cpu (cpu 0) and it will start booting at args[2] */ 256 err = lg_cpu_start(&lg->cpus[0], 0, args[2]); 257 if (err) 258 goto free_lg; 259 260 /* 261 * Initialize the Guest's shadow page tables. This allocates 262 * memory, so can fail. 263 */ 264 err = init_guest_pagetable(lg); 265 if (err) 266 goto free_regs; 267 268 /* We keep our "struct lguest" in the file's private_data. */ 269 file->private_data = lg; 270 271 mutex_unlock(&lguest_lock); 272 273 /* And because this is a write() call, we return the length used. */ 274 return sizeof(args); 275 276free_regs: 277 /* FIXME: This should be in free_vcpu */ 278 free_page(lg->cpus[0].regs_page); 279free_lg: 280 kfree(lg); 281unlock: 282 mutex_unlock(&lguest_lock); 283 return err; 284} 285 286/*L:010 287 * The first operation the Launcher does must be a write. All writes 288 * start with an unsigned long number: for the first write this must be 289 * LHREQ_INITIALIZE to set up the Guest. After that the Launcher can use 290 * writes of other values to send interrupts or set up receipt of notifications. 291 * 292 * Note that we overload the "offset" in the /dev/lguest file to indicate what 293 * CPU number we're dealing with. Currently this is always 0 since we only 294 * support uniprocessor Guests, but you can see the beginnings of SMP support 295 * here. 296 */ 297static ssize_t write(struct file *file, const char __user *in, 298 size_t size, loff_t *off) 299{ 300 /* 301 * Once the Guest is initialized, we hold the "struct lguest" in the 302 * file private data. 303 */ 304 struct lguest *lg = file->private_data; 305 const unsigned long __user *input = (const unsigned long __user *)in; 306 unsigned long req; 307 struct lg_cpu *uninitialized_var(cpu); 308 unsigned int cpu_id = *off; 309 310 /* The first value tells us what this request is. */ 311 if (get_user(req, input) != 0) 312 return -EFAULT; 313 input++; 314 315 /* If you haven't initialized, you must do that first. */ 316 if (req != LHREQ_INITIALIZE) { 317 if (!lg || (cpu_id >= lg->nr_cpus)) 318 return -EINVAL; 319 cpu = &lg->cpus[cpu_id]; 320 321 /* Once the Guest is dead, you can only read() why it died. */ 322 if (lg->dead) 323 return -ENOENT; 324 } 325 326 switch (req) { 327 case LHREQ_INITIALIZE: 328 return initialize(file, input); 329 case LHREQ_IRQ: 330 return user_send_irq(cpu, input); 331 case LHREQ_GETREG: 332 return getreg_setup(cpu, input); 333 case LHREQ_SETREG: 334 return setreg(cpu, input); 335 case LHREQ_TRAP: 336 return trap(cpu, input); 337 default: 338 return -EINVAL; 339 } 340} 341 342static int open(struct inode *inode, struct file *file) 343{ 344 file->private_data = NULL; 345 346 return 0; 347} 348 349/*L:060 350 * The final piece of interface code is the close() routine. It reverses 351 * everything done in initialize(). This is usually called because the 352 * Launcher exited. 353 * 354 * Note that the close routine returns 0 or a negative error number: it can't 355 * really fail, but it can whine. I blame Sun for this wart, and K&R C for 356 * letting them do it. 357:*/ 358static int close(struct inode *inode, struct file *file) 359{ 360 struct lguest *lg = file->private_data; 361 unsigned int i; 362 363 /* If we never successfully initialized, there's nothing to clean up */ 364 if (!lg) 365 return 0; 366 367 /* 368 * We need the big lock, to protect from inter-guest I/O and other 369 * Launchers initializing guests. 370 */ 371 mutex_lock(&lguest_lock); 372 373 /* Free up the shadow page tables for the Guest. */ 374 free_guest_pagetable(lg); 375 376 for (i = 0; i < lg->nr_cpus; i++) { 377 /* Cancels the hrtimer set via LHCALL_SET_CLOCKEVENT. */ 378 hrtimer_cancel(&lg->cpus[i].hrt); 379 /* We can free up the register page we allocated. */ 380 free_page(lg->cpus[i].regs_page); 381 /* 382 * Now all the memory cleanups are done, it's safe to release 383 * the Launcher's memory management structure. 384 */ 385 mmput(lg->cpus[i].mm); 386 } 387 388 /* 389 * If lg->dead doesn't contain an error code it will be NULL or a 390 * kmalloc()ed string, either of which is ok to hand to kfree(). 391 */ 392 if (!IS_ERR(lg->dead)) 393 kfree(lg->dead); 394 /* Free the memory allocated to the lguest_struct */ 395 kfree(lg); 396 /* Release lock and exit. */ 397 mutex_unlock(&lguest_lock); 398 399 return 0; 400} 401 402/*L:000 403 * Welcome to our journey through the Launcher! 404 * 405 * The Launcher is the Host userspace program which sets up, runs and services 406 * the Guest. In fact, many comments in the Drivers which refer to "the Host" 407 * doing things are inaccurate: the Launcher does all the device handling for 408 * the Guest, but the Guest can't know that. 409 * 410 * Just to confuse you: to the Host kernel, the Launcher *is* the Guest and we 411 * shall see more of that later. 412 * 413 * We begin our understanding with the Host kernel interface which the Launcher 414 * uses: reading and writing a character device called /dev/lguest. All the 415 * work happens in the read(), write() and close() routines: 416 */ 417static const struct file_operations lguest_fops = { 418 .owner = THIS_MODULE, 419 .open = open, 420 .release = close, 421 .write = write, 422 .read = read, 423 .llseek = default_llseek, 424}; 425/*:*/ 426 427/* 428 * This is a textbook example of a "misc" character device. Populate a "struct 429 * miscdevice" and register it with misc_register(). 430 */ 431static struct miscdevice lguest_dev = { 432 .minor = MISC_DYNAMIC_MINOR, 433 .name = "lguest", 434 .fops = &lguest_fops, 435}; 436 437int __init lguest_device_init(void) 438{ 439 return misc_register(&lguest_dev); 440} 441 442void __exit lguest_device_remove(void) 443{ 444 misc_deregister(&lguest_dev); 445} 446