/*
 * Copyright (C) 2004 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
 * Licensed under the GPL
 */

#include <unistd.h>
#include <sched.h>
#include <signal.h>
#include <errno.h>
#include <sys/time.h>
#include <asm/unistd.h>
#include <aio.h>
#include <init.h>
#include <kern_util.h>
#include <os.h>

/*
 * One I/O request, as passed from the submitting side to the fallback
 * ("2.4") I/O thread over a pipe.  Written/read as a raw byte blob with
 * write()/read(), so it must stay plain data.
 */
struct aio_thread_req {
	enum aio_type type;		/* AIO_READ, AIO_WRITE, or AIO_MMAP */
	int io_fd;			/* descriptor the I/O is done on */
	unsigned long long offset;	/* file offset to seek to first */
	char *buf;			/* data buffer (ignored for AIO_MMAP) */
	int len;			/* number of bytes to transfer */
	struct aio_context *aio;	/* completion context; holds reply_fd */
};

#if defined(HAVE_AIO_ABI)
#include <linux/aio_abi.h>

/*
 * If we have the headers, we are going to build with AIO enabled.
 * If we don't have aio in libc, we define the necessary stubs here.
 */

#if !defined(HAVE_AIO_LIBC)

/* Raw syscall wrapper - create a host AIO context holding up to n events. */
static long io_setup(int n, aio_context_t *ctxp)
{
	return syscall(__NR_io_setup, n, ctxp);
}

/* Raw syscall wrapper - queue nr iocbs on the given context. */
static long io_submit(aio_context_t ctx, long nr, struct iocb **iocbpp)
{
	return syscall(__NR_io_submit, ctx, nr, iocbpp);
}

/*
 * Raw syscall wrapper - wait for between min_nr and nr completions.
 * With timeout == NULL this blocks indefinitely.
 */
static long io_getevents(aio_context_t ctx_id, long min_nr, long nr,
			 struct io_event *events, struct timespec *timeout)
{
	return syscall(__NR_io_getevents, ctx_id, min_nr, nr, events, timeout);
}

#endif

/*
 * The AIO_MMAP cases force the mmapped page into memory here
 * rather than in whatever place first touches the data.  I used
 * to do this by touching the page, but that's delicate because
 * gcc is prone to optimizing that away.  So, what's done here
 * is we read from the descriptor from which the page was
 * mapped.  The caller is required to pass an offset which is
 * inside the page that was mapped.  Thus, when the read
 * returns, we know that the page is in the page cache, and
 * that it now backs the mmapped area.
 */

/*
 * Build an iocb for the request and submit it to the host AIO context.
 * The aio pointer rides along in aio_data so the completion thread can
 * find the reply_fd.  Returns 0 on successful submission, -errno on
 * failure.
 *
 * NOTE(review): for AIO_MMAP the iocb points aio_buf at the on-stack
 * 'c', which goes out of scope when this function returns while the host
 * I/O may still be in flight - presumably harmless because the single
 * byte read is discarded, but worth confirming.
 */
static int do_aio(aio_context_t ctx, enum aio_type type, int fd, char *buf,
		  int len, unsigned long long offset, struct aio_context *aio)
{
	struct iocb *iocbp = & ((struct iocb) {
				    .aio_data = (unsigned long) aio,
				    .aio_fildes = fd,
				    .aio_buf = (unsigned long) buf,
				    .aio_nbytes = len,
				    .aio_offset = offset
			    });
	char c;

	switch (type) {
	case AIO_READ:
		iocbp->aio_lio_opcode = IOCB_CMD_PREAD;
		break;
	case AIO_WRITE:
		iocbp->aio_lio_opcode = IOCB_CMD_PWRITE;
		break;
	case AIO_MMAP:
		/* See the comment above - a 1-byte read inside the mapped
		 * page pulls it into the page cache. */
		iocbp->aio_lio_opcode = IOCB_CMD_PREAD;
		iocbp->aio_buf = (unsigned long) &c;
		iocbp->aio_nbytes = sizeof(c);
		break;
	default:
		printk(UM_KERN_ERR "Bogus op in do_aio - %d\n", type);
		return -EINVAL;
	}

	return (io_submit(ctx, 1, &iocbp) > 0) ? 0 : -errno;
}

/* Initialized in an initcall and unchanged thereafter */
static aio_context_t ctx = 0;

/*
 * Helper thread for the 2.6 path: block in io_getevents() forever and
 * forward each completion to the submitter as a struct aio_thread_reply
 * written to the reply_fd recorded in the request's aio_context.
 */
static int aio_thread(void *arg)
{
	struct aio_thread_reply reply;
	struct io_event event;
	int err, n, reply_fd;

	os_fix_helper_signals();
	while (1) {
		n = io_getevents(ctx, 1, 1, &event, NULL);
		if (n < 0) {
			if (errno == EINTR)
				continue;
			printk(UM_KERN_ERR "aio_thread - io_getevents failed, "
			       "errno = %d\n", errno);
		}
		else {
			/* event.data is the aio_context pointer stashed in
			 * do_aio(); event.res is the operation's result. */
			reply = ((struct aio_thread_reply)
				{ .data = (void *) (long) event.data,
						.err	= event.res });
			reply_fd = ((struct aio_context *) reply.data)->reply_fd;
			err = write(reply_fd, &reply, sizeof(reply));
			if (err != sizeof(reply))
				printk(UM_KERN_ERR "aio_thread - write failed, "
				       "fd = %d, err = %d\n", reply_fd, errno);
		}
	}
	return 0;
}

#endif

/*
 * Synchronous fallback for one request: seek to the offset, then do a
 * plain read/write (or a 1-byte read for AIO_MMAP - see the comment on
 * do_aio).  Returns 0 on success, -errno or -EINVAL on failure.
 */
static int do_not_aio(struct aio_thread_req *req)
{
	char c;
	unsigned long long actual;
	int n;

	actual = lseek64(req->io_fd, req->offset, SEEK_SET);
	if (actual != req->offset)
		return -errno;

	switch (req->type) {
	case AIO_READ:
		n = read(req->io_fd, req->buf, req->len);
		break;
	case AIO_WRITE:
		n = write(req->io_fd, req->buf, req->len);
		break;
	case AIO_MMAP:
		n = read(req->io_fd, &c, sizeof(c));
		break;
	default:
		printk(UM_KERN_ERR "do_not_aio - bad request type : %d\n",
		       req->type);
		return -EINVAL;
	}

	if (n < 0)
		return -errno;
	return 0;
}

/* These are initialized in initcalls and not changed */
static int aio_req_fd_r = -1;
static int aio_req_fd_w = -1;
static int aio_pid = -1;
static unsigned long aio_stack;

/*
 * Helper thread for the fallback path: read struct aio_thread_req blobs
 * from the request pipe, execute each one synchronously with
 * do_not_aio(), and write the reply to the requester's reply_fd.
 */
static int not_aio_thread(void *arg)
{
	struct aio_thread_req req;
	struct aio_thread_reply reply;
	int err;

	os_fix_helper_signals();
	while (1) {
		err = read(aio_req_fd_r, &req, sizeof(req));
		if (err != sizeof(req)) {
			/* A short read means a torn request - all we can do
			 * is log it and wait for the next one. */
			if (err < 0)
				printk(UM_KERN_ERR "not_aio_thread - "
				       "read failed, fd = %d, err = %d\n",
				       aio_req_fd_r,
				       errno);
			else {
				printk(UM_KERN_ERR "not_aio_thread - short "
				       "read, fd = %d, length = %d\n",
				       aio_req_fd_r, err);
			}
			continue;
		}
		err = do_not_aio(&req);
		reply = ((struct aio_thread_reply) { .data 	= req.aio,
					 .err	= err });
		err = write(req.aio->reply_fd, &reply, sizeof(reply));
		if (err != sizeof(reply))
			printk(UM_KERN_ERR "not_aio_thread - write failed, "
			       "fd = %d, err = %d\n", req.aio->reply_fd, errno);
	}

	return 0;
}

/*
 * Set up the fallback path: a request pipe (write end non-blocking for
 * submitters) plus the not_aio_thread helper.
 *
 * NOTE(review): every error path falls through to "out" and the function
 * still returns 0, so callers cannot detect a failed setup - presumably
 * deliberate best-effort, but confirm.
 */
static int init_aio_24(void)
{
	int fds[2], err;

	err = os_pipe(fds, 1, 1);
	if (err)
		goto out;

	aio_req_fd_w = fds[0];
	aio_req_fd_r = fds[1];

	err = os_set_fd_block(aio_req_fd_w, 0);
	if (err)
		goto out_close_pipe;

	err = run_helper_thread(not_aio_thread, NULL,
				CLONE_FILES | CLONE_VM, &aio_stack);
	if (err < 0)
		goto out_close_pipe;

	aio_pid = err;
	goto out;

out_close_pipe:
	close(fds[0]);
	close(fds[1]);
	aio_req_fd_w = -1;
	aio_req_fd_r = -1;
out:
#ifndef HAVE_AIO_ABI
	printk(UM_KERN_INFO "/usr/include/linux/aio_abi.h not present during "
	       "build\n");
#endif
	printk(UM_KERN_INFO "2.6 host AIO support not used - falling back to "
	       "I/O thread\n");
	return 0;
}

#ifdef HAVE_AIO_ABI
#define DEFAULT_24_AIO 0
/*
 * Set up the 2.6 path: create a host AIO context and start the
 * aio_thread completion helper.  Returns 0 on success, negative errno
 * on failure (in which case errno is left set by io_setup() - init_aio
 * relies on that).
 */
static int init_aio_26(void)
{
	int err;

	if (io_setup(256, &ctx)) {
		err = -errno;
		printk(UM_KERN_ERR "aio_thread failed to initialize context, "
		       "err = %d\n", errno);
		return err;
	}

	err = run_helper_thread(aio_thread, NULL,
				CLONE_FILES | CLONE_VM, &aio_stack);
	if (err < 0)
		return err;

	aio_pid = err;

	printk(UM_KERN_INFO "Using 2.6 host AIO\n");
	return 0;
}

/*
 * Submit one request through host AIO.  If submission itself fails,
 * synthesize an error reply on the caller's reply_fd so its event loop
 * still sees a completion for this request.
 */
static int submit_aio_26(enum aio_type type, int io_fd, char *buf, int len,
			 unsigned long long offset, struct aio_context *aio)
{
	struct aio_thread_reply reply;
	int err;

	err = do_aio(ctx, type, io_fd, buf, len, offset, aio);
	if (err) {
		reply = ((struct aio_thread_reply) { .data = aio,
					 .err  = err });
		err = write(aio->reply_fd, &reply, sizeof(reply));
		if (err != sizeof(reply)) {
			err = -errno;
			printk(UM_KERN_ERR "submit_aio_26 - write failed, "
			       "fd = %d, err = %d\n", aio->reply_fd, -err);
		}
		else err = 0;
	}

	return err;
}

#else
#define DEFAULT_24_AIO 1
/* No aio_abi.h at build time - the 2.6 path is unavailable. */
static int init_aio_26(void)
{
	return -ENOSYS;
}

static int submit_aio_26(enum aio_type type, int io_fd, char *buf, int len,
			 unsigned long long offset, struct aio_context *aio)
{
	return -ENOSYS;
}
#endif

/* Initialized in an initcall and unchanged thereafter */
static int aio_24 = DEFAULT_24_AIO;

/* "aio=2.4" on the command line forces the fallback path. */
static int __init set_aio_24(char *name, int *add)
{
	aio_24 = 1;
	return 0;
}

__uml_setup("aio=2.4", set_aio_24,
"aio=2.4\n"
"    This is used to force UML to use 2.4-style AIO even when 2.6 AIO is\n"
"    available.  2.4 AIO is a single thread that handles one request at a\n"
"    time, synchronously.  2.6 AIO is a thread which uses the 2.6 AIO \n"
"    interface to handle an arbitrary number of pending requests.  2.6 AIO \n"
"    is not available in tt mode, on 2.4 hosts, or when UML is built with\n"
"    /usr/include/linux/aio_abi.h not available.  Many distributions don't\n"
"    include aio_abi.h, so you will need to copy it from a kernel tree to\n"
"    your /usr/include/linux in order to build an AIO-capable UML\n\n"
);

/*
 * Try the 2.6 path first (unless aio=2.4 was given), falling back to the
 * 2.4 path when the host lacks AIO support.
 */
static int init_aio(void)
{
	int err;

	if (!aio_24) {
		err = init_aio_26();
		/* errno here is whatever io_setup() left; in the
		 * !HAVE_AIO_ABI build aio_24 defaults to 1, so the stub's
		 * bare -ENOSYS return never reaches this test. */
		if (err && (errno == ENOSYS)) {
			printk(UM_KERN_INFO "2.6 AIO not supported on the "
			       "host - reverting to 2.4 AIO\n");
			aio_24 = 1;
		}
		else return err;
	}

	if (aio_24)
		return init_aio_24();

	return 0;
}

/*
 * The reason for the __initcall/__uml_exitcall asymmetry is that init_aio
 * needs to be called when the kernel is running because it calls run_helper,
 * which needs get_free_page.  exit_aio is a __uml_exitcall because the generic
 * kernel does not run __exitcalls on shutdown, and can't because many of them
 * break when called outside of module unloading.
348 */ 349__initcall(init_aio); 350 351static void exit_aio(void) 352{ 353 if (aio_pid != -1) { 354 os_kill_process(aio_pid, 1); 355 free_stack(aio_stack, 0); 356 } 357} 358 359__uml_exitcall(exit_aio); 360 361static int submit_aio_24(enum aio_type type, int io_fd, char *buf, int len, 362 unsigned long long offset, struct aio_context *aio) 363{ 364 struct aio_thread_req req = { .type = type, 365 .io_fd = io_fd, 366 .offset = offset, 367 .buf = buf, 368 .len = len, 369 .aio = aio, 370 }; 371 int err; 372 373 err = write(aio_req_fd_w, &req, sizeof(req)); 374 if (err == sizeof(req)) 375 err = 0; 376 else err = -errno; 377 378 return err; 379} 380 381int submit_aio(enum aio_type type, int io_fd, char *buf, int len, 382 unsigned long long offset, int reply_fd, 383 struct aio_context *aio) 384{ 385 aio->reply_fd = reply_fd; 386 if (aio_24) 387 return submit_aio_24(type, io_fd, buf, len, offset, aio); 388 else 389 return submit_aio_26(type, io_fd, buf, len, offset, aio); 390} 391