1/* 2 * Copyright (C) 2005, 2006 3 * Avishay Traeger (avishay@gmail.com) 4 * Copyright (C) 2008, 2009 5 * Boaz Harrosh <ooo@electrozaur.com> 6 * 7 * Copyrights for code taken from ext2: 8 * Copyright (C) 1992, 1993, 1994, 1995 9 * Remy Card (card@masi.ibp.fr) 10 * Laboratoire MASI - Institut Blaise Pascal 11 * Universite Pierre et Marie Curie (Paris VI) 12 * from 13 * linux/fs/minix/inode.c 14 * Copyright (C) 1991, 1992 Linus Torvalds 15 * 16 * This file is part of exofs. 17 * 18 * exofs is free software; you can redistribute it and/or modify 19 * it under the terms of the GNU General Public License as published by 20 * the Free Software Foundation. Since it is based on ext2, and the only 21 * valid version of GPL for the Linux kernel is version 2, the only valid 22 * version of GPL for exofs is version 2. 23 * 24 * exofs is distributed in the hope that it will be useful, 25 * but WITHOUT ANY WARRANTY; without even the implied warranty of 26 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 27 * GNU General Public License for more details. 28 * 29 * You should have received a copy of the GNU General Public License 30 * along with exofs; if not, write to the Free Software 31 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA 32 */ 33 34#include <linux/slab.h> 35 36#include "exofs.h" 37 38#define EXOFS_DBGMSG2(M...) do {} while (0) 39 40unsigned exofs_max_io_pages(struct ore_layout *layout, 41 unsigned expected_pages) 42{ 43 unsigned pages = min_t(unsigned, expected_pages, 44 layout->max_io_length / PAGE_SIZE); 45 46 return pages; 47} 48 49struct page_collect { 50 struct exofs_sb_info *sbi; 51 struct inode *inode; 52 unsigned expected_pages; 53 struct ore_io_state *ios; 54 55 struct page **pages; 56 unsigned alloc_pages; 57 unsigned nr_pages; 58 unsigned long length; 59 loff_t pg_first; /* keep 64bit also in 32-arches */ 60 bool read_4_write; /* This means two things: that the read is sync 61 * And the pages should not be unlocked. 62 */ 63 struct page *that_locked_page; 64}; 65 66static void _pcol_init(struct page_collect *pcol, unsigned expected_pages, 67 struct inode *inode) 68{ 69 struct exofs_sb_info *sbi = inode->i_sb->s_fs_info; 70 71 pcol->sbi = sbi; 72 pcol->inode = inode; 73 pcol->expected_pages = expected_pages; 74 75 pcol->ios = NULL; 76 pcol->pages = NULL; 77 pcol->alloc_pages = 0; 78 pcol->nr_pages = 0; 79 pcol->length = 0; 80 pcol->pg_first = -1; 81 pcol->read_4_write = false; 82 pcol->that_locked_page = NULL; 83} 84 85static void _pcol_reset(struct page_collect *pcol) 86{ 87 pcol->expected_pages -= min(pcol->nr_pages, pcol->expected_pages); 88 89 pcol->pages = NULL; 90 pcol->alloc_pages = 0; 91 pcol->nr_pages = 0; 92 pcol->length = 0; 93 pcol->pg_first = -1; 94 pcol->ios = NULL; 95 pcol->that_locked_page = NULL; 96 97 /* this is probably the end of the loop but in writes 98 * it might not end here. don't be left with nothing 99 */ 100 if (!pcol->expected_pages) 101 pcol->expected_pages = 102 exofs_max_io_pages(&pcol->sbi->layout, ~0); 103} 104 105static int pcol_try_alloc(struct page_collect *pcol) 106{ 107 unsigned pages; 108 109 /* TODO: easily support bio chaining */ 110 pages = exofs_max_io_pages(&pcol->sbi->layout, pcol->expected_pages); 111 112 for (; pages; pages >>= 1) { 113 pcol->pages = kmalloc(pages * sizeof(struct page *), 114 GFP_KERNEL); 115 if (likely(pcol->pages)) { 116 pcol->alloc_pages = pages; 117 return 0; 118 } 119 } 120 121 EXOFS_ERR("Failed to kmalloc expected_pages=%u\n", 122 pcol->expected_pages); 123 return -ENOMEM; 124} 125 126static void pcol_free(struct page_collect *pcol) 127{ 128 kfree(pcol->pages); 129 pcol->pages = NULL; 130 131 if (pcol->ios) { 132 ore_put_io_state(pcol->ios); 133 pcol->ios = NULL; 134 } 135} 136 137static int pcol_add_page(struct page_collect *pcol, struct page *page, 138 unsigned len) 139{ 140 if (unlikely(pcol->nr_pages >= pcol->alloc_pages)) 141 return -ENOMEM; 142 143 pcol->pages[pcol->nr_pages++] = page; 144 pcol->length += len; 145 return 0; 146} 147 148enum {PAGE_WAS_NOT_IN_IO = 17}; 149static int update_read_page(struct page *page, int ret) 150{ 151 switch (ret) { 152 case 0: 153 /* Everything is OK */ 154 SetPageUptodate(page); 155 if (PageError(page)) 156 ClearPageError(page); 157 break; 158 case -EFAULT: 159 /* In this case we were trying to read something that wasn't on 160 * disk yet - return a page full of zeroes. This should be OK, 161 * because the object should be empty (if there was a write 162 * before this read, the read would be waiting with the page 163 * locked */ 164 clear_highpage(page); 165 166 SetPageUptodate(page); 167 if (PageError(page)) 168 ClearPageError(page); 169 EXOFS_DBGMSG("recovered read error\n"); 170 /* fall through */ 171 case PAGE_WAS_NOT_IN_IO: 172 ret = 0; /* recovered error */ 173 break; 174 default: 175 SetPageError(page); 176 } 177 return ret; 178} 179 180static void update_write_page(struct page *page, int ret) 181{ 182 if (unlikely(ret == PAGE_WAS_NOT_IN_IO)) 183 return; /* don't pass start don't collect $200 */ 184 185 if (ret) { 186 mapping_set_error(page->mapping, ret); 187 SetPageError(page); 188 } 189 end_page_writeback(page); 190} 191 192/* Called at the end of reads, to optionally unlock pages and update their 193 * status. 194 */ 195static int __readpages_done(struct page_collect *pcol) 196{ 197 int i; 198 u64 good_bytes; 199 u64 length = 0; 200 int ret = ore_check_io(pcol->ios, NULL); 201 202 if (likely(!ret)) { 203 good_bytes = pcol->length; 204 ret = PAGE_WAS_NOT_IN_IO; 205 } else { 206 good_bytes = 0; 207 } 208 209 EXOFS_DBGMSG2("readpages_done(0x%lx) good_bytes=0x%llx" 210 " length=0x%lx nr_pages=%u\n", 211 pcol->inode->i_ino, _LLU(good_bytes), pcol->length, 212 pcol->nr_pages); 213 214 for (i = 0; i < pcol->nr_pages; i++) { 215 struct page *page = pcol->pages[i]; 216 struct inode *inode = page->mapping->host; 217 int page_stat; 218 219 if (inode != pcol->inode) 220 continue; /* osd might add more pages at end */ 221 222 if (likely(length < good_bytes)) 223 page_stat = 0; 224 else 225 page_stat = ret; 226 227 EXOFS_DBGMSG2(" readpages_done(0x%lx, 0x%lx) %s\n", 228 inode->i_ino, page->index, 229 page_stat ? "bad_bytes" : "good_bytes"); 230 231 ret = update_read_page(page, page_stat); 232 if (!pcol->read_4_write) 233 unlock_page(page); 234 length += PAGE_SIZE; 235 } 236 237 pcol_free(pcol); 238 EXOFS_DBGMSG2("readpages_done END\n"); 239 return ret; 240} 241 242/* callback of async reads */ 243static void readpages_done(struct ore_io_state *ios, void *p) 244{ 245 struct page_collect *pcol = p; 246 247 __readpages_done(pcol); 248 atomic_dec(&pcol->sbi->s_curr_pending); 249 kfree(pcol); 250} 251 252static void _unlock_pcol_pages(struct page_collect *pcol, int ret, int rw) 253{ 254 int i; 255 256 for (i = 0; i < pcol->nr_pages; i++) { 257 struct page *page = pcol->pages[i]; 258 259 if (rw == READ) 260 update_read_page(page, ret); 261 else 262 update_write_page(page, ret); 263 264 unlock_page(page); 265 } 266} 267 268static int _maybe_not_all_in_one_io(struct ore_io_state *ios, 269 struct page_collect *pcol_src, struct page_collect *pcol) 270{ 271 /* length was wrong or offset was not page aligned */ 272 BUG_ON(pcol_src->nr_pages < ios->nr_pages); 273 274 if (pcol_src->nr_pages > ios->nr_pages) { 275 struct page **src_page; 276 unsigned pages_less = pcol_src->nr_pages - ios->nr_pages; 277 unsigned long len_less = pcol_src->length - ios->length; 278 unsigned i; 279 int ret; 280 281 /* This IO was trimmed */ 282 pcol_src->nr_pages = ios->nr_pages; 283 pcol_src->length = ios->length; 284 285 /* Left over pages are passed to the next io */ 286 pcol->expected_pages += pages_less; 287 pcol->nr_pages = pages_less; 288 pcol->length = len_less; 289 src_page = pcol_src->pages + pcol_src->nr_pages; 290 pcol->pg_first = (*src_page)->index; 291 292 ret = pcol_try_alloc(pcol); 293 if (unlikely(ret)) 294 return ret; 295 296 for (i = 0; i < pages_less; ++i) 297 pcol->pages[i] = *src_page++; 298 299 EXOFS_DBGMSG("Length was adjusted nr_pages=0x%x " 300 "pages_less=0x%x expected_pages=0x%x " 301 "next_offset=0x%llx next_len=0x%lx\n", 302 pcol_src->nr_pages, pages_less, pcol->expected_pages, 303 pcol->pg_first * PAGE_SIZE, pcol->length); 304 } 305 return 0; 306} 307 308static int read_exec(struct page_collect *pcol) 309{ 310 struct exofs_i_info *oi = exofs_i(pcol->inode); 311 struct ore_io_state *ios; 312 struct page_collect *pcol_copy = NULL; 313 int ret; 314 315 if (!pcol->pages) 316 return 0; 317 318 if (!pcol->ios) { 319 int ret = ore_get_rw_state(&pcol->sbi->layout, &oi->oc, true, 320 pcol->pg_first << PAGE_CACHE_SHIFT, 321 pcol->length, &pcol->ios); 322 323 if (ret) 324 return ret; 325 } 326 327 ios = pcol->ios; 328 ios->pages = pcol->pages; 329 330 if (pcol->read_4_write) { 331 ore_read(pcol->ios); 332 return __readpages_done(pcol); 333 } 334 335 pcol_copy = kmalloc(sizeof(*pcol_copy), GFP_KERNEL); 336 if (!pcol_copy) { 337 ret = -ENOMEM; 338 goto err; 339 } 340 341 *pcol_copy = *pcol; 342 ios->done = readpages_done; 343 ios->private = pcol_copy; 344 345 /* pages ownership was passed to pcol_copy */ 346 _pcol_reset(pcol); 347 348 ret = _maybe_not_all_in_one_io(ios, pcol_copy, pcol); 349 if (unlikely(ret)) 350 goto err; 351 352 EXOFS_DBGMSG2("read_exec(0x%lx) offset=0x%llx length=0x%llx\n", 353 pcol->inode->i_ino, _LLU(ios->offset), _LLU(ios->length)); 354 355 ret = ore_read(ios); 356 if (unlikely(ret)) 357 goto err; 358 359 atomic_inc(&pcol->sbi->s_curr_pending); 360 361 return 0; 362 363err: 364 if (!pcol_copy) /* Failed before ownership transfer */ 365 pcol_copy = pcol; 366 _unlock_pcol_pages(pcol_copy, ret, READ); 367 pcol_free(pcol_copy); 368 kfree(pcol_copy); 369 370 return ret; 371} 372 373/* readpage_strip is called either directly from readpage() or by the VFS from 374 * within read_cache_pages(), to add one more page to be read. It will try to 375 * collect as many contiguous pages as posible. If a discontinuity is 376 * encountered, or it runs out of resources, it will submit the previous segment 377 * and will start a new collection. Eventually caller must submit the last 378 * segment if present. 379 */ 380static int readpage_strip(void *data, struct page *page) 381{ 382 struct page_collect *pcol = data; 383 struct inode *inode = pcol->inode; 384 struct exofs_i_info *oi = exofs_i(inode); 385 loff_t i_size = i_size_read(inode); 386 pgoff_t end_index = i_size >> PAGE_CACHE_SHIFT; 387 size_t len; 388 int ret; 389 390 BUG_ON(!PageLocked(page)); 391 392 /* FIXME: Just for debugging, will be removed */ 393 if (PageUptodate(page)) 394 EXOFS_ERR("PageUptodate(0x%lx, 0x%lx)\n", pcol->inode->i_ino, 395 page->index); 396 397 pcol->that_locked_page = page; 398 399 if (page->index < end_index) 400 len = PAGE_CACHE_SIZE; 401 else if (page->index == end_index) 402 len = i_size & ~PAGE_CACHE_MASK; 403 else 404 len = 0; 405 406 if (!len || !obj_created(oi)) { 407 /* this will be out of bounds, or doesn't exist yet. 408 * Current page is cleared and the request is split 409 */ 410 clear_highpage(page); 411 412 SetPageUptodate(page); 413 if (PageError(page)) 414 ClearPageError(page); 415 416 if (!pcol->read_4_write) 417 unlock_page(page); 418 EXOFS_DBGMSG("readpage_strip(0x%lx) empty page len=%zx " 419 "read_4_write=%d index=0x%lx end_index=0x%lx " 420 "splitting\n", inode->i_ino, len, 421 pcol->read_4_write, page->index, end_index); 422 423 return read_exec(pcol); 424 } 425 426try_again: 427 428 if (unlikely(pcol->pg_first == -1)) { 429 pcol->pg_first = page->index; 430 } else if (unlikely((pcol->pg_first + pcol->nr_pages) != 431 page->index)) { 432 /* Discontinuity detected, split the request */ 433 ret = read_exec(pcol); 434 if (unlikely(ret)) 435 goto fail; 436 goto try_again; 437 } 438 439 if (!pcol->pages) { 440 ret = pcol_try_alloc(pcol); 441 if (unlikely(ret)) 442 goto fail; 443 } 444 445 if (len != PAGE_CACHE_SIZE) 446 zero_user(page, len, PAGE_CACHE_SIZE - len); 447 448 EXOFS_DBGMSG2(" readpage_strip(0x%lx, 0x%lx) len=0x%zx\n", 449 inode->i_ino, page->index, len); 450 451 ret = pcol_add_page(pcol, page, len); 452 if (ret) { 453 EXOFS_DBGMSG2("Failed pcol_add_page pages[i]=%p " 454 "this_len=0x%zx nr_pages=%u length=0x%lx\n", 455 page, len, pcol->nr_pages, pcol->length); 456 457 /* split the request, and start again with current page */ 458 ret = read_exec(pcol); 459 if (unlikely(ret)) 460 goto fail; 461 462 goto try_again; 463 } 464 465 return 0; 466 467fail: 468 /* SetPageError(page); ??? */ 469 unlock_page(page); 470 return ret; 471} 472 473static int exofs_readpages(struct file *file, struct address_space *mapping, 474 struct list_head *pages, unsigned nr_pages) 475{ 476 struct page_collect pcol; 477 int ret; 478 479 _pcol_init(&pcol, nr_pages, mapping->host); 480 481 ret = read_cache_pages(mapping, pages, readpage_strip, &pcol); 482 if (ret) { 483 EXOFS_ERR("read_cache_pages => %d\n", ret); 484 return ret; 485 } 486 487 ret = read_exec(&pcol); 488 if (unlikely(ret)) 489 return ret; 490 491 return read_exec(&pcol); 492} 493 494static int _readpage(struct page *page, bool read_4_write) 495{ 496 struct page_collect pcol; 497 int ret; 498 499 _pcol_init(&pcol, 1, page->mapping->host); 500 501 pcol.read_4_write = read_4_write; 502 ret = readpage_strip(&pcol, page); 503 if (ret) { 504 EXOFS_ERR("_readpage => %d\n", ret); 505 return ret; 506 } 507 508 return read_exec(&pcol); 509} 510 511/* 512 * We don't need the file 513 */ 514static int exofs_readpage(struct file *file, struct page *page) 515{ 516 return _readpage(page, false); 517} 518 519/* Callback for osd_write. All writes are asynchronous */ 520static void writepages_done(struct ore_io_state *ios, void *p) 521{ 522 struct page_collect *pcol = p; 523 int i; 524 u64 good_bytes; 525 u64 length = 0; 526 int ret = ore_check_io(ios, NULL); 527 528 atomic_dec(&pcol->sbi->s_curr_pending); 529 530 if (likely(!ret)) { 531 good_bytes = pcol->length; 532 ret = PAGE_WAS_NOT_IN_IO; 533 } else { 534 good_bytes = 0; 535 } 536 537 EXOFS_DBGMSG2("writepages_done(0x%lx) good_bytes=0x%llx" 538 " length=0x%lx nr_pages=%u\n", 539 pcol->inode->i_ino, _LLU(good_bytes), pcol->length, 540 pcol->nr_pages); 541 542 for (i = 0; i < pcol->nr_pages; i++) { 543 struct page *page = pcol->pages[i]; 544 struct inode *inode = page->mapping->host; 545 int page_stat; 546 547 if (inode != pcol->inode) 548 continue; /* osd might add more pages to a bio */ 549 550 if (likely(length < good_bytes)) 551 page_stat = 0; 552 else 553 page_stat = ret; 554 555 update_write_page(page, page_stat); 556 unlock_page(page); 557 EXOFS_DBGMSG2(" writepages_done(0x%lx, 0x%lx) status=%d\n", 558 inode->i_ino, page->index, page_stat); 559 560 length += PAGE_SIZE; 561 } 562 563 pcol_free(pcol); 564 kfree(pcol); 565 EXOFS_DBGMSG2("writepages_done END\n"); 566} 567 568static struct page *__r4w_get_page(void *priv, u64 offset, bool *uptodate) 569{ 570 struct page_collect *pcol = priv; 571 pgoff_t index = offset / PAGE_SIZE; 572 573 if (!pcol->that_locked_page || 574 (pcol->that_locked_page->index != index)) { 575 struct page *page; 576 loff_t i_size = i_size_read(pcol->inode); 577 578 if (offset >= i_size) { 579 *uptodate = true; 580 EXOFS_DBGMSG2("offset >= i_size index=0x%lx\n", index); 581 return ZERO_PAGE(0); 582 } 583 584 page = find_get_page(pcol->inode->i_mapping, index); 585 if (!page) { 586 page = find_or_create_page(pcol->inode->i_mapping, 587 index, GFP_NOFS); 588 if (unlikely(!page)) { 589 EXOFS_DBGMSG("grab_cache_page Failed " 590 "index=0x%llx\n", _LLU(index)); 591 return NULL; 592 } 593 unlock_page(page); 594 } 595 *uptodate = PageUptodate(page); 596 EXOFS_DBGMSG2("index=0x%lx uptodate=%d\n", index, *uptodate); 597 return page; 598 } else { 599 EXOFS_DBGMSG2("YES that_locked_page index=0x%lx\n", 600 pcol->that_locked_page->index); 601 *uptodate = true; 602 return pcol->that_locked_page; 603 } 604} 605 606static void __r4w_put_page(void *priv, struct page *page) 607{ 608 struct page_collect *pcol = priv; 609 610 if ((pcol->that_locked_page != page) && (ZERO_PAGE(0) != page)) { 611 EXOFS_DBGMSG2("index=0x%lx\n", page->index); 612 page_cache_release(page); 613 return; 614 } 615 EXOFS_DBGMSG2("that_locked_page index=0x%lx\n", 616 ZERO_PAGE(0) == page ? -1 : page->index); 617} 618 619static const struct _ore_r4w_op _r4w_op = { 620 .get_page = &__r4w_get_page, 621 .put_page = &__r4w_put_page, 622}; 623 624static int write_exec(struct page_collect *pcol) 625{ 626 struct exofs_i_info *oi = exofs_i(pcol->inode); 627 struct ore_io_state *ios; 628 struct page_collect *pcol_copy = NULL; 629 int ret; 630 631 if (!pcol->pages) 632 return 0; 633 634 BUG_ON(pcol->ios); 635 ret = ore_get_rw_state(&pcol->sbi->layout, &oi->oc, false, 636 pcol->pg_first << PAGE_CACHE_SHIFT, 637 pcol->length, &pcol->ios); 638 if (unlikely(ret)) 639 goto err; 640 641 pcol_copy = kmalloc(sizeof(*pcol_copy), GFP_KERNEL); 642 if (!pcol_copy) { 643 EXOFS_ERR("write_exec: Failed to kmalloc(pcol)\n"); 644 ret = -ENOMEM; 645 goto err; 646 } 647 648 *pcol_copy = *pcol; 649 650 ios = pcol->ios; 651 ios->pages = pcol_copy->pages; 652 ios->done = writepages_done; 653 ios->r4w = &_r4w_op; 654 ios->private = pcol_copy; 655 656 /* pages ownership was passed to pcol_copy */ 657 _pcol_reset(pcol); 658 659 ret = _maybe_not_all_in_one_io(ios, pcol_copy, pcol); 660 if (unlikely(ret)) 661 goto err; 662 663 EXOFS_DBGMSG2("write_exec(0x%lx) offset=0x%llx length=0x%llx\n", 664 pcol->inode->i_ino, _LLU(ios->offset), _LLU(ios->length)); 665 666 ret = ore_write(ios); 667 if (unlikely(ret)) { 668 EXOFS_ERR("write_exec: ore_write() Failed\n"); 669 goto err; 670 } 671 672 atomic_inc(&pcol->sbi->s_curr_pending); 673 return 0; 674 675err: 676 if (!pcol_copy) /* Failed before ownership transfer */ 677 pcol_copy = pcol; 678 _unlock_pcol_pages(pcol_copy, ret, WRITE); 679 pcol_free(pcol_copy); 680 kfree(pcol_copy); 681 682 return ret; 683} 684 685/* writepage_strip is called either directly from writepage() or by the VFS from 686 * within write_cache_pages(), to add one more page to be written to storage. 687 * It will try to collect as many contiguous pages as possible. If a 688 * discontinuity is encountered or it runs out of resources it will submit the 689 * previous segment and will start a new collection. 690 * Eventually caller must submit the last segment if present. 691 */ 692static int writepage_strip(struct page *page, 693 struct writeback_control *wbc_unused, void *data) 694{ 695 struct page_collect *pcol = data; 696 struct inode *inode = pcol->inode; 697 struct exofs_i_info *oi = exofs_i(inode); 698 loff_t i_size = i_size_read(inode); 699 pgoff_t end_index = i_size >> PAGE_CACHE_SHIFT; 700 size_t len; 701 int ret; 702 703 BUG_ON(!PageLocked(page)); 704 705 ret = wait_obj_created(oi); 706 if (unlikely(ret)) 707 goto fail; 708 709 if (page->index < end_index) 710 /* in this case, the page is within the limits of the file */ 711 len = PAGE_CACHE_SIZE; 712 else { 713 len = i_size & ~PAGE_CACHE_MASK; 714 715 if (page->index > end_index || !len) { 716 /* in this case, the page is outside the limits 717 * (truncate in progress) 718 */ 719 ret = write_exec(pcol); 720 if (unlikely(ret)) 721 goto fail; 722 if (PageError(page)) 723 ClearPageError(page); 724 unlock_page(page); 725 EXOFS_DBGMSG("writepage_strip(0x%lx, 0x%lx) " 726 "outside the limits\n", 727 inode->i_ino, page->index); 728 return 0; 729 } 730 } 731 732try_again: 733 734 if (unlikely(pcol->pg_first == -1)) { 735 pcol->pg_first = page->index; 736 } else if (unlikely((pcol->pg_first + pcol->nr_pages) != 737 page->index)) { 738 /* Discontinuity detected, split the request */ 739 ret = write_exec(pcol); 740 if (unlikely(ret)) 741 goto fail; 742 743 EXOFS_DBGMSG("writepage_strip(0x%lx, 0x%lx) Discontinuity\n", 744 inode->i_ino, page->index); 745 goto try_again; 746 } 747 748 if (!pcol->pages) { 749 ret = pcol_try_alloc(pcol); 750 if (unlikely(ret)) 751 goto fail; 752 } 753 754 EXOFS_DBGMSG2(" writepage_strip(0x%lx, 0x%lx) len=0x%zx\n", 755 inode->i_ino, page->index, len); 756 757 ret = pcol_add_page(pcol, page, len); 758 if (unlikely(ret)) { 759 EXOFS_DBGMSG2("Failed pcol_add_page " 760 "nr_pages=%u total_length=0x%lx\n", 761 pcol->nr_pages, pcol->length); 762 763 /* split the request, next loop will start again */ 764 ret = write_exec(pcol); 765 if (unlikely(ret)) { 766 EXOFS_DBGMSG("write_exec failed => %d", ret); 767 goto fail; 768 } 769 770 goto try_again; 771 } 772 773 BUG_ON(PageWriteback(page)); 774 set_page_writeback(page); 775 776 return 0; 777 778fail: 779 EXOFS_DBGMSG("Error: writepage_strip(0x%lx, 0x%lx)=>%d\n", 780 inode->i_ino, page->index, ret); 781 set_bit(AS_EIO, &page->mapping->flags); 782 unlock_page(page); 783 return ret; 784} 785 786static int exofs_writepages(struct address_space *mapping, 787 struct writeback_control *wbc) 788{ 789 struct page_collect pcol; 790 long start, end, expected_pages; 791 int ret; 792 793 start = wbc->range_start >> PAGE_CACHE_SHIFT; 794 end = (wbc->range_end == LLONG_MAX) ? 795 start + mapping->nrpages : 796 wbc->range_end >> PAGE_CACHE_SHIFT; 797 798 if (start || end) 799 expected_pages = end - start + 1; 800 else 801 expected_pages = mapping->nrpages; 802 803 if (expected_pages < 32L) 804 expected_pages = 32L; 805 806 EXOFS_DBGMSG2("inode(0x%lx) wbc->start=0x%llx wbc->end=0x%llx " 807 "nrpages=%lu start=0x%lx end=0x%lx expected_pages=%ld\n", 808 mapping->host->i_ino, wbc->range_start, wbc->range_end, 809 mapping->nrpages, start, end, expected_pages); 810 811 _pcol_init(&pcol, expected_pages, mapping->host); 812 813 ret = write_cache_pages(mapping, wbc, writepage_strip, &pcol); 814 if (unlikely(ret)) { 815 EXOFS_ERR("write_cache_pages => %d\n", ret); 816 return ret; 817 } 818 819 ret = write_exec(&pcol); 820 if (unlikely(ret)) 821 return ret; 822 823 if (wbc->sync_mode == WB_SYNC_ALL) { 824 return write_exec(&pcol); /* pump the last reminder */ 825 } else if (pcol.nr_pages) { 826 /* not SYNC let the reminder join the next writeout */ 827 unsigned i; 828 829 for (i = 0; i < pcol.nr_pages; i++) { 830 struct page *page = pcol.pages[i]; 831 832 end_page_writeback(page); 833 set_page_dirty(page); 834 unlock_page(page); 835 } 836 } 837 return 0; 838} 839 840/* 841static int exofs_writepage(struct page *page, struct writeback_control *wbc) 842{ 843 struct page_collect pcol; 844 int ret; 845 846 _pcol_init(&pcol, 1, page->mapping->host); 847 848 ret = writepage_strip(page, NULL, &pcol); 849 if (ret) { 850 EXOFS_ERR("exofs_writepage => %d\n", ret); 851 return ret; 852 } 853 854 return write_exec(&pcol); 855} 856*/ 857/* i_mutex held using inode->i_size directly */ 858static void _write_failed(struct inode *inode, loff_t to) 859{ 860 if (to > inode->i_size) 861 truncate_pagecache(inode, inode->i_size); 862} 863 864int exofs_write_begin(struct file *file, struct address_space *mapping, 865 loff_t pos, unsigned len, unsigned flags, 866 struct page **pagep, void **fsdata) 867{ 868 int ret = 0; 869 struct page *page; 870 871 page = *pagep; 872 if (page == NULL) { 873 ret = simple_write_begin(file, mapping, pos, len, flags, pagep, 874 fsdata); 875 if (ret) { 876 EXOFS_DBGMSG("simple_write_begin failed\n"); 877 goto out; 878 } 879 880 page = *pagep; 881 } 882 883 /* read modify write */ 884 if (!PageUptodate(page) && (len != PAGE_CACHE_SIZE)) { 885 loff_t i_size = i_size_read(mapping->host); 886 pgoff_t end_index = i_size >> PAGE_CACHE_SHIFT; 887 size_t rlen; 888 889 if (page->index < end_index) 890 rlen = PAGE_CACHE_SIZE; 891 else if (page->index == end_index) 892 rlen = i_size & ~PAGE_CACHE_MASK; 893 else 894 rlen = 0; 895 896 if (!rlen) { 897 clear_highpage(page); 898 SetPageUptodate(page); 899 goto out; 900 } 901 902 ret = _readpage(page, true); 903 if (ret) { 904 /*SetPageError was done by _readpage. Is it ok?*/ 905 unlock_page(page); 906 EXOFS_DBGMSG("__readpage failed\n"); 907 } 908 } 909out: 910 if (unlikely(ret)) 911 _write_failed(mapping->host, pos + len); 912 913 return ret; 914} 915 916static int exofs_write_begin_export(struct file *file, 917 struct address_space *mapping, 918 loff_t pos, unsigned len, unsigned flags, 919 struct page **pagep, void **fsdata) 920{ 921 *pagep = NULL; 922 923 return exofs_write_begin(file, mapping, pos, len, flags, pagep, 924 fsdata); 925} 926 927static int exofs_write_end(struct file *file, struct address_space *mapping, 928 loff_t pos, unsigned len, unsigned copied, 929 struct page *page, void *fsdata) 930{ 931 struct inode *inode = mapping->host; 932 /* According to comment in simple_write_end i_mutex is held */ 933 loff_t i_size = inode->i_size; 934 int ret; 935 936 ret = simple_write_end(file, mapping,pos, len, copied, page, fsdata); 937 if (unlikely(ret)) 938 _write_failed(inode, pos + len); 939 940 /* TODO: once simple_write_end marks inode dirty remove */ 941 if (i_size != inode->i_size) 942 mark_inode_dirty(inode); 943 return ret; 944} 945 946static int exofs_releasepage(struct page *page, gfp_t gfp) 947{ 948 EXOFS_DBGMSG("page 0x%lx\n", page->index); 949 WARN_ON(1); 950 return 0; 951} 952 953static void exofs_invalidatepage(struct page *page, unsigned int offset, 954 unsigned int length) 955{ 956 EXOFS_DBGMSG("page 0x%lx offset 0x%x length 0x%x\n", 957 page->index, offset, length); 958 WARN_ON(1); 959} 960 961 962 /* TODO: Should be easy enough to do proprly */ 963static ssize_t exofs_direct_IO(struct kiocb *iocb, struct iov_iter *iter, 964 loff_t offset) 965{ 966 return 0; 967} 968 969const struct address_space_operations exofs_aops = { 970 .readpage = exofs_readpage, 971 .readpages = exofs_readpages, 972 .writepage = NULL, 973 .writepages = exofs_writepages, 974 .write_begin = exofs_write_begin_export, 975 .write_end = exofs_write_end, 976 .releasepage = exofs_releasepage, 977 .set_page_dirty = __set_page_dirty_nobuffers, 978 .invalidatepage = exofs_invalidatepage, 979 980 /* Not implemented Yet */ 981 .bmap = NULL, /* TODO: use osd's OSD_ACT_READ_MAP */ 982 .direct_IO = exofs_direct_IO, 983 984 /* With these NULL has special meaning or default is not exported */ 985 .migratepage = NULL, 986 .launder_page = NULL, 987 .is_partially_uptodate = NULL, 988 .error_remove_page = NULL, 989}; 990 991/****************************************************************************** 992 * INODE OPERATIONS 993 *****************************************************************************/ 994 995/* 996 * Test whether an inode is a fast symlink. 997 */ 998static inline int exofs_inode_is_fast_symlink(struct inode *inode) 999{ 1000 struct exofs_i_info *oi = exofs_i(inode); 1001 1002 return S_ISLNK(inode->i_mode) && (oi->i_data[0] != 0); 1003} 1004 1005static int _do_truncate(struct inode *inode, loff_t newsize) 1006{ 1007 struct exofs_i_info *oi = exofs_i(inode); 1008 struct exofs_sb_info *sbi = inode->i_sb->s_fs_info; 1009 int ret; 1010 1011 inode->i_mtime = inode->i_ctime = CURRENT_TIME; 1012 1013 ret = ore_truncate(&sbi->layout, &oi->oc, (u64)newsize); 1014 if (likely(!ret)) 1015 truncate_setsize(inode, newsize); 1016 1017 EXOFS_DBGMSG2("(0x%lx) size=0x%llx ret=>%d\n", 1018 inode->i_ino, newsize, ret); 1019 return ret; 1020} 1021 1022/* 1023 * Set inode attributes - update size attribute on OSD if needed, 1024 * otherwise just call generic functions. 1025 */ 1026int exofs_setattr(struct dentry *dentry, struct iattr *iattr) 1027{ 1028 struct inode *inode = d_inode(dentry); 1029 int error; 1030 1031 /* if we are about to modify an object, and it hasn't been 1032 * created yet, wait 1033 */ 1034 error = wait_obj_created(exofs_i(inode)); 1035 if (unlikely(error)) 1036 return error; 1037 1038 error = inode_change_ok(inode, iattr); 1039 if (unlikely(error)) 1040 return error; 1041 1042 if ((iattr->ia_valid & ATTR_SIZE) && 1043 iattr->ia_size != i_size_read(inode)) { 1044 error = _do_truncate(inode, iattr->ia_size); 1045 if (unlikely(error)) 1046 return error; 1047 } 1048 1049 setattr_copy(inode, iattr); 1050 mark_inode_dirty(inode); 1051 return 0; 1052} 1053 1054static const struct osd_attr g_attr_inode_file_layout = ATTR_DEF( 1055 EXOFS_APAGE_FS_DATA, 1056 EXOFS_ATTR_INODE_FILE_LAYOUT, 1057 0); 1058static const struct osd_attr g_attr_inode_dir_layout = ATTR_DEF( 1059 EXOFS_APAGE_FS_DATA, 1060 EXOFS_ATTR_INODE_DIR_LAYOUT, 1061 0); 1062 1063/* 1064 * Read the Linux inode info from the OSD, and return it as is. In exofs the 1065 * inode info is in an application specific page/attribute of the osd-object. 1066 */ 1067static int exofs_get_inode(struct super_block *sb, struct exofs_i_info *oi, 1068 struct exofs_fcb *inode) 1069{ 1070 struct exofs_sb_info *sbi = sb->s_fs_info; 1071 struct osd_attr attrs[] = { 1072 [0] = g_attr_inode_data, 1073 [1] = g_attr_inode_file_layout, 1074 [2] = g_attr_inode_dir_layout, 1075 }; 1076 struct ore_io_state *ios; 1077 struct exofs_on_disk_inode_layout *layout; 1078 int ret; 1079 1080 ret = ore_get_io_state(&sbi->layout, &oi->oc, &ios); 1081 if (unlikely(ret)) { 1082 EXOFS_ERR("%s: ore_get_io_state failed.\n", __func__); 1083 return ret; 1084 } 1085 1086 attrs[1].len = exofs_on_disk_inode_layout_size(sbi->oc.numdevs); 1087 attrs[2].len = exofs_on_disk_inode_layout_size(sbi->oc.numdevs); 1088 1089 ios->in_attr = attrs; 1090 ios->in_attr_len = ARRAY_SIZE(attrs); 1091 1092 ret = ore_read(ios); 1093 if (unlikely(ret)) { 1094 EXOFS_ERR("object(0x%llx) corrupted, return empty file=>%d\n", 1095 _LLU(oi->one_comp.obj.id), ret); 1096 memset(inode, 0, sizeof(*inode)); 1097 inode->i_mode = 0040000 | (0777 & ~022); 1098 /* If object is lost on target we might as well enable it's 1099 * delete. 1100 */ 1101 ret = 0; 1102 goto out; 1103 } 1104 1105 ret = extract_attr_from_ios(ios, &attrs[0]); 1106 if (ret) { 1107 EXOFS_ERR("%s: extract_attr 0 of inode failed\n", __func__); 1108 goto out; 1109 } 1110 WARN_ON(attrs[0].len != EXOFS_INO_ATTR_SIZE); 1111 memcpy(inode, attrs[0].val_ptr, EXOFS_INO_ATTR_SIZE); 1112 1113 ret = extract_attr_from_ios(ios, &attrs[1]); 1114 if (ret) { 1115 EXOFS_ERR("%s: extract_attr 1 of inode failed\n", __func__); 1116 goto out; 1117 } 1118 if (attrs[1].len) { 1119 layout = attrs[1].val_ptr; 1120 if (layout->gen_func != cpu_to_le16(LAYOUT_MOVING_WINDOW)) { 1121 EXOFS_ERR("%s: unsupported files layout %d\n", 1122 __func__, layout->gen_func); 1123 ret = -ENOTSUPP; 1124 goto out; 1125 } 1126 } 1127 1128 ret = extract_attr_from_ios(ios, &attrs[2]); 1129 if (ret) { 1130 EXOFS_ERR("%s: extract_attr 2 of inode failed\n", __func__); 1131 goto out; 1132 } 1133 if (attrs[2].len) { 1134 layout = attrs[2].val_ptr; 1135 if (layout->gen_func != cpu_to_le16(LAYOUT_MOVING_WINDOW)) { 1136 EXOFS_ERR("%s: unsupported meta-data layout %d\n", 1137 __func__, layout->gen_func); 1138 ret = -ENOTSUPP; 1139 goto out; 1140 } 1141 } 1142 1143out: 1144 ore_put_io_state(ios); 1145 return ret; 1146} 1147 1148static void __oi_init(struct exofs_i_info *oi) 1149{ 1150 init_waitqueue_head(&oi->i_wq); 1151 oi->i_flags = 0; 1152} 1153/* 1154 * Fill in an inode read from the OSD and set it up for use 1155 */ 1156struct inode *exofs_iget(struct super_block *sb, unsigned long ino) 1157{ 1158 struct exofs_i_info *oi; 1159 struct exofs_fcb fcb; 1160 struct inode *inode; 1161 int ret; 1162 1163 inode = iget_locked(sb, ino); 1164 if (!inode) 1165 return ERR_PTR(-ENOMEM); 1166 if (!(inode->i_state & I_NEW)) 1167 return inode; 1168 oi = exofs_i(inode); 1169 __oi_init(oi); 1170 exofs_init_comps(&oi->oc, &oi->one_comp, sb->s_fs_info, 1171 exofs_oi_objno(oi)); 1172 1173 /* read the inode from the osd */ 1174 ret = exofs_get_inode(sb, oi, &fcb); 1175 if (ret) 1176 goto bad_inode; 1177 1178 set_obj_created(oi); 1179 1180 /* copy stuff from on-disk struct to in-memory struct */ 1181 inode->i_mode = le16_to_cpu(fcb.i_mode); 1182 i_uid_write(inode, le32_to_cpu(fcb.i_uid)); 1183 i_gid_write(inode, le32_to_cpu(fcb.i_gid)); 1184 set_nlink(inode, le16_to_cpu(fcb.i_links_count)); 1185 inode->i_ctime.tv_sec = (signed)le32_to_cpu(fcb.i_ctime); 1186 inode->i_atime.tv_sec = (signed)le32_to_cpu(fcb.i_atime); 1187 inode->i_mtime.tv_sec = (signed)le32_to_cpu(fcb.i_mtime); 1188 inode->i_ctime.tv_nsec = 1189 inode->i_atime.tv_nsec = inode->i_mtime.tv_nsec = 0; 1190 oi->i_commit_size = le64_to_cpu(fcb.i_size); 1191 i_size_write(inode, oi->i_commit_size); 1192 inode->i_blkbits = EXOFS_BLKSHIFT; 1193 inode->i_generation = le32_to_cpu(fcb.i_generation); 1194 1195 oi->i_dir_start_lookup = 0; 1196 1197 if ((inode->i_nlink == 0) && (inode->i_mode == 0)) { 1198 ret = -ESTALE; 1199 goto bad_inode; 1200 } 1201 1202 if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode)) { 1203 if (fcb.i_data[0]) 1204 inode->i_rdev = 1205 old_decode_dev(le32_to_cpu(fcb.i_data[0])); 1206 else 1207 inode->i_rdev = 1208 new_decode_dev(le32_to_cpu(fcb.i_data[1])); 1209 } else { 1210 memcpy(oi->i_data, fcb.i_data, sizeof(fcb.i_data)); 1211 } 1212 1213 if (S_ISREG(inode->i_mode)) { 1214 inode->i_op = &exofs_file_inode_operations; 1215 inode->i_fop = &exofs_file_operations; 1216 inode->i_mapping->a_ops = &exofs_aops; 1217 } else if (S_ISDIR(inode->i_mode)) { 1218 inode->i_op = &exofs_dir_inode_operations; 1219 inode->i_fop = &exofs_dir_operations; 1220 inode->i_mapping->a_ops = &exofs_aops; 1221 } else if (S_ISLNK(inode->i_mode)) { 1222 if (exofs_inode_is_fast_symlink(inode)) { 1223 inode->i_op = &simple_symlink_inode_operations; 1224 inode->i_link = (char *)oi->i_data; 1225 } else { 1226 inode->i_op = &page_symlink_inode_operations; 1227 inode->i_mapping->a_ops = &exofs_aops; 1228 } 1229 } else { 1230 inode->i_op = &exofs_special_inode_operations; 1231 if (fcb.i_data[0]) 1232 init_special_inode(inode, inode->i_mode, 1233 old_decode_dev(le32_to_cpu(fcb.i_data[0]))); 1234 else 1235 init_special_inode(inode, inode->i_mode, 1236 new_decode_dev(le32_to_cpu(fcb.i_data[1]))); 1237 } 1238 1239 unlock_new_inode(inode); 1240 return inode; 1241 1242bad_inode: 1243 iget_failed(inode); 1244 return ERR_PTR(ret); 1245} 1246 1247int __exofs_wait_obj_created(struct exofs_i_info *oi) 1248{ 1249 if (!obj_created(oi)) { 1250 EXOFS_DBGMSG("!obj_created\n"); 1251 BUG_ON(!obj_2bcreated(oi)); 1252 wait_event(oi->i_wq, obj_created(oi)); 1253 EXOFS_DBGMSG("wait_event done\n"); 1254 } 1255 return unlikely(is_bad_inode(&oi->vfs_inode)) ? -EIO : 0; 1256} 1257 1258/* 1259 * Callback function from exofs_new_inode(). The important thing is that we 1260 * set the obj_created flag so that other methods know that the object exists on 1261 * the OSD. 1262 */ 1263static void create_done(struct ore_io_state *ios, void *p) 1264{ 1265 struct inode *inode = p; 1266 struct exofs_i_info *oi = exofs_i(inode); 1267 struct exofs_sb_info *sbi = inode->i_sb->s_fs_info; 1268 int ret; 1269 1270 ret = ore_check_io(ios, NULL); 1271 ore_put_io_state(ios); 1272 1273 atomic_dec(&sbi->s_curr_pending); 1274 1275 if (unlikely(ret)) { 1276 EXOFS_ERR("object=0x%llx creation failed in pid=0x%llx", 1277 _LLU(exofs_oi_objno(oi)), 1278 _LLU(oi->one_comp.obj.partition)); 1279 /*TODO: When FS is corrupted creation can fail, object already 1280 * exist. Get rid of this asynchronous creation, if exist 1281 * increment the obj counter and try the next object. Until we 1282 * succeed. All these dangling objects will be made into lost 1283 * files by chkfs.exofs 1284 */ 1285 } 1286 1287 set_obj_created(oi); 1288 1289 wake_up(&oi->i_wq); 1290} 1291 1292/* 1293 * Set up a new inode and create an object for it on the OSD 1294 */ 1295struct inode *exofs_new_inode(struct inode *dir, umode_t mode) 1296{ 1297 struct super_block *sb = dir->i_sb; 1298 struct exofs_sb_info *sbi = sb->s_fs_info; 1299 struct inode *inode; 1300 struct exofs_i_info *oi; 1301 struct ore_io_state *ios; 1302 int ret; 1303 1304 inode = new_inode(sb); 1305 if (!inode) 1306 return ERR_PTR(-ENOMEM); 1307 1308 oi = exofs_i(inode); 1309 __oi_init(oi); 1310 1311 set_obj_2bcreated(oi); 1312 1313 inode_init_owner(inode, dir, mode); 1314 inode->i_ino = sbi->s_nextid++; 1315 inode->i_blkbits = EXOFS_BLKSHIFT; 1316 inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME; 1317 oi->i_commit_size = inode->i_size = 0; 1318 spin_lock(&sbi->s_next_gen_lock); 1319 inode->i_generation = sbi->s_next_generation++; 1320 spin_unlock(&sbi->s_next_gen_lock); 1321 insert_inode_hash(inode); 1322 1323 exofs_init_comps(&oi->oc, &oi->one_comp, sb->s_fs_info, 1324 exofs_oi_objno(oi)); 1325 exofs_sbi_write_stats(sbi); /* Make sure new sbi->s_nextid is on disk */ 1326 1327 mark_inode_dirty(inode); 1328 1329 ret = ore_get_io_state(&sbi->layout, &oi->oc, &ios); 1330 if (unlikely(ret)) { 1331 EXOFS_ERR("exofs_new_inode: ore_get_io_state failed\n"); 1332 return ERR_PTR(ret); 1333 } 1334 1335 ios->done = create_done; 1336 ios->private = inode; 1337 1338 ret = ore_create(ios); 1339 if (ret) { 1340 ore_put_io_state(ios); 1341 return ERR_PTR(ret); 1342 } 1343 atomic_inc(&sbi->s_curr_pending); 1344 1345 return inode; 1346} 1347 1348/* 1349 * struct to pass two arguments to update_inode's callback 1350 */ 1351struct updatei_args { 1352 struct exofs_sb_info *sbi; 1353 struct exofs_fcb fcb; 1354}; 1355 1356/* 1357 * Callback function from exofs_update_inode(). 1358 */ 1359static void updatei_done(struct ore_io_state *ios, void *p) 1360{ 1361 struct updatei_args *args = p; 1362 1363 ore_put_io_state(ios); 1364 1365 atomic_dec(&args->sbi->s_curr_pending); 1366 1367 kfree(args); 1368} 1369 1370/* 1371 * Write the inode to the OSD. Just fill up the struct, and set the attribute 1372 * synchronously or asynchronously depending on the do_sync flag. 1373 */ 1374static int exofs_update_inode(struct inode *inode, int do_sync) 1375{ 1376 struct exofs_i_info *oi = exofs_i(inode); 1377 struct super_block *sb = inode->i_sb; 1378 struct exofs_sb_info *sbi = sb->s_fs_info; 1379 struct ore_io_state *ios; 1380 struct osd_attr attr; 1381 struct exofs_fcb *fcb; 1382 struct updatei_args *args; 1383 int ret; 1384 1385 args = kzalloc(sizeof(*args), GFP_KERNEL); 1386 if (!args) { 1387 EXOFS_DBGMSG("Failed kzalloc of args\n"); 1388 return -ENOMEM; 1389 } 1390 1391 fcb = &args->fcb; 1392 1393 fcb->i_mode = cpu_to_le16(inode->i_mode); 1394 fcb->i_uid = cpu_to_le32(i_uid_read(inode)); 1395 fcb->i_gid = cpu_to_le32(i_gid_read(inode)); 1396 fcb->i_links_count = cpu_to_le16(inode->i_nlink); 1397 fcb->i_ctime = cpu_to_le32(inode->i_ctime.tv_sec); 1398 fcb->i_atime = cpu_to_le32(inode->i_atime.tv_sec); 1399 fcb->i_mtime = cpu_to_le32(inode->i_mtime.tv_sec); 1400 oi->i_commit_size = i_size_read(inode); 1401 fcb->i_size = cpu_to_le64(oi->i_commit_size); 1402 fcb->i_generation = cpu_to_le32(inode->i_generation); 1403 1404 if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode)) { 1405 if (old_valid_dev(inode->i_rdev)) { 1406 fcb->i_data[0] = 1407 cpu_to_le32(old_encode_dev(inode->i_rdev)); 1408 fcb->i_data[1] = 0; 1409 } else { 1410 fcb->i_data[0] = 0; 1411 fcb->i_data[1] = 1412 cpu_to_le32(new_encode_dev(inode->i_rdev)); 1413 fcb->i_data[2] = 0; 1414 } 1415 } else 1416 memcpy(fcb->i_data, oi->i_data, sizeof(fcb->i_data)); 1417 1418 ret = ore_get_io_state(&sbi->layout, &oi->oc, &ios); 1419 if (unlikely(ret)) { 1420 EXOFS_ERR("%s: ore_get_io_state failed.\n", __func__); 1421 goto free_args; 1422 } 1423 1424 attr = g_attr_inode_data; 1425 attr.val_ptr = fcb; 1426 ios->out_attr_len = 1; 1427 ios->out_attr = &attr; 1428 1429 wait_obj_created(oi); 1430 1431 if (!do_sync) { 1432 args->sbi = sbi; 1433 ios->done = updatei_done; 1434 ios->private = args; 1435 } 1436 1437 ret = ore_write(ios); 1438 if (!do_sync && !ret) { 1439 atomic_inc(&sbi->s_curr_pending); 1440 goto out; /* deallocation in updatei_done */ 1441 } 1442 1443 ore_put_io_state(ios); 1444free_args: 1445 kfree(args); 1446out: 1447 EXOFS_DBGMSG("(0x%lx) do_sync=%d ret=>%d\n", 1448 inode->i_ino, do_sync, ret); 1449 return ret; 1450} 1451 1452int exofs_write_inode(struct inode *inode, struct writeback_control *wbc) 1453{ 1454 /* FIXME: fix fsync and use wbc->sync_mode == WB_SYNC_ALL */ 1455 return exofs_update_inode(inode, 1); 1456} 1457 1458/* 1459 * Callback function from exofs_delete_inode() - don't have much cleaning up to 1460 * do. 1461 */ 1462static void delete_done(struct ore_io_state *ios, void *p) 1463{ 1464 struct exofs_sb_info *sbi = p; 1465 1466 ore_put_io_state(ios); 1467 1468 atomic_dec(&sbi->s_curr_pending); 1469} 1470 1471/* 1472 * Called when the refcount of an inode reaches zero. We remove the object 1473 * from the OSD here. We make sure the object was created before we try and 1474 * delete it. 1475 */ 1476void exofs_evict_inode(struct inode *inode) 1477{ 1478 struct exofs_i_info *oi = exofs_i(inode); 1479 struct super_block *sb = inode->i_sb; 1480 struct exofs_sb_info *sbi = sb->s_fs_info; 1481 struct ore_io_state *ios; 1482 int ret; 1483 1484 truncate_inode_pages_final(&inode->i_data); 1485 1486 /* TODO: should do better here */ 1487 if (inode->i_nlink || is_bad_inode(inode)) 1488 goto no_delete; 1489 1490 inode->i_size = 0; 1491 clear_inode(inode); 1492 1493 /* if we are deleting an obj that hasn't been created yet, wait. 1494 * This also makes sure that create_done cannot be called with an 1495 * already evicted inode. 1496 */ 1497 wait_obj_created(oi); 1498 /* ignore the error, attempt a remove anyway */ 1499 1500 /* Now Remove the OSD objects */ 1501 ret = ore_get_io_state(&sbi->layout, &oi->oc, &ios); 1502 if (unlikely(ret)) { 1503 EXOFS_ERR("%s: ore_get_io_state failed\n", __func__); 1504 return; 1505 } 1506 1507 ios->done = delete_done; 1508 ios->private = sbi; 1509 1510 ret = ore_remove(ios); 1511 if (ret) { 1512 EXOFS_ERR("%s: ore_remove failed\n", __func__); 1513 ore_put_io_state(ios); 1514 return; 1515 } 1516 atomic_inc(&sbi->s_curr_pending); 1517 1518 return; 1519 1520no_delete: 1521 clear_inode(inode); 1522} 1523