1/* 2 * fs/f2fs/recovery.c 3 * 4 * Copyright (c) 2012 Samsung Electronics Co., Ltd. 5 * http://www.samsung.com/ 6 * 7 * This program is free software; you can redistribute it and/or modify 8 * it under the terms of the GNU General Public License version 2 as 9 * published by the Free Software Foundation. 10 */ 11#include <linux/fs.h> 12#include <linux/f2fs_fs.h> 13#include "f2fs.h" 14#include "node.h" 15#include "segment.h" 16 17/* 18 * Roll forward recovery scenarios. 19 * 20 * [Term] F: fsync_mark, D: dentry_mark 21 * 22 * 1. inode(x) | CP | inode(x) | dnode(F) 23 * -> Update the latest inode(x). 24 * 25 * 2. inode(x) | CP | inode(F) | dnode(F) 26 * -> No problem. 27 * 28 * 3. inode(x) | CP | dnode(F) | inode(x) 29 * -> Recover to the latest dnode(F), and drop the last inode(x) 30 * 31 * 4. inode(x) | CP | dnode(F) | inode(F) 32 * -> No problem. 33 * 34 * 5. CP | inode(x) | dnode(F) 35 * -> The inode(DF) was missing. Should drop this dnode(F). 36 * 37 * 6. CP | inode(DF) | dnode(F) 38 * -> No problem. 39 * 40 * 7. CP | dnode(F) | inode(DF) 41 * -> If f2fs_iget fails, then goto next to find inode(DF). 42 * 43 * 8. CP | dnode(F) | inode(x) 44 * -> If f2fs_iget fails, then goto next to find inode(DF). 45 * But it will fail due to no inode(DF). 46 */ 47 48static struct kmem_cache *fsync_entry_slab; 49 50bool space_for_roll_forward(struct f2fs_sb_info *sbi) 51{ 52 if (sbi->last_valid_block_count + sbi->alloc_valid_block_count 53 > sbi->user_block_count) 54 return false; 55 return true; 56} 57 58static struct fsync_inode_entry *get_fsync_inode(struct list_head *head, 59 nid_t ino) 60{ 61 struct fsync_inode_entry *entry; 62 63 list_for_each_entry(entry, head, list) 64 if (entry->inode->i_ino == ino) 65 return entry; 66 67 return NULL; 68} 69 70static int recover_dentry(struct inode *inode, struct page *ipage) 71{ 72 struct f2fs_inode *raw_inode = F2FS_INODE(ipage); 73 nid_t pino = le32_to_cpu(raw_inode->i_pino); 74 struct f2fs_dir_entry *de; 75 struct qstr name; 76 struct page *page; 77 struct inode *dir, *einode; 78 int err = 0; 79 80 dir = f2fs_iget(inode->i_sb, pino); 81 if (IS_ERR(dir)) { 82 err = PTR_ERR(dir); 83 goto out; 84 } 85 86 name.len = le32_to_cpu(raw_inode->i_namelen); 87 name.name = raw_inode->i_name; 88 89 if (unlikely(name.len > F2FS_NAME_LEN)) { 90 WARN_ON(1); 91 err = -ENAMETOOLONG; 92 goto out_err; 93 } 94retry: 95 de = f2fs_find_entry(dir, &name, &page); 96 if (de && inode->i_ino == le32_to_cpu(de->ino)) 97 goto out_unmap_put; 98 99 if (de) { 100 einode = f2fs_iget(inode->i_sb, le32_to_cpu(de->ino)); 101 if (IS_ERR(einode)) { 102 WARN_ON(1); 103 err = PTR_ERR(einode); 104 if (err == -ENOENT) 105 err = -EEXIST; 106 goto out_unmap_put; 107 } 108 err = acquire_orphan_inode(F2FS_I_SB(inode)); 109 if (err) { 110 iput(einode); 111 goto out_unmap_put; 112 } 113 f2fs_delete_entry(de, page, dir, einode); 114 iput(einode); 115 goto retry; 116 } 117 err = __f2fs_add_link(dir, &name, inode, inode->i_ino, inode->i_mode); 118 if (err) 119 goto out_err; 120 121 if (is_inode_flag_set(F2FS_I(dir), FI_DELAY_IPUT)) { 122 iput(dir); 123 } else { 124 add_dirty_dir_inode(dir); 125 set_inode_flag(F2FS_I(dir), FI_DELAY_IPUT); 126 } 127 128 goto out; 129 130out_unmap_put: 131 f2fs_dentry_kunmap(dir, page); 132 f2fs_put_page(page, 0); 133out_err: 134 iput(dir); 135out: 136 f2fs_msg(inode->i_sb, KERN_NOTICE, 137 "%s: ino = %x, name = %s, dir = %lx, err = %d", 138 __func__, ino_of_node(ipage), raw_inode->i_name, 139 IS_ERR(dir) ? 0 : dir->i_ino, err); 140 return err; 141} 142 143static void recover_inode(struct inode *inode, struct page *page) 144{ 145 struct f2fs_inode *raw = F2FS_INODE(page); 146 147 inode->i_mode = le16_to_cpu(raw->i_mode); 148 i_size_write(inode, le64_to_cpu(raw->i_size)); 149 inode->i_atime.tv_sec = le64_to_cpu(raw->i_mtime); 150 inode->i_ctime.tv_sec = le64_to_cpu(raw->i_ctime); 151 inode->i_mtime.tv_sec = le64_to_cpu(raw->i_mtime); 152 inode->i_atime.tv_nsec = le32_to_cpu(raw->i_mtime_nsec); 153 inode->i_ctime.tv_nsec = le32_to_cpu(raw->i_ctime_nsec); 154 inode->i_mtime.tv_nsec = le32_to_cpu(raw->i_mtime_nsec); 155 156 f2fs_msg(inode->i_sb, KERN_NOTICE, "recover_inode: ino = %x, name = %s", 157 ino_of_node(page), F2FS_INODE(page)->i_name); 158} 159 160static int find_fsync_dnodes(struct f2fs_sb_info *sbi, struct list_head *head) 161{ 162 unsigned long long cp_ver = cur_cp_version(F2FS_CKPT(sbi)); 163 struct curseg_info *curseg; 164 struct page *page = NULL; 165 block_t blkaddr; 166 int err = 0; 167 168 /* get node pages in the current segment */ 169 curseg = CURSEG_I(sbi, CURSEG_WARM_NODE); 170 blkaddr = NEXT_FREE_BLKADDR(sbi, curseg); 171 172 ra_meta_pages(sbi, blkaddr, 1, META_POR); 173 174 while (1) { 175 struct fsync_inode_entry *entry; 176 177 if (blkaddr < MAIN_BLKADDR(sbi) || blkaddr >= MAX_BLKADDR(sbi)) 178 return 0; 179 180 page = get_meta_page(sbi, blkaddr); 181 182 if (cp_ver != cpver_of_node(page)) 183 break; 184 185 if (!is_fsync_dnode(page)) 186 goto next; 187 188 entry = get_fsync_inode(head, ino_of_node(page)); 189 if (!entry) { 190 if (IS_INODE(page) && is_dent_dnode(page)) { 191 err = recover_inode_page(sbi, page); 192 if (err) 193 break; 194 } 195 196 /* add this fsync inode to the list */ 197 entry = kmem_cache_alloc(fsync_entry_slab, GFP_F2FS_ZERO); 198 if (!entry) { 199 err = -ENOMEM; 200 break; 201 } 202 /* 203 * CP | dnode(F) | inode(DF) 204 * For this case, we should not give up now. 205 */ 206 entry->inode = f2fs_iget(sbi->sb, ino_of_node(page)); 207 if (IS_ERR(entry->inode)) { 208 err = PTR_ERR(entry->inode); 209 kmem_cache_free(fsync_entry_slab, entry); 210 if (err == -ENOENT) { 211 err = 0; 212 goto next; 213 } 214 break; 215 } 216 list_add_tail(&entry->list, head); 217 } 218 entry->blkaddr = blkaddr; 219 220 if (IS_INODE(page)) { 221 entry->last_inode = blkaddr; 222 if (is_dent_dnode(page)) 223 entry->last_dentry = blkaddr; 224 } 225next: 226 /* check next segment */ 227 blkaddr = next_blkaddr_of_node(page); 228 f2fs_put_page(page, 1); 229 230 ra_meta_pages_cond(sbi, blkaddr); 231 } 232 f2fs_put_page(page, 1); 233 return err; 234} 235 236static void destroy_fsync_dnodes(struct list_head *head) 237{ 238 struct fsync_inode_entry *entry, *tmp; 239 240 list_for_each_entry_safe(entry, tmp, head, list) { 241 iput(entry->inode); 242 list_del(&entry->list); 243 kmem_cache_free(fsync_entry_slab, entry); 244 } 245} 246 247static int check_index_in_prev_nodes(struct f2fs_sb_info *sbi, 248 block_t blkaddr, struct dnode_of_data *dn) 249{ 250 struct seg_entry *sentry; 251 unsigned int segno = GET_SEGNO(sbi, blkaddr); 252 unsigned short blkoff = GET_BLKOFF_FROM_SEG0(sbi, blkaddr); 253 struct f2fs_summary_block *sum_node; 254 struct f2fs_summary sum; 255 struct page *sum_page, *node_page; 256 struct dnode_of_data tdn = *dn; 257 nid_t ino, nid; 258 struct inode *inode; 259 unsigned int offset; 260 block_t bidx; 261 int i; 262 263 sentry = get_seg_entry(sbi, segno); 264 if (!f2fs_test_bit(blkoff, sentry->cur_valid_map)) 265 return 0; 266 267 /* Get the previous summary */ 268 for (i = CURSEG_WARM_DATA; i <= CURSEG_COLD_DATA; i++) { 269 struct curseg_info *curseg = CURSEG_I(sbi, i); 270 if (curseg->segno == segno) { 271 sum = curseg->sum_blk->entries[blkoff]; 272 goto got_it; 273 } 274 } 275 276 sum_page = get_sum_page(sbi, segno); 277 sum_node = (struct f2fs_summary_block *)page_address(sum_page); 278 sum = sum_node->entries[blkoff]; 279 f2fs_put_page(sum_page, 1); 280got_it: 281 /* Use the locked dnode page and inode */ 282 nid = le32_to_cpu(sum.nid); 283 if (dn->inode->i_ino == nid) { 284 tdn.nid = nid; 285 if (!dn->inode_page_locked) 286 lock_page(dn->inode_page); 287 tdn.node_page = dn->inode_page; 288 tdn.ofs_in_node = le16_to_cpu(sum.ofs_in_node); 289 goto truncate_out; 290 } else if (dn->nid == nid) { 291 tdn.ofs_in_node = le16_to_cpu(sum.ofs_in_node); 292 goto truncate_out; 293 } 294 295 /* Get the node page */ 296 node_page = get_node_page(sbi, nid); 297 if (IS_ERR(node_page)) 298 return PTR_ERR(node_page); 299 300 offset = ofs_of_node(node_page); 301 ino = ino_of_node(node_page); 302 f2fs_put_page(node_page, 1); 303 304 if (ino != dn->inode->i_ino) { 305 /* Deallocate previous index in the node page */ 306 inode = f2fs_iget(sbi->sb, ino); 307 if (IS_ERR(inode)) 308 return PTR_ERR(inode); 309 } else { 310 inode = dn->inode; 311 } 312 313 bidx = start_bidx_of_node(offset, F2FS_I(inode)) + 314 le16_to_cpu(sum.ofs_in_node); 315 316 /* 317 * if inode page is locked, unlock temporarily, but its reference 318 * count keeps alive. 319 */ 320 if (ino == dn->inode->i_ino && dn->inode_page_locked) 321 unlock_page(dn->inode_page); 322 323 set_new_dnode(&tdn, inode, NULL, NULL, 0); 324 if (get_dnode_of_data(&tdn, bidx, LOOKUP_NODE)) 325 goto out; 326 327 if (tdn.data_blkaddr == blkaddr) 328 truncate_data_blocks_range(&tdn, 1); 329 330 f2fs_put_dnode(&tdn); 331out: 332 if (ino != dn->inode->i_ino) 333 iput(inode); 334 else if (dn->inode_page_locked) 335 lock_page(dn->inode_page); 336 return 0; 337 338truncate_out: 339 if (datablock_addr(tdn.node_page, tdn.ofs_in_node) == blkaddr) 340 truncate_data_blocks_range(&tdn, 1); 341 if (dn->inode->i_ino == nid && !dn->inode_page_locked) 342 unlock_page(dn->inode_page); 343 return 0; 344} 345 346static int do_recover_data(struct f2fs_sb_info *sbi, struct inode *inode, 347 struct page *page, block_t blkaddr) 348{ 349 struct f2fs_inode_info *fi = F2FS_I(inode); 350 unsigned int start, end; 351 struct dnode_of_data dn; 352 struct f2fs_summary sum; 353 struct node_info ni; 354 int err = 0, recovered = 0; 355 356 /* step 1: recover xattr */ 357 if (IS_INODE(page)) { 358 recover_inline_xattr(inode, page); 359 } else if (f2fs_has_xattr_block(ofs_of_node(page))) { 360 /* 361 * Deprecated; xattr blocks should be found from cold log. 362 * But, we should remain this for backward compatibility. 363 */ 364 recover_xattr_data(inode, page, blkaddr); 365 goto out; 366 } 367 368 /* step 2: recover inline data */ 369 if (recover_inline_data(inode, page)) 370 goto out; 371 372 /* step 3: recover data indices */ 373 start = start_bidx_of_node(ofs_of_node(page), fi); 374 end = start + ADDRS_PER_PAGE(page, fi); 375 376 f2fs_lock_op(sbi); 377 378 set_new_dnode(&dn, inode, NULL, NULL, 0); 379 380 err = get_dnode_of_data(&dn, start, ALLOC_NODE); 381 if (err) { 382 f2fs_unlock_op(sbi); 383 goto out; 384 } 385 386 f2fs_wait_on_page_writeback(dn.node_page, NODE); 387 388 get_node_info(sbi, dn.nid, &ni); 389 f2fs_bug_on(sbi, ni.ino != ino_of_node(page)); 390 f2fs_bug_on(sbi, ofs_of_node(dn.node_page) != ofs_of_node(page)); 391 392 for (; start < end; start++) { 393 block_t src, dest; 394 395 src = datablock_addr(dn.node_page, dn.ofs_in_node); 396 dest = datablock_addr(page, dn.ofs_in_node); 397 398 if (src != dest && dest != NEW_ADDR && dest != NULL_ADDR && 399 dest >= MAIN_BLKADDR(sbi) && dest < MAX_BLKADDR(sbi)) { 400 401 if (src == NULL_ADDR) { 402 err = reserve_new_block(&dn); 403 /* We should not get -ENOSPC */ 404 f2fs_bug_on(sbi, err); 405 } 406 407 /* Check the previous node page having this index */ 408 err = check_index_in_prev_nodes(sbi, dest, &dn); 409 if (err) 410 goto err; 411 412 set_summary(&sum, dn.nid, dn.ofs_in_node, ni.version); 413 414 /* write dummy data page */ 415 recover_data_page(sbi, NULL, &sum, src, dest); 416 dn.data_blkaddr = dest; 417 set_data_blkaddr(&dn); 418 f2fs_update_extent_cache(&dn); 419 recovered++; 420 } 421 dn.ofs_in_node++; 422 } 423 424 if (IS_INODE(dn.node_page)) 425 sync_inode_page(&dn); 426 427 copy_node_footer(dn.node_page, page); 428 fill_node_footer(dn.node_page, dn.nid, ni.ino, 429 ofs_of_node(page), false); 430 set_page_dirty(dn.node_page); 431err: 432 f2fs_put_dnode(&dn); 433 f2fs_unlock_op(sbi); 434out: 435 f2fs_msg(sbi->sb, KERN_NOTICE, 436 "recover_data: ino = %lx, recovered = %d blocks, err = %d", 437 inode->i_ino, recovered, err); 438 return err; 439} 440 441static int recover_data(struct f2fs_sb_info *sbi, 442 struct list_head *head, int type) 443{ 444 unsigned long long cp_ver = cur_cp_version(F2FS_CKPT(sbi)); 445 struct curseg_info *curseg; 446 struct page *page = NULL; 447 int err = 0; 448 block_t blkaddr; 449 450 /* get node pages in the current segment */ 451 curseg = CURSEG_I(sbi, type); 452 blkaddr = NEXT_FREE_BLKADDR(sbi, curseg); 453 454 while (1) { 455 struct fsync_inode_entry *entry; 456 457 if (blkaddr < MAIN_BLKADDR(sbi) || blkaddr >= MAX_BLKADDR(sbi)) 458 break; 459 460 ra_meta_pages_cond(sbi, blkaddr); 461 462 page = get_meta_page(sbi, blkaddr); 463 464 if (cp_ver != cpver_of_node(page)) { 465 f2fs_put_page(page, 1); 466 break; 467 } 468 469 entry = get_fsync_inode(head, ino_of_node(page)); 470 if (!entry) 471 goto next; 472 /* 473 * inode(x) | CP | inode(x) | dnode(F) 474 * In this case, we can lose the latest inode(x). 475 * So, call recover_inode for the inode update. 476 */ 477 if (entry->last_inode == blkaddr) 478 recover_inode(entry->inode, page); 479 if (entry->last_dentry == blkaddr) { 480 err = recover_dentry(entry->inode, page); 481 if (err) { 482 f2fs_put_page(page, 1); 483 break; 484 } 485 } 486 err = do_recover_data(sbi, entry->inode, page, blkaddr); 487 if (err) { 488 f2fs_put_page(page, 1); 489 break; 490 } 491 492 if (entry->blkaddr == blkaddr) { 493 iput(entry->inode); 494 list_del(&entry->list); 495 kmem_cache_free(fsync_entry_slab, entry); 496 } 497next: 498 /* check next segment */ 499 blkaddr = next_blkaddr_of_node(page); 500 f2fs_put_page(page, 1); 501 } 502 if (!err) 503 allocate_new_segments(sbi); 504 return err; 505} 506 507int recover_fsync_data(struct f2fs_sb_info *sbi) 508{ 509 struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_WARM_NODE); 510 struct list_head inode_list; 511 block_t blkaddr; 512 int err; 513 bool need_writecp = false; 514 515 fsync_entry_slab = f2fs_kmem_cache_create("f2fs_fsync_inode_entry", 516 sizeof(struct fsync_inode_entry)); 517 if (!fsync_entry_slab) 518 return -ENOMEM; 519 520 INIT_LIST_HEAD(&inode_list); 521 522 /* step #1: find fsynced inode numbers */ 523 set_sbi_flag(sbi, SBI_POR_DOING); 524 525 /* prevent checkpoint */ 526 mutex_lock(&sbi->cp_mutex); 527 528 blkaddr = NEXT_FREE_BLKADDR(sbi, curseg); 529 530 err = find_fsync_dnodes(sbi, &inode_list); 531 if (err) 532 goto out; 533 534 if (list_empty(&inode_list)) 535 goto out; 536 537 need_writecp = true; 538 539 /* step #2: recover data */ 540 err = recover_data(sbi, &inode_list, CURSEG_WARM_NODE); 541 if (!err) 542 f2fs_bug_on(sbi, !list_empty(&inode_list)); 543out: 544 destroy_fsync_dnodes(&inode_list); 545 kmem_cache_destroy(fsync_entry_slab); 546 547 /* truncate meta pages to be used by the recovery */ 548 truncate_inode_pages_range(META_MAPPING(sbi), 549 MAIN_BLKADDR(sbi) << PAGE_CACHE_SHIFT, -1); 550 551 if (err) { 552 truncate_inode_pages_final(NODE_MAPPING(sbi)); 553 truncate_inode_pages_final(META_MAPPING(sbi)); 554 } 555 556 clear_sbi_flag(sbi, SBI_POR_DOING); 557 if (err) { 558 discard_next_dnode(sbi, blkaddr); 559 560 /* Flush all the NAT/SIT pages */ 561 while (get_pages(sbi, F2FS_DIRTY_META)) 562 sync_meta_pages(sbi, META, LONG_MAX); 563 set_ckpt_flags(sbi->ckpt, CP_ERROR_FLAG); 564 mutex_unlock(&sbi->cp_mutex); 565 } else if (need_writecp) { 566 struct cp_control cpc = { 567 .reason = CP_RECOVERY, 568 }; 569 mutex_unlock(&sbi->cp_mutex); 570 write_checkpoint(sbi, &cpc); 571 } else { 572 mutex_unlock(&sbi->cp_mutex); 573 } 574 return err; 575} 576