1/* 2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. 3 * Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved. 4 * 5 * This copyrighted material is made available to anyone wishing to use, 6 * modify, copy, or redistribute it subject to the terms and conditions 7 * of the GNU General Public License version 2. 8 */ 9 10/* 11 * Implements Extendible Hashing as described in: 12 * "Extendible Hashing" by Fagin, et al in 13 * __ACM Trans. on Database Systems__, Sept 1979. 14 * 15 * 16 * Here's the layout of dirents which is essentially the same as that of ext2 17 * within a single block. The field de_name_len is the number of bytes 18 * actually required for the name (no null terminator). The field de_rec_len 19 * is the number of bytes allocated to the dirent. The offset of the next 20 * dirent in the block is (dirent + dirent->de_rec_len). When a dirent is 21 * deleted, the preceding dirent inherits its allocated space, ie 22 * prev->de_rec_len += deleted->de_rec_len. Since the next dirent is obtained 23 * by adding de_rec_len to the current dirent, this essentially causes the 24 * deleted dirent to get jumped over when iterating through all the dirents. 25 * 26 * When deleting the first dirent in a block, there is no previous dirent so 27 * the field de_ino is set to zero to designate it as deleted. When allocating 28 * a dirent, gfs2_dirent_alloc iterates through the dirents in a block. If the 29 * first dirent has (de_ino == 0) and de_rec_len is large enough, this first 30 * dirent is allocated. Otherwise it must go through all the 'used' dirents 31 * searching for one in which the amount of total space minus the amount of 32 * used space will provide enough space for the new dirent. 33 * 34 * There are two types of blocks in which dirents reside. In a stuffed dinode, 35 * the dirents begin at offset sizeof(struct gfs2_dinode) from the beginning of 36 * the block. In leaves, they begin at offset sizeof(struct gfs2_leaf) from the 37 * beginning of the leaf block. The dirents reside in leaves when 38 * 39 * dip->i_diskflags & GFS2_DIF_EXHASH is true 40 * 41 * Otherwise, the dirents are "linear", within a single stuffed dinode block. 42 * 43 * When the dirents are in leaves, the actual contents of the directory file are 44 * used as an array of 64-bit block pointers pointing to the leaf blocks. The 45 * dirents are NOT in the directory file itself. There can be more than one 46 * block pointer in the array that points to the same leaf. In fact, when a 47 * directory is first converted from linear to exhash, all of the pointers 48 * point to the same leaf. 49 * 50 * When a leaf is completely full, the size of the hash table can be 51 * doubled unless it is already at the maximum size which is hard coded into 52 * GFS2_DIR_MAX_DEPTH. After that, leaves are chained together in a linked list, 53 * but never before the maximum hash table size has been reached. 54 */ 55 56#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt 57 58#include <linux/slab.h> 59#include <linux/spinlock.h> 60#include <linux/buffer_head.h> 61#include <linux/sort.h> 62#include <linux/gfs2_ondisk.h> 63#include <linux/crc32.h> 64#include <linux/vmalloc.h> 65 66#include "gfs2.h" 67#include "incore.h" 68#include "dir.h" 69#include "glock.h" 70#include "inode.h" 71#include "meta_io.h" 72#include "quota.h" 73#include "rgrp.h" 74#include "trans.h" 75#include "bmap.h" 76#include "util.h" 77 78#define IS_LEAF 1 /* Hashed (leaf) directory */ 79#define IS_DINODE 2 /* Linear (stuffed dinode block) directory */ 80 81#define MAX_RA_BLOCKS 32 /* max read-ahead blocks */ 82 83#define gfs2_disk_hash2offset(h) (((u64)(h)) >> 1) 84#define gfs2_dir_offset2hash(p) ((u32)(((u64)(p)) << 1)) 85 86struct qstr gfs2_qdot __read_mostly; 87struct qstr gfs2_qdotdot __read_mostly; 88 89typedef int (*gfs2_dscan_t)(const struct gfs2_dirent *dent, 90 const struct qstr *name, void *opaque); 91 92int gfs2_dir_get_new_buffer(struct gfs2_inode *ip, u64 block, 93 struct buffer_head **bhp) 94{ 95 struct buffer_head *bh; 96 97 bh = gfs2_meta_new(ip->i_gl, block); 98 gfs2_trans_add_meta(ip->i_gl, bh); 99 gfs2_metatype_set(bh, GFS2_METATYPE_JD, GFS2_FORMAT_JD); 100 gfs2_buffer_clear_tail(bh, sizeof(struct gfs2_meta_header)); 101 *bhp = bh; 102 return 0; 103} 104 105static int gfs2_dir_get_existing_buffer(struct gfs2_inode *ip, u64 block, 106 struct buffer_head **bhp) 107{ 108 struct buffer_head *bh; 109 int error; 110 111 error = gfs2_meta_read(ip->i_gl, block, DIO_WAIT, &bh); 112 if (error) 113 return error; 114 if (gfs2_metatype_check(GFS2_SB(&ip->i_inode), bh, GFS2_METATYPE_JD)) { 115 brelse(bh); 116 return -EIO; 117 } 118 *bhp = bh; 119 return 0; 120} 121 122static int gfs2_dir_write_stuffed(struct gfs2_inode *ip, const char *buf, 123 unsigned int offset, unsigned int size) 124{ 125 struct buffer_head *dibh; 126 int error; 127 128 error = gfs2_meta_inode_buffer(ip, &dibh); 129 if (error) 130 return error; 131 132 gfs2_trans_add_meta(ip->i_gl, dibh); 133 memcpy(dibh->b_data + offset + sizeof(struct gfs2_dinode), buf, size); 134 if (ip->i_inode.i_size < offset + size) 135 i_size_write(&ip->i_inode, offset + size); 136 ip->i_inode.i_mtime = ip->i_inode.i_ctime = CURRENT_TIME; 137 gfs2_dinode_out(ip, dibh->b_data); 138 139 brelse(dibh); 140 141 return size; 142} 143 144 145 146/** 147 * gfs2_dir_write_data - Write directory information to the inode 148 * @ip: The GFS2 inode 149 * @buf: The buffer containing information to be written 150 * @offset: The file offset to start writing at 151 * @size: The amount of data to write 152 * 153 * Returns: The number of bytes correctly written or error code 154 */ 155static int gfs2_dir_write_data(struct gfs2_inode *ip, const char *buf, 156 u64 offset, unsigned int size) 157{ 158 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); 159 struct buffer_head *dibh; 160 u64 lblock, dblock; 161 u32 extlen = 0; 162 unsigned int o; 163 int copied = 0; 164 int error = 0; 165 int new = 0; 166 167 if (!size) 168 return 0; 169 170 if (gfs2_is_stuffed(ip) && 171 offset + size <= sdp->sd_sb.sb_bsize - sizeof(struct gfs2_dinode)) 172 return gfs2_dir_write_stuffed(ip, buf, (unsigned int)offset, 173 size); 174 175 if (gfs2_assert_warn(sdp, gfs2_is_jdata(ip))) 176 return -EINVAL; 177 178 if (gfs2_is_stuffed(ip)) { 179 error = gfs2_unstuff_dinode(ip, NULL); 180 if (error) 181 return error; 182 } 183 184 lblock = offset; 185 o = do_div(lblock, sdp->sd_jbsize) + sizeof(struct gfs2_meta_header); 186 187 while (copied < size) { 188 unsigned int amount; 189 struct buffer_head *bh; 190 191 amount = size - copied; 192 if (amount > sdp->sd_sb.sb_bsize - o) 193 amount = sdp->sd_sb.sb_bsize - o; 194 195 if (!extlen) { 196 new = 1; 197 error = gfs2_extent_map(&ip->i_inode, lblock, &new, 198 &dblock, &extlen); 199 if (error) 200 goto fail; 201 error = -EIO; 202 if (gfs2_assert_withdraw(sdp, dblock)) 203 goto fail; 204 } 205 206 if (amount == sdp->sd_jbsize || new) 207 error = gfs2_dir_get_new_buffer(ip, dblock, &bh); 208 else 209 error = gfs2_dir_get_existing_buffer(ip, dblock, &bh); 210 211 if (error) 212 goto fail; 213 214 gfs2_trans_add_meta(ip->i_gl, bh); 215 memcpy(bh->b_data + o, buf, amount); 216 brelse(bh); 217 218 buf += amount; 219 copied += amount; 220 lblock++; 221 dblock++; 222 extlen--; 223 224 o = sizeof(struct gfs2_meta_header); 225 } 226 227out: 228 error = gfs2_meta_inode_buffer(ip, &dibh); 229 if (error) 230 return error; 231 232 if (ip->i_inode.i_size < offset + copied) 233 i_size_write(&ip->i_inode, offset + copied); 234 ip->i_inode.i_mtime = ip->i_inode.i_ctime = CURRENT_TIME; 235 236 gfs2_trans_add_meta(ip->i_gl, dibh); 237 gfs2_dinode_out(ip, dibh->b_data); 238 brelse(dibh); 239 240 return copied; 241fail: 242 if (copied) 243 goto out; 244 return error; 245} 246 247static int gfs2_dir_read_stuffed(struct gfs2_inode *ip, __be64 *buf, 248 unsigned int size) 249{ 250 struct buffer_head *dibh; 251 int error; 252 253 error = gfs2_meta_inode_buffer(ip, &dibh); 254 if (!error) { 255 memcpy(buf, dibh->b_data + sizeof(struct gfs2_dinode), size); 256 brelse(dibh); 257 } 258 259 return (error) ? error : size; 260} 261 262 263/** 264 * gfs2_dir_read_data - Read a data from a directory inode 265 * @ip: The GFS2 Inode 266 * @buf: The buffer to place result into 267 * @size: Amount of data to transfer 268 * 269 * Returns: The amount of data actually copied or the error 270 */ 271static int gfs2_dir_read_data(struct gfs2_inode *ip, __be64 *buf, 272 unsigned int size) 273{ 274 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); 275 u64 lblock, dblock; 276 u32 extlen = 0; 277 unsigned int o; 278 int copied = 0; 279 int error = 0; 280 281 if (gfs2_is_stuffed(ip)) 282 return gfs2_dir_read_stuffed(ip, buf, size); 283 284 if (gfs2_assert_warn(sdp, gfs2_is_jdata(ip))) 285 return -EINVAL; 286 287 lblock = 0; 288 o = do_div(lblock, sdp->sd_jbsize) + sizeof(struct gfs2_meta_header); 289 290 while (copied < size) { 291 unsigned int amount; 292 struct buffer_head *bh; 293 int new; 294 295 amount = size - copied; 296 if (amount > sdp->sd_sb.sb_bsize - o) 297 amount = sdp->sd_sb.sb_bsize - o; 298 299 if (!extlen) { 300 new = 0; 301 error = gfs2_extent_map(&ip->i_inode, lblock, &new, 302 &dblock, &extlen); 303 if (error || !dblock) 304 goto fail; 305 BUG_ON(extlen < 1); 306 bh = gfs2_meta_ra(ip->i_gl, dblock, extlen); 307 } else { 308 error = gfs2_meta_read(ip->i_gl, dblock, DIO_WAIT, &bh); 309 if (error) 310 goto fail; 311 } 312 error = gfs2_metatype_check(sdp, bh, GFS2_METATYPE_JD); 313 if (error) { 314 brelse(bh); 315 goto fail; 316 } 317 dblock++; 318 extlen--; 319 memcpy(buf, bh->b_data + o, amount); 320 brelse(bh); 321 buf += (amount/sizeof(__be64)); 322 copied += amount; 323 lblock++; 324 o = sizeof(struct gfs2_meta_header); 325 } 326 327 return copied; 328fail: 329 return (copied) ? copied : error; 330} 331 332/** 333 * gfs2_dir_get_hash_table - Get pointer to the dir hash table 334 * @ip: The inode in question 335 * 336 * Returns: The hash table or an error 337 */ 338 339static __be64 *gfs2_dir_get_hash_table(struct gfs2_inode *ip) 340{ 341 struct inode *inode = &ip->i_inode; 342 int ret; 343 u32 hsize; 344 __be64 *hc; 345 346 BUG_ON(!(ip->i_diskflags & GFS2_DIF_EXHASH)); 347 348 hc = ip->i_hash_cache; 349 if (hc) 350 return hc; 351 352 hsize = 1 << ip->i_depth; 353 hsize *= sizeof(__be64); 354 if (hsize != i_size_read(&ip->i_inode)) { 355 gfs2_consist_inode(ip); 356 return ERR_PTR(-EIO); 357 } 358 359 hc = kmalloc(hsize, GFP_NOFS | __GFP_NOWARN); 360 if (hc == NULL) 361 hc = __vmalloc(hsize, GFP_NOFS, PAGE_KERNEL); 362 363 if (hc == NULL) 364 return ERR_PTR(-ENOMEM); 365 366 ret = gfs2_dir_read_data(ip, hc, hsize); 367 if (ret < 0) { 368 kvfree(hc); 369 return ERR_PTR(ret); 370 } 371 372 spin_lock(&inode->i_lock); 373 if (likely(!ip->i_hash_cache)) { 374 ip->i_hash_cache = hc; 375 hc = NULL; 376 } 377 spin_unlock(&inode->i_lock); 378 kvfree(hc); 379 380 return ip->i_hash_cache; 381} 382 383/** 384 * gfs2_dir_hash_inval - Invalidate dir hash 385 * @ip: The directory inode 386 * 387 * Must be called with an exclusive glock, or during glock invalidation. 388 */ 389void gfs2_dir_hash_inval(struct gfs2_inode *ip) 390{ 391 __be64 *hc; 392 393 spin_lock(&ip->i_inode.i_lock); 394 hc = ip->i_hash_cache; 395 ip->i_hash_cache = NULL; 396 spin_unlock(&ip->i_inode.i_lock); 397 398 kvfree(hc); 399} 400 401static inline int gfs2_dirent_sentinel(const struct gfs2_dirent *dent) 402{ 403 return dent->de_inum.no_addr == 0 || dent->de_inum.no_formal_ino == 0; 404} 405 406static inline int __gfs2_dirent_find(const struct gfs2_dirent *dent, 407 const struct qstr *name, int ret) 408{ 409 if (!gfs2_dirent_sentinel(dent) && 410 be32_to_cpu(dent->de_hash) == name->hash && 411 be16_to_cpu(dent->de_name_len) == name->len && 412 memcmp(dent+1, name->name, name->len) == 0) 413 return ret; 414 return 0; 415} 416 417static int gfs2_dirent_find(const struct gfs2_dirent *dent, 418 const struct qstr *name, 419 void *opaque) 420{ 421 return __gfs2_dirent_find(dent, name, 1); 422} 423 424static int gfs2_dirent_prev(const struct gfs2_dirent *dent, 425 const struct qstr *name, 426 void *opaque) 427{ 428 return __gfs2_dirent_find(dent, name, 2); 429} 430 431/* 432 * name->name holds ptr to start of block. 433 * name->len holds size of block. 434 */ 435static int gfs2_dirent_last(const struct gfs2_dirent *dent, 436 const struct qstr *name, 437 void *opaque) 438{ 439 const char *start = name->name; 440 const char *end = (const char *)dent + be16_to_cpu(dent->de_rec_len); 441 if (name->len == (end - start)) 442 return 1; 443 return 0; 444} 445 446static int gfs2_dirent_find_space(const struct gfs2_dirent *dent, 447 const struct qstr *name, 448 void *opaque) 449{ 450 unsigned required = GFS2_DIRENT_SIZE(name->len); 451 unsigned actual = GFS2_DIRENT_SIZE(be16_to_cpu(dent->de_name_len)); 452 unsigned totlen = be16_to_cpu(dent->de_rec_len); 453 454 if (gfs2_dirent_sentinel(dent)) 455 actual = 0; 456 if (totlen - actual >= required) 457 return 1; 458 return 0; 459} 460 461struct dirent_gather { 462 const struct gfs2_dirent **pdent; 463 unsigned offset; 464}; 465 466static int gfs2_dirent_gather(const struct gfs2_dirent *dent, 467 const struct qstr *name, 468 void *opaque) 469{ 470 struct dirent_gather *g = opaque; 471 if (!gfs2_dirent_sentinel(dent)) { 472 g->pdent[g->offset++] = dent; 473 } 474 return 0; 475} 476 477/* 478 * Other possible things to check: 479 * - Inode located within filesystem size (and on valid block) 480 * - Valid directory entry type 481 * Not sure how heavy-weight we want to make this... could also check 482 * hash is correct for example, but that would take a lot of extra time. 483 * For now the most important thing is to check that the various sizes 484 * are correct. 485 */ 486static int gfs2_check_dirent(struct gfs2_dirent *dent, unsigned int offset, 487 unsigned int size, unsigned int len, int first) 488{ 489 const char *msg = "gfs2_dirent too small"; 490 if (unlikely(size < sizeof(struct gfs2_dirent))) 491 goto error; 492 msg = "gfs2_dirent misaligned"; 493 if (unlikely(offset & 0x7)) 494 goto error; 495 msg = "gfs2_dirent points beyond end of block"; 496 if (unlikely(offset + size > len)) 497 goto error; 498 msg = "zero inode number"; 499 if (unlikely(!first && gfs2_dirent_sentinel(dent))) 500 goto error; 501 msg = "name length is greater than space in dirent"; 502 if (!gfs2_dirent_sentinel(dent) && 503 unlikely(sizeof(struct gfs2_dirent)+be16_to_cpu(dent->de_name_len) > 504 size)) 505 goto error; 506 return 0; 507error: 508 pr_warn("%s: %s (%s)\n", 509 __func__, msg, first ? "first in block" : "not first in block"); 510 return -EIO; 511} 512 513static int gfs2_dirent_offset(const void *buf) 514{ 515 const struct gfs2_meta_header *h = buf; 516 int offset; 517 518 BUG_ON(buf == NULL); 519 520 switch(be32_to_cpu(h->mh_type)) { 521 case GFS2_METATYPE_LF: 522 offset = sizeof(struct gfs2_leaf); 523 break; 524 case GFS2_METATYPE_DI: 525 offset = sizeof(struct gfs2_dinode); 526 break; 527 default: 528 goto wrong_type; 529 } 530 return offset; 531wrong_type: 532 pr_warn("%s: wrong block type %u\n", __func__, be32_to_cpu(h->mh_type)); 533 return -1; 534} 535 536static struct gfs2_dirent *gfs2_dirent_scan(struct inode *inode, void *buf, 537 unsigned int len, gfs2_dscan_t scan, 538 const struct qstr *name, 539 void *opaque) 540{ 541 struct gfs2_dirent *dent, *prev; 542 unsigned offset; 543 unsigned size; 544 int ret = 0; 545 546 ret = gfs2_dirent_offset(buf); 547 if (ret < 0) 548 goto consist_inode; 549 550 offset = ret; 551 prev = NULL; 552 dent = buf + offset; 553 size = be16_to_cpu(dent->de_rec_len); 554 if (gfs2_check_dirent(dent, offset, size, len, 1)) 555 goto consist_inode; 556 do { 557 ret = scan(dent, name, opaque); 558 if (ret) 559 break; 560 offset += size; 561 if (offset == len) 562 break; 563 prev = dent; 564 dent = buf + offset; 565 size = be16_to_cpu(dent->de_rec_len); 566 if (gfs2_check_dirent(dent, offset, size, len, 0)) 567 goto consist_inode; 568 } while(1); 569 570 switch(ret) { 571 case 0: 572 return NULL; 573 case 1: 574 return dent; 575 case 2: 576 return prev ? prev : dent; 577 default: 578 BUG_ON(ret > 0); 579 return ERR_PTR(ret); 580 } 581 582consist_inode: 583 gfs2_consist_inode(GFS2_I(inode)); 584 return ERR_PTR(-EIO); 585} 586 587static int dirent_check_reclen(struct gfs2_inode *dip, 588 const struct gfs2_dirent *d, const void *end_p) 589{ 590 const void *ptr = d; 591 u16 rec_len = be16_to_cpu(d->de_rec_len); 592 593 if (unlikely(rec_len < sizeof(struct gfs2_dirent))) 594 goto broken; 595 ptr += rec_len; 596 if (ptr < end_p) 597 return rec_len; 598 if (ptr == end_p) 599 return -ENOENT; 600broken: 601 gfs2_consist_inode(dip); 602 return -EIO; 603} 604 605/** 606 * dirent_next - Next dirent 607 * @dip: the directory 608 * @bh: The buffer 609 * @dent: Pointer to list of dirents 610 * 611 * Returns: 0 on success, error code otherwise 612 */ 613 614static int dirent_next(struct gfs2_inode *dip, struct buffer_head *bh, 615 struct gfs2_dirent **dent) 616{ 617 struct gfs2_dirent *cur = *dent, *tmp; 618 char *bh_end = bh->b_data + bh->b_size; 619 int ret; 620 621 ret = dirent_check_reclen(dip, cur, bh_end); 622 if (ret < 0) 623 return ret; 624 625 tmp = (void *)cur + ret; 626 ret = dirent_check_reclen(dip, tmp, bh_end); 627 if (ret == -EIO) 628 return ret; 629 630 /* Only the first dent could ever have de_inum.no_addr == 0 */ 631 if (gfs2_dirent_sentinel(tmp)) { 632 gfs2_consist_inode(dip); 633 return -EIO; 634 } 635 636 *dent = tmp; 637 return 0; 638} 639 640/** 641 * dirent_del - Delete a dirent 642 * @dip: The GFS2 inode 643 * @bh: The buffer 644 * @prev: The previous dirent 645 * @cur: The current dirent 646 * 647 */ 648 649static void dirent_del(struct gfs2_inode *dip, struct buffer_head *bh, 650 struct gfs2_dirent *prev, struct gfs2_dirent *cur) 651{ 652 u16 cur_rec_len, prev_rec_len; 653 654 if (gfs2_dirent_sentinel(cur)) { 655 gfs2_consist_inode(dip); 656 return; 657 } 658 659 gfs2_trans_add_meta(dip->i_gl, bh); 660 661 /* If there is no prev entry, this is the first entry in the block. 662 The de_rec_len is already as big as it needs to be. Just zero 663 out the inode number and return. */ 664 665 if (!prev) { 666 cur->de_inum.no_addr = 0; 667 cur->de_inum.no_formal_ino = 0; 668 return; 669 } 670 671 /* Combine this dentry with the previous one. */ 672 673 prev_rec_len = be16_to_cpu(prev->de_rec_len); 674 cur_rec_len = be16_to_cpu(cur->de_rec_len); 675 676 if ((char *)prev + prev_rec_len != (char *)cur) 677 gfs2_consist_inode(dip); 678 if ((char *)cur + cur_rec_len > bh->b_data + bh->b_size) 679 gfs2_consist_inode(dip); 680 681 prev_rec_len += cur_rec_len; 682 prev->de_rec_len = cpu_to_be16(prev_rec_len); 683} 684 685/* 686 * Takes a dent from which to grab space as an argument. Returns the 687 * newly created dent. 688 */ 689static struct gfs2_dirent *gfs2_init_dirent(struct inode *inode, 690 struct gfs2_dirent *dent, 691 const struct qstr *name, 692 struct buffer_head *bh) 693{ 694 struct gfs2_inode *ip = GFS2_I(inode); 695 struct gfs2_dirent *ndent; 696 unsigned offset = 0, totlen; 697 698 if (!gfs2_dirent_sentinel(dent)) 699 offset = GFS2_DIRENT_SIZE(be16_to_cpu(dent->de_name_len)); 700 totlen = be16_to_cpu(dent->de_rec_len); 701 BUG_ON(offset + name->len > totlen); 702 gfs2_trans_add_meta(ip->i_gl, bh); 703 ndent = (struct gfs2_dirent *)((char *)dent + offset); 704 dent->de_rec_len = cpu_to_be16(offset); 705 gfs2_qstr2dirent(name, totlen - offset, ndent); 706 return ndent; 707} 708 709static struct gfs2_dirent *gfs2_dirent_alloc(struct inode *inode, 710 struct buffer_head *bh, 711 const struct qstr *name) 712{ 713 struct gfs2_dirent *dent; 714 dent = gfs2_dirent_scan(inode, bh->b_data, bh->b_size, 715 gfs2_dirent_find_space, name, NULL); 716 if (!dent || IS_ERR(dent)) 717 return dent; 718 return gfs2_init_dirent(inode, dent, name, bh); 719} 720 721static int get_leaf(struct gfs2_inode *dip, u64 leaf_no, 722 struct buffer_head **bhp) 723{ 724 int error; 725 726 error = gfs2_meta_read(dip->i_gl, leaf_no, DIO_WAIT, bhp); 727 if (!error && gfs2_metatype_check(GFS2_SB(&dip->i_inode), *bhp, GFS2_METATYPE_LF)) { 728 /* pr_info("block num=%llu\n", leaf_no); */ 729 error = -EIO; 730 } 731 732 return error; 733} 734 735/** 736 * get_leaf_nr - Get a leaf number associated with the index 737 * @dip: The GFS2 inode 738 * @index: 739 * @leaf_out: 740 * 741 * Returns: 0 on success, error code otherwise 742 */ 743 744static int get_leaf_nr(struct gfs2_inode *dip, u32 index, 745 u64 *leaf_out) 746{ 747 __be64 *hash; 748 749 hash = gfs2_dir_get_hash_table(dip); 750 if (IS_ERR(hash)) 751 return PTR_ERR(hash); 752 *leaf_out = be64_to_cpu(*(hash + index)); 753 return 0; 754} 755 756static int get_first_leaf(struct gfs2_inode *dip, u32 index, 757 struct buffer_head **bh_out) 758{ 759 u64 leaf_no; 760 int error; 761 762 error = get_leaf_nr(dip, index, &leaf_no); 763 if (!error) 764 error = get_leaf(dip, leaf_no, bh_out); 765 766 return error; 767} 768 769static struct gfs2_dirent *gfs2_dirent_search(struct inode *inode, 770 const struct qstr *name, 771 gfs2_dscan_t scan, 772 struct buffer_head **pbh) 773{ 774 struct buffer_head *bh; 775 struct gfs2_dirent *dent; 776 struct gfs2_inode *ip = GFS2_I(inode); 777 int error; 778 779 if (ip->i_diskflags & GFS2_DIF_EXHASH) { 780 struct gfs2_leaf *leaf; 781 unsigned hsize = 1 << ip->i_depth; 782 unsigned index; 783 u64 ln; 784 if (hsize * sizeof(u64) != i_size_read(inode)) { 785 gfs2_consist_inode(ip); 786 return ERR_PTR(-EIO); 787 } 788 789 index = name->hash >> (32 - ip->i_depth); 790 error = get_first_leaf(ip, index, &bh); 791 if (error) 792 return ERR_PTR(error); 793 do { 794 dent = gfs2_dirent_scan(inode, bh->b_data, bh->b_size, 795 scan, name, NULL); 796 if (dent) 797 goto got_dent; 798 leaf = (struct gfs2_leaf *)bh->b_data; 799 ln = be64_to_cpu(leaf->lf_next); 800 brelse(bh); 801 if (!ln) 802 break; 803 804 error = get_leaf(ip, ln, &bh); 805 } while(!error); 806 807 return error ? ERR_PTR(error) : NULL; 808 } 809 810 811 error = gfs2_meta_inode_buffer(ip, &bh); 812 if (error) 813 return ERR_PTR(error); 814 dent = gfs2_dirent_scan(inode, bh->b_data, bh->b_size, scan, name, NULL); 815got_dent: 816 if (unlikely(dent == NULL || IS_ERR(dent))) { 817 brelse(bh); 818 bh = NULL; 819 } 820 *pbh = bh; 821 return dent; 822} 823 824static struct gfs2_leaf *new_leaf(struct inode *inode, struct buffer_head **pbh, u16 depth) 825{ 826 struct gfs2_inode *ip = GFS2_I(inode); 827 unsigned int n = 1; 828 u64 bn; 829 int error; 830 struct buffer_head *bh; 831 struct gfs2_leaf *leaf; 832 struct gfs2_dirent *dent; 833 struct qstr name = { .name = "" }; 834 struct timespec tv = CURRENT_TIME; 835 836 error = gfs2_alloc_blocks(ip, &bn, &n, 0, NULL); 837 if (error) 838 return NULL; 839 bh = gfs2_meta_new(ip->i_gl, bn); 840 if (!bh) 841 return NULL; 842 843 gfs2_trans_add_unrevoke(GFS2_SB(inode), bn, 1); 844 gfs2_trans_add_meta(ip->i_gl, bh); 845 gfs2_metatype_set(bh, GFS2_METATYPE_LF, GFS2_FORMAT_LF); 846 leaf = (struct gfs2_leaf *)bh->b_data; 847 leaf->lf_depth = cpu_to_be16(depth); 848 leaf->lf_entries = 0; 849 leaf->lf_dirent_format = cpu_to_be32(GFS2_FORMAT_DE); 850 leaf->lf_next = 0; 851 leaf->lf_inode = cpu_to_be64(ip->i_no_addr); 852 leaf->lf_dist = cpu_to_be32(1); 853 leaf->lf_nsec = cpu_to_be32(tv.tv_nsec); 854 leaf->lf_sec = cpu_to_be64(tv.tv_sec); 855 memset(leaf->lf_reserved2, 0, sizeof(leaf->lf_reserved2)); 856 dent = (struct gfs2_dirent *)(leaf+1); 857 gfs2_qstr2dirent(&name, bh->b_size - sizeof(struct gfs2_leaf), dent); 858 *pbh = bh; 859 return leaf; 860} 861 862/** 863 * dir_make_exhash - Convert a stuffed directory into an ExHash directory 864 * @dip: The GFS2 inode 865 * 866 * Returns: 0 on success, error code otherwise 867 */ 868 869static int dir_make_exhash(struct inode *inode) 870{ 871 struct gfs2_inode *dip = GFS2_I(inode); 872 struct gfs2_sbd *sdp = GFS2_SB(inode); 873 struct gfs2_dirent *dent; 874 struct qstr args; 875 struct buffer_head *bh, *dibh; 876 struct gfs2_leaf *leaf; 877 int y; 878 u32 x; 879 __be64 *lp; 880 u64 bn; 881 int error; 882 883 error = gfs2_meta_inode_buffer(dip, &dibh); 884 if (error) 885 return error; 886 887 /* Turn over a new leaf */ 888 889 leaf = new_leaf(inode, &bh, 0); 890 if (!leaf) 891 return -ENOSPC; 892 bn = bh->b_blocknr; 893 894 gfs2_assert(sdp, dip->i_entries < (1 << 16)); 895 leaf->lf_entries = cpu_to_be16(dip->i_entries); 896 897 /* Copy dirents */ 898 899 gfs2_buffer_copy_tail(bh, sizeof(struct gfs2_leaf), dibh, 900 sizeof(struct gfs2_dinode)); 901 902 /* Find last entry */ 903 904 x = 0; 905 args.len = bh->b_size - sizeof(struct gfs2_dinode) + 906 sizeof(struct gfs2_leaf); 907 args.name = bh->b_data; 908 dent = gfs2_dirent_scan(&dip->i_inode, bh->b_data, bh->b_size, 909 gfs2_dirent_last, &args, NULL); 910 if (!dent) { 911 brelse(bh); 912 brelse(dibh); 913 return -EIO; 914 } 915 if (IS_ERR(dent)) { 916 brelse(bh); 917 brelse(dibh); 918 return PTR_ERR(dent); 919 } 920 921 /* Adjust the last dirent's record length 922 (Remember that dent still points to the last entry.) */ 923 924 dent->de_rec_len = cpu_to_be16(be16_to_cpu(dent->de_rec_len) + 925 sizeof(struct gfs2_dinode) - 926 sizeof(struct gfs2_leaf)); 927 928 brelse(bh); 929 930 /* We're done with the new leaf block, now setup the new 931 hash table. */ 932 933 gfs2_trans_add_meta(dip->i_gl, dibh); 934 gfs2_buffer_clear_tail(dibh, sizeof(struct gfs2_dinode)); 935 936 lp = (__be64 *)(dibh->b_data + sizeof(struct gfs2_dinode)); 937 938 for (x = sdp->sd_hash_ptrs; x--; lp++) 939 *lp = cpu_to_be64(bn); 940 941 i_size_write(inode, sdp->sd_sb.sb_bsize / 2); 942 gfs2_add_inode_blocks(&dip->i_inode, 1); 943 dip->i_diskflags |= GFS2_DIF_EXHASH; 944 945 for (x = sdp->sd_hash_ptrs, y = -1; x; x >>= 1, y++) ; 946 dip->i_depth = y; 947 948 gfs2_dinode_out(dip, dibh->b_data); 949 950 brelse(dibh); 951 952 return 0; 953} 954 955/** 956 * dir_split_leaf - Split a leaf block into two 957 * @dip: The GFS2 inode 958 * @index: 959 * @leaf_no: 960 * 961 * Returns: 0 on success, error code on failure 962 */ 963 964static int dir_split_leaf(struct inode *inode, const struct qstr *name) 965{ 966 struct gfs2_inode *dip = GFS2_I(inode); 967 struct buffer_head *nbh, *obh, *dibh; 968 struct gfs2_leaf *nleaf, *oleaf; 969 struct gfs2_dirent *dent = NULL, *prev = NULL, *next = NULL, *new; 970 u32 start, len, half_len, divider; 971 u64 bn, leaf_no; 972 __be64 *lp; 973 u32 index; 974 int x, moved = 0; 975 int error; 976 977 index = name->hash >> (32 - dip->i_depth); 978 error = get_leaf_nr(dip, index, &leaf_no); 979 if (error) 980 return error; 981 982 /* Get the old leaf block */ 983 error = get_leaf(dip, leaf_no, &obh); 984 if (error) 985 return error; 986 987 oleaf = (struct gfs2_leaf *)obh->b_data; 988 if (dip->i_depth == be16_to_cpu(oleaf->lf_depth)) { 989 brelse(obh); 990 return 1; /* can't split */ 991 } 992 993 gfs2_trans_add_meta(dip->i_gl, obh); 994 995 nleaf = new_leaf(inode, &nbh, be16_to_cpu(oleaf->lf_depth) + 1); 996 if (!nleaf) { 997 brelse(obh); 998 return -ENOSPC; 999 } 1000 bn = nbh->b_blocknr; 1001 1002 /* Compute the start and len of leaf pointers in the hash table. */ 1003 len = 1 << (dip->i_depth - be16_to_cpu(oleaf->lf_depth)); 1004 half_len = len >> 1; 1005 if (!half_len) { 1006 pr_warn("i_depth %u lf_depth %u index %u\n", 1007 dip->i_depth, be16_to_cpu(oleaf->lf_depth), index); 1008 gfs2_consist_inode(dip); 1009 error = -EIO; 1010 goto fail_brelse; 1011 } 1012 1013 start = (index & ~(len - 1)); 1014 1015 /* Change the pointers. 1016 Don't bother distinguishing stuffed from non-stuffed. 1017 This code is complicated enough already. */ 1018 lp = kmalloc(half_len * sizeof(__be64), GFP_NOFS); 1019 if (!lp) { 1020 error = -ENOMEM; 1021 goto fail_brelse; 1022 } 1023 1024 /* Change the pointers */ 1025 for (x = 0; x < half_len; x++) 1026 lp[x] = cpu_to_be64(bn); 1027 1028 gfs2_dir_hash_inval(dip); 1029 1030 error = gfs2_dir_write_data(dip, (char *)lp, start * sizeof(u64), 1031 half_len * sizeof(u64)); 1032 if (error != half_len * sizeof(u64)) { 1033 if (error >= 0) 1034 error = -EIO; 1035 goto fail_lpfree; 1036 } 1037 1038 kfree(lp); 1039 1040 /* Compute the divider */ 1041 divider = (start + half_len) << (32 - dip->i_depth); 1042 1043 /* Copy the entries */ 1044 dent = (struct gfs2_dirent *)(obh->b_data + sizeof(struct gfs2_leaf)); 1045 1046 do { 1047 next = dent; 1048 if (dirent_next(dip, obh, &next)) 1049 next = NULL; 1050 1051 if (!gfs2_dirent_sentinel(dent) && 1052 be32_to_cpu(dent->de_hash) < divider) { 1053 struct qstr str; 1054 str.name = (char*)(dent+1); 1055 str.len = be16_to_cpu(dent->de_name_len); 1056 str.hash = be32_to_cpu(dent->de_hash); 1057 new = gfs2_dirent_alloc(inode, nbh, &str); 1058 if (IS_ERR(new)) { 1059 error = PTR_ERR(new); 1060 break; 1061 } 1062 1063 new->de_inum = dent->de_inum; /* No endian worries */ 1064 new->de_type = dent->de_type; /* No endian worries */ 1065 be16_add_cpu(&nleaf->lf_entries, 1); 1066 1067 dirent_del(dip, obh, prev, dent); 1068 1069 if (!oleaf->lf_entries) 1070 gfs2_consist_inode(dip); 1071 be16_add_cpu(&oleaf->lf_entries, -1); 1072 1073 if (!prev) 1074 prev = dent; 1075 1076 moved = 1; 1077 } else { 1078 prev = dent; 1079 } 1080 dent = next; 1081 } while (dent); 1082 1083 oleaf->lf_depth = nleaf->lf_depth; 1084 1085 error = gfs2_meta_inode_buffer(dip, &dibh); 1086 if (!gfs2_assert_withdraw(GFS2_SB(&dip->i_inode), !error)) { 1087 gfs2_trans_add_meta(dip->i_gl, dibh); 1088 gfs2_add_inode_blocks(&dip->i_inode, 1); 1089 gfs2_dinode_out(dip, dibh->b_data); 1090 brelse(dibh); 1091 } 1092 1093 brelse(obh); 1094 brelse(nbh); 1095 1096 return error; 1097 1098fail_lpfree: 1099 kfree(lp); 1100 1101fail_brelse: 1102 brelse(obh); 1103 brelse(nbh); 1104 return error; 1105} 1106 1107/** 1108 * dir_double_exhash - Double size of ExHash table 1109 * @dip: The GFS2 dinode 1110 * 1111 * Returns: 0 on success, error code on failure 1112 */ 1113 1114static int dir_double_exhash(struct gfs2_inode *dip) 1115{ 1116 struct buffer_head *dibh; 1117 u32 hsize; 1118 u32 hsize_bytes; 1119 __be64 *hc; 1120 __be64 *hc2, *h; 1121 int x; 1122 int error = 0; 1123 1124 hsize = 1 << dip->i_depth; 1125 hsize_bytes = hsize * sizeof(__be64); 1126 1127 hc = gfs2_dir_get_hash_table(dip); 1128 if (IS_ERR(hc)) 1129 return PTR_ERR(hc); 1130 1131 hc2 = kmalloc(hsize_bytes * 2, GFP_NOFS | __GFP_NOWARN); 1132 if (hc2 == NULL) 1133 hc2 = __vmalloc(hsize_bytes * 2, GFP_NOFS, PAGE_KERNEL); 1134 1135 if (!hc2) 1136 return -ENOMEM; 1137 1138 h = hc2; 1139 error = gfs2_meta_inode_buffer(dip, &dibh); 1140 if (error) 1141 goto out_kfree; 1142 1143 for (x = 0; x < hsize; x++) { 1144 *h++ = *hc; 1145 *h++ = *hc; 1146 hc++; 1147 } 1148 1149 error = gfs2_dir_write_data(dip, (char *)hc2, 0, hsize_bytes * 2); 1150 if (error != (hsize_bytes * 2)) 1151 goto fail; 1152 1153 gfs2_dir_hash_inval(dip); 1154 dip->i_hash_cache = hc2; 1155 dip->i_depth++; 1156 gfs2_dinode_out(dip, dibh->b_data); 1157 brelse(dibh); 1158 return 0; 1159 1160fail: 1161 /* Replace original hash table & size */ 1162 gfs2_dir_write_data(dip, (char *)hc, 0, hsize_bytes); 1163 i_size_write(&dip->i_inode, hsize_bytes); 1164 gfs2_dinode_out(dip, dibh->b_data); 1165 brelse(dibh); 1166out_kfree: 1167 kvfree(hc2); 1168 return error; 1169} 1170 1171/** 1172 * compare_dents - compare directory entries by hash value 1173 * @a: first dent 1174 * @b: second dent 1175 * 1176 * When comparing the hash entries of @a to @b: 1177 * gt: returns 1 1178 * lt: returns -1 1179 * eq: returns 0 1180 */ 1181 1182static int compare_dents(const void *a, const void *b) 1183{ 1184 const struct gfs2_dirent *dent_a, *dent_b; 1185 u32 hash_a, hash_b; 1186 int ret = 0; 1187 1188 dent_a = *(const struct gfs2_dirent **)a; 1189 hash_a = be32_to_cpu(dent_a->de_hash); 1190 1191 dent_b = *(const struct gfs2_dirent **)b; 1192 hash_b = be32_to_cpu(dent_b->de_hash); 1193 1194 if (hash_a > hash_b) 1195 ret = 1; 1196 else if (hash_a < hash_b) 1197 ret = -1; 1198 else { 1199 unsigned int len_a = be16_to_cpu(dent_a->de_name_len); 1200 unsigned int len_b = be16_to_cpu(dent_b->de_name_len); 1201 1202 if (len_a > len_b) 1203 ret = 1; 1204 else if (len_a < len_b) 1205 ret = -1; 1206 else 1207 ret = memcmp(dent_a + 1, dent_b + 1, len_a); 1208 } 1209 1210 return ret; 1211} 1212 1213/** 1214 * do_filldir_main - read out directory entries 1215 * @dip: The GFS2 inode 1216 * @ctx: what to feed the entries to 1217 * @darr: an array of struct gfs2_dirent pointers to read 1218 * @entries: the number of entries in darr 1219 * @copied: pointer to int that's non-zero if a entry has been copied out 1220 * 1221 * Jump through some hoops to make sure that if there are hash collsions, 1222 * they are read out at the beginning of a buffer. We want to minimize 1223 * the possibility that they will fall into different readdir buffers or 1224 * that someone will want to seek to that location. 1225 * 1226 * Returns: errno, >0 if the actor tells you to stop 1227 */ 1228 1229static int do_filldir_main(struct gfs2_inode *dip, struct dir_context *ctx, 1230 const struct gfs2_dirent **darr, u32 entries, 1231 int *copied) 1232{ 1233 const struct gfs2_dirent *dent, *dent_next; 1234 u64 off, off_next; 1235 unsigned int x, y; 1236 int run = 0; 1237 1238 sort(darr, entries, sizeof(struct gfs2_dirent *), compare_dents, NULL); 1239 1240 dent_next = darr[0]; 1241 off_next = be32_to_cpu(dent_next->de_hash); 1242 off_next = gfs2_disk_hash2offset(off_next); 1243 1244 for (x = 0, y = 1; x < entries; x++, y++) { 1245 dent = dent_next; 1246 off = off_next; 1247 1248 if (y < entries) { 1249 dent_next = darr[y]; 1250 off_next = be32_to_cpu(dent_next->de_hash); 1251 off_next = gfs2_disk_hash2offset(off_next); 1252 1253 if (off < ctx->pos) 1254 continue; 1255 ctx->pos = off; 1256 1257 if (off_next == off) { 1258 if (*copied && !run) 1259 return 1; 1260 run = 1; 1261 } else 1262 run = 0; 1263 } else { 1264 if (off < ctx->pos) 1265 continue; 1266 ctx->pos = off; 1267 } 1268 1269 if (!dir_emit(ctx, (const char *)(dent + 1), 1270 be16_to_cpu(dent->de_name_len), 1271 be64_to_cpu(dent->de_inum.no_addr), 1272 be16_to_cpu(dent->de_type))) 1273 return 1; 1274 1275 *copied = 1; 1276 } 1277 1278 /* Increment the ctx->pos by one, so the next time we come into the 1279 do_filldir fxn, we get the next entry instead of the last one in the 1280 current leaf */ 1281 1282 ctx->pos++; 1283 1284 return 0; 1285} 1286 1287static void *gfs2_alloc_sort_buffer(unsigned size) 1288{ 1289 void *ptr = NULL; 1290 1291 if (size < KMALLOC_MAX_SIZE) 1292 ptr = kmalloc(size, GFP_NOFS | __GFP_NOWARN); 1293 if (!ptr) 1294 ptr = __vmalloc(size, GFP_NOFS, PAGE_KERNEL); 1295 return ptr; 1296} 1297 1298static int gfs2_dir_read_leaf(struct inode *inode, struct dir_context *ctx, 1299 int *copied, unsigned *depth, 1300 u64 leaf_no) 1301{ 1302 struct gfs2_inode *ip = GFS2_I(inode); 1303 struct gfs2_sbd *sdp = GFS2_SB(inode); 1304 struct buffer_head *bh; 1305 struct gfs2_leaf *lf; 1306 unsigned entries = 0, entries2 = 0; 1307 unsigned leaves = 0; 1308 const struct gfs2_dirent **darr, *dent; 1309 struct dirent_gather g; 1310 struct buffer_head **larr; 1311 int leaf = 0; 1312 int error, i; 1313 u64 lfn = leaf_no; 1314 1315 do { 1316 error = get_leaf(ip, lfn, &bh); 1317 if (error) 1318 goto out; 1319 lf = (struct gfs2_leaf *)bh->b_data; 1320 if (leaves == 0) 1321 *depth = be16_to_cpu(lf->lf_depth); 1322 entries += be16_to_cpu(lf->lf_entries); 1323 leaves++; 1324 lfn = be64_to_cpu(lf->lf_next); 1325 brelse(bh); 1326 } while(lfn); 1327 1328 if (!entries) 1329 return 0; 1330 1331 error = -ENOMEM; 1332 /* 1333 * The extra 99 entries are not normally used, but are a buffer 1334 * zone in case the number of entries in the leaf is corrupt. 1335 * 99 is the maximum number of entries that can fit in a single 1336 * leaf block. 1337 */ 1338 larr = gfs2_alloc_sort_buffer((leaves + entries + 99) * sizeof(void *)); 1339 if (!larr) 1340 goto out; 1341 darr = (const struct gfs2_dirent **)(larr + leaves); 1342 g.pdent = darr; 1343 g.offset = 0; 1344 lfn = leaf_no; 1345 1346 do { 1347 error = get_leaf(ip, lfn, &bh); 1348 if (error) 1349 goto out_free; 1350 lf = (struct gfs2_leaf *)bh->b_data; 1351 lfn = be64_to_cpu(lf->lf_next); 1352 if (lf->lf_entries) { 1353 entries2 += be16_to_cpu(lf->lf_entries); 1354 dent = gfs2_dirent_scan(inode, bh->b_data, bh->b_size, 1355 gfs2_dirent_gather, NULL, &g); 1356 error = PTR_ERR(dent); 1357 if (IS_ERR(dent)) 1358 goto out_free; 1359 if (entries2 != g.offset) { 1360 fs_warn(sdp, "Number of entries corrupt in dir " 1361 "leaf %llu, entries2 (%u) != " 1362 "g.offset (%u)\n", 1363 (unsigned long long)bh->b_blocknr, 1364 entries2, g.offset); 1365 1366 error = -EIO; 1367 goto out_free; 1368 } 1369 error = 0; 1370 larr[leaf++] = bh; 1371 } else { 1372 brelse(bh); 1373 } 1374 } while(lfn); 1375 1376 BUG_ON(entries2 != entries); 1377 error = do_filldir_main(ip, ctx, darr, entries, copied); 1378out_free: 1379 for(i = 0; i < leaf; i++) 1380 brelse(larr[i]); 1381 kvfree(larr); 1382out: 1383 return error; 1384} 1385 1386/** 1387 * gfs2_dir_readahead - Issue read-ahead requests for leaf blocks. 1388 * 1389 * Note: we can't calculate each index like dir_e_read can because we don't 1390 * have the leaf, and therefore we don't have the depth, and therefore we 1391 * don't have the length. So we have to just read enough ahead to make up 1392 * for the loss of information. 1393 */ 1394static void gfs2_dir_readahead(struct inode *inode, unsigned hsize, u32 index, 1395 struct file_ra_state *f_ra) 1396{ 1397 struct gfs2_inode *ip = GFS2_I(inode); 1398 struct gfs2_glock *gl = ip->i_gl; 1399 struct buffer_head *bh; 1400 u64 blocknr = 0, last; 1401 unsigned count; 1402 1403 /* First check if we've already read-ahead for the whole range. */ 1404 if (index + MAX_RA_BLOCKS < f_ra->start) 1405 return; 1406 1407 f_ra->start = max((pgoff_t)index, f_ra->start); 1408 for (count = 0; count < MAX_RA_BLOCKS; count++) { 1409 if (f_ra->start >= hsize) /* if exceeded the hash table */ 1410 break; 1411 1412 last = blocknr; 1413 blocknr = be64_to_cpu(ip->i_hash_cache[f_ra->start]); 1414 f_ra->start++; 1415 if (blocknr == last) 1416 continue; 1417 1418 bh = gfs2_getbuf(gl, blocknr, 1); 1419 if (trylock_buffer(bh)) { 1420 if (buffer_uptodate(bh)) { 1421 unlock_buffer(bh); 1422 brelse(bh); 1423 continue; 1424 } 1425 bh->b_end_io = end_buffer_read_sync; 1426 submit_bh(READA | REQ_META, bh); 1427 continue; 1428 } 1429 brelse(bh); 1430 } 1431} 1432 1433/** 1434 * dir_e_read - Reads the entries from a directory into a filldir buffer 1435 * @dip: dinode pointer 1436 * @ctx: actor to feed the entries to 1437 * 1438 * Returns: errno 1439 */ 1440 1441static int dir_e_read(struct inode *inode, struct dir_context *ctx, 1442 struct file_ra_state *f_ra) 1443{ 1444 struct gfs2_inode *dip = GFS2_I(inode); 1445 u32 hsize, len = 0; 1446 u32 hash, index; 1447 __be64 *lp; 1448 int copied = 0; 1449 int error = 0; 1450 unsigned depth = 0; 1451 1452 hsize = 1 << dip->i_depth; 1453 hash = gfs2_dir_offset2hash(ctx->pos); 1454 index = hash >> (32 - dip->i_depth); 1455 1456 if (dip->i_hash_cache == NULL) 1457 f_ra->start = 0; 1458 lp = gfs2_dir_get_hash_table(dip); 1459 if (IS_ERR(lp)) 1460 return PTR_ERR(lp); 1461 1462 gfs2_dir_readahead(inode, hsize, index, f_ra); 1463 1464 while (index < hsize) { 1465 error = gfs2_dir_read_leaf(inode, ctx, 1466 &copied, &depth, 1467 be64_to_cpu(lp[index])); 1468 if (error) 1469 break; 1470 1471 len = 1 << (dip->i_depth - depth); 1472 index = (index & ~(len - 1)) + len; 1473 } 1474 1475 if (error > 0) 1476 error = 0; 1477 return error; 1478} 1479 1480int gfs2_dir_read(struct inode *inode, struct dir_context *ctx, 1481 struct file_ra_state *f_ra) 1482{ 1483 struct gfs2_inode *dip = GFS2_I(inode); 1484 struct gfs2_sbd *sdp = GFS2_SB(inode); 1485 struct dirent_gather g; 1486 const struct gfs2_dirent **darr, *dent; 1487 struct buffer_head *dibh; 1488 int copied = 0; 1489 int error; 1490 1491 if (!dip->i_entries) 1492 return 0; 1493 1494 if (dip->i_diskflags & GFS2_DIF_EXHASH) 1495 return dir_e_read(inode, ctx, f_ra); 1496 1497 if (!gfs2_is_stuffed(dip)) { 1498 gfs2_consist_inode(dip); 1499 return -EIO; 1500 } 1501 1502 error = gfs2_meta_inode_buffer(dip, &dibh); 1503 if (error) 1504 return error; 1505 1506 error = -ENOMEM; 1507 /* 96 is max number of dirents which can be stuffed into an inode */ 1508 darr = kmalloc(96 * sizeof(struct gfs2_dirent *), GFP_NOFS); 1509 if (darr) { 1510 g.pdent = darr; 1511 g.offset = 0; 1512 dent = gfs2_dirent_scan(inode, dibh->b_data, dibh->b_size, 1513 gfs2_dirent_gather, NULL, &g); 1514 if (IS_ERR(dent)) { 1515 error = PTR_ERR(dent); 1516 goto out; 1517 } 1518 if (dip->i_entries != g.offset) { 1519 fs_warn(sdp, "Number of entries corrupt in dir %llu, " 1520 "ip->i_entries (%u) != g.offset (%u)\n", 1521 (unsigned long long)dip->i_no_addr, 1522 dip->i_entries, 1523 g.offset); 1524 error = -EIO; 1525 goto out; 1526 } 1527 error = do_filldir_main(dip, ctx, darr, 1528 dip->i_entries, &copied); 1529out: 1530 kfree(darr); 1531 } 1532 1533 if (error > 0) 1534 error = 0; 1535 1536 brelse(dibh); 1537 1538 return error; 1539} 1540 1541/** 1542 * gfs2_dir_search - Search a directory 1543 * @dip: The GFS2 dir inode 1544 * @name: The name we are looking up 1545 * @fail_on_exist: Fail if the name exists rather than looking it up 1546 * 1547 * This routine searches a directory for a file or another directory. 1548 * Assumes a glock is held on dip. 1549 * 1550 * Returns: errno 1551 */ 1552 1553struct inode *gfs2_dir_search(struct inode *dir, const struct qstr *name, 1554 bool fail_on_exist) 1555{ 1556 struct buffer_head *bh; 1557 struct gfs2_dirent *dent; 1558 u64 addr, formal_ino; 1559 u16 dtype; 1560 1561 dent = gfs2_dirent_search(dir, name, gfs2_dirent_find, &bh); 1562 if (dent) { 1563 if (IS_ERR(dent)) 1564 return ERR_CAST(dent); 1565 dtype = be16_to_cpu(dent->de_type); 1566 addr = be64_to_cpu(dent->de_inum.no_addr); 1567 formal_ino = be64_to_cpu(dent->de_inum.no_formal_ino); 1568 brelse(bh); 1569 if (fail_on_exist) 1570 return ERR_PTR(-EEXIST); 1571 return gfs2_inode_lookup(dir->i_sb, dtype, addr, formal_ino, 0); 1572 } 1573 return ERR_PTR(-ENOENT); 1574} 1575 1576int gfs2_dir_check(struct inode *dir, const struct qstr *name, 1577 const struct gfs2_inode *ip) 1578{ 1579 struct buffer_head *bh; 1580 struct gfs2_dirent *dent; 1581 int ret = -ENOENT; 1582 1583 dent = gfs2_dirent_search(dir, name, gfs2_dirent_find, &bh); 1584 if (dent) { 1585 if (IS_ERR(dent)) 1586 return PTR_ERR(dent); 1587 if (ip) { 1588 if (be64_to_cpu(dent->de_inum.no_addr) != ip->i_no_addr) 1589 goto out; 1590 if (be64_to_cpu(dent->de_inum.no_formal_ino) != 1591 ip->i_no_formal_ino) 1592 goto out; 1593 if (unlikely(IF2DT(ip->i_inode.i_mode) != 1594 be16_to_cpu(dent->de_type))) { 1595 gfs2_consist_inode(GFS2_I(dir)); 1596 ret = -EIO; 1597 goto out; 1598 } 1599 } 1600 ret = 0; 1601out: 1602 brelse(bh); 1603 } 1604 return ret; 1605} 1606 1607/** 1608 * dir_new_leaf - Add a new leaf onto hash chain 1609 * @inode: The directory 1610 * @name: The name we are adding 1611 * 1612 * This adds a new dir leaf onto an existing leaf when there is not 1613 * enough space to add a new dir entry. This is a last resort after 1614 * we've expanded the hash table to max size and also split existing 1615 * leaf blocks, so it will only occur for very large directories. 1616 * 1617 * The dist parameter is set to 1 for leaf blocks directly attached 1618 * to the hash table, 2 for one layer of indirection, 3 for two layers 1619 * etc. We are thus able to tell the difference between an old leaf 1620 * with dist set to zero (i.e. "don't know") and a new one where we 1621 * set this information for debug/fsck purposes. 1622 * 1623 * Returns: 0 on success, or -ve on error 1624 */ 1625 1626static int dir_new_leaf(struct inode *inode, const struct qstr *name) 1627{ 1628 struct buffer_head *bh, *obh; 1629 struct gfs2_inode *ip = GFS2_I(inode); 1630 struct gfs2_leaf *leaf, *oleaf; 1631 u32 dist = 1; 1632 int error; 1633 u32 index; 1634 u64 bn; 1635 1636 index = name->hash >> (32 - ip->i_depth); 1637 error = get_first_leaf(ip, index, &obh); 1638 if (error) 1639 return error; 1640 do { 1641 dist++; 1642 oleaf = (struct gfs2_leaf *)obh->b_data; 1643 bn = be64_to_cpu(oleaf->lf_next); 1644 if (!bn) 1645 break; 1646 brelse(obh); 1647 error = get_leaf(ip, bn, &obh); 1648 if (error) 1649 return error; 1650 } while(1); 1651 1652 gfs2_trans_add_meta(ip->i_gl, obh); 1653 1654 leaf = new_leaf(inode, &bh, be16_to_cpu(oleaf->lf_depth)); 1655 if (!leaf) { 1656 brelse(obh); 1657 return -ENOSPC; 1658 } 1659 leaf->lf_dist = cpu_to_be32(dist); 1660 oleaf->lf_next = cpu_to_be64(bh->b_blocknr); 1661 brelse(bh); 1662 brelse(obh); 1663 1664 error = gfs2_meta_inode_buffer(ip, &bh); 1665 if (error) 1666 return error; 1667 gfs2_trans_add_meta(ip->i_gl, bh); 1668 gfs2_add_inode_blocks(&ip->i_inode, 1); 1669 gfs2_dinode_out(ip, bh->b_data); 1670 brelse(bh); 1671 return 0; 1672} 1673 1674static u16 gfs2_inode_ra_len(const struct gfs2_inode *ip) 1675{ 1676 u64 where = ip->i_no_addr + 1; 1677 if (ip->i_eattr == where) 1678 return 1; 1679 return 0; 1680} 1681 1682/** 1683 * gfs2_dir_add - Add new filename into directory 1684 * @inode: The directory inode 1685 * @name: The new name 1686 * @nip: The GFS2 inode to be linked in to the directory 1687 * @da: The directory addition info 1688 * 1689 * If the call to gfs2_diradd_alloc_required resulted in there being 1690 * no need to allocate any new directory blocks, then it will contain 1691 * a pointer to the directory entry and the bh in which it resides. We 1692 * can use that without having to repeat the search. If there was no 1693 * free space, then we must now create more space. 1694 * 1695 * Returns: 0 on success, error code on failure 1696 */ 1697 1698int gfs2_dir_add(struct inode *inode, const struct qstr *name, 1699 const struct gfs2_inode *nip, struct gfs2_diradd *da) 1700{ 1701 struct gfs2_inode *ip = GFS2_I(inode); 1702 struct buffer_head *bh = da->bh; 1703 struct gfs2_dirent *dent = da->dent; 1704 struct timespec tv; 1705 struct gfs2_leaf *leaf; 1706 int error; 1707 1708 while(1) { 1709 if (da->bh == NULL) { 1710 dent = gfs2_dirent_search(inode, name, 1711 gfs2_dirent_find_space, &bh); 1712 } 1713 if (dent) { 1714 if (IS_ERR(dent)) 1715 return PTR_ERR(dent); 1716 dent = gfs2_init_dirent(inode, dent, name, bh); 1717 gfs2_inum_out(nip, dent); 1718 dent->de_type = cpu_to_be16(IF2DT(nip->i_inode.i_mode)); 1719 dent->de_rahead = cpu_to_be16(gfs2_inode_ra_len(nip)); 1720 tv = CURRENT_TIME; 1721 if (ip->i_diskflags & GFS2_DIF_EXHASH) { 1722 leaf = (struct gfs2_leaf *)bh->b_data; 1723 be16_add_cpu(&leaf->lf_entries, 1); 1724 leaf->lf_nsec = cpu_to_be32(tv.tv_nsec); 1725 leaf->lf_sec = cpu_to_be64(tv.tv_sec); 1726 } 1727 da->dent = NULL; 1728 da->bh = NULL; 1729 brelse(bh); 1730 ip->i_entries++; 1731 ip->i_inode.i_mtime = ip->i_inode.i_ctime = tv; 1732 if (S_ISDIR(nip->i_inode.i_mode)) 1733 inc_nlink(&ip->i_inode); 1734 mark_inode_dirty(inode); 1735 error = 0; 1736 break; 1737 } 1738 if (!(ip->i_diskflags & GFS2_DIF_EXHASH)) { 1739 error = dir_make_exhash(inode); 1740 if (error) 1741 break; 1742 continue; 1743 } 1744 error = dir_split_leaf(inode, name); 1745 if (error == 0) 1746 continue; 1747 if (error < 0) 1748 break; 1749 if (ip->i_depth < GFS2_DIR_MAX_DEPTH) { 1750 error = dir_double_exhash(ip); 1751 if (error) 1752 break; 1753 error = dir_split_leaf(inode, name); 1754 if (error < 0) 1755 break; 1756 if (error == 0) 1757 continue; 1758 } 1759 error = dir_new_leaf(inode, name); 1760 if (!error) 1761 continue; 1762 error = -ENOSPC; 1763 break; 1764 } 1765 return error; 1766} 1767 1768 1769/** 1770 * gfs2_dir_del - Delete a directory entry 1771 * @dip: The GFS2 inode 1772 * @filename: The filename 1773 * 1774 * Returns: 0 on success, error code on failure 1775 */ 1776 1777int gfs2_dir_del(struct gfs2_inode *dip, const struct dentry *dentry) 1778{ 1779 const struct qstr *name = &dentry->d_name; 1780 struct gfs2_dirent *dent, *prev = NULL; 1781 struct buffer_head *bh; 1782 struct timespec tv = CURRENT_TIME; 1783 1784 /* Returns _either_ the entry (if its first in block) or the 1785 previous entry otherwise */ 1786 dent = gfs2_dirent_search(&dip->i_inode, name, gfs2_dirent_prev, &bh); 1787 if (!dent) { 1788 gfs2_consist_inode(dip); 1789 return -EIO; 1790 } 1791 if (IS_ERR(dent)) { 1792 gfs2_consist_inode(dip); 1793 return PTR_ERR(dent); 1794 } 1795 /* If not first in block, adjust pointers accordingly */ 1796 if (gfs2_dirent_find(dent, name, NULL) == 0) { 1797 prev = dent; 1798 dent = (struct gfs2_dirent *)((char *)dent + be16_to_cpu(prev->de_rec_len)); 1799 } 1800 1801 dirent_del(dip, bh, prev, dent); 1802 if (dip->i_diskflags & GFS2_DIF_EXHASH) { 1803 struct gfs2_leaf *leaf = (struct gfs2_leaf *)bh->b_data; 1804 u16 entries = be16_to_cpu(leaf->lf_entries); 1805 if (!entries) 1806 gfs2_consist_inode(dip); 1807 leaf->lf_entries = cpu_to_be16(--entries); 1808 leaf->lf_nsec = cpu_to_be32(tv.tv_nsec); 1809 leaf->lf_sec = cpu_to_be64(tv.tv_sec); 1810 } 1811 brelse(bh); 1812 1813 if (!dip->i_entries) 1814 gfs2_consist_inode(dip); 1815 dip->i_entries--; 1816 dip->i_inode.i_mtime = dip->i_inode.i_ctime = tv; 1817 if (d_is_dir(dentry)) 1818 drop_nlink(&dip->i_inode); 1819 mark_inode_dirty(&dip->i_inode); 1820 1821 return 0; 1822} 1823 1824/** 1825 * gfs2_dir_mvino - Change inode number of directory entry 1826 * @dip: The GFS2 inode 1827 * @filename: 1828 * @new_inode: 1829 * 1830 * This routine changes the inode number of a directory entry. It's used 1831 * by rename to change ".." when a directory is moved. 1832 * Assumes a glock is held on dvp. 1833 * 1834 * Returns: errno 1835 */ 1836 1837int gfs2_dir_mvino(struct gfs2_inode *dip, const struct qstr *filename, 1838 const struct gfs2_inode *nip, unsigned int new_type) 1839{ 1840 struct buffer_head *bh; 1841 struct gfs2_dirent *dent; 1842 int error; 1843 1844 dent = gfs2_dirent_search(&dip->i_inode, filename, gfs2_dirent_find, &bh); 1845 if (!dent) { 1846 gfs2_consist_inode(dip); 1847 return -EIO; 1848 } 1849 if (IS_ERR(dent)) 1850 return PTR_ERR(dent); 1851 1852 gfs2_trans_add_meta(dip->i_gl, bh); 1853 gfs2_inum_out(nip, dent); 1854 dent->de_type = cpu_to_be16(new_type); 1855 1856 if (dip->i_diskflags & GFS2_DIF_EXHASH) { 1857 brelse(bh); 1858 error = gfs2_meta_inode_buffer(dip, &bh); 1859 if (error) 1860 return error; 1861 gfs2_trans_add_meta(dip->i_gl, bh); 1862 } 1863 1864 dip->i_inode.i_mtime = dip->i_inode.i_ctime = CURRENT_TIME; 1865 gfs2_dinode_out(dip, bh->b_data); 1866 brelse(bh); 1867 return 0; 1868} 1869 1870/** 1871 * leaf_dealloc - Deallocate a directory leaf 1872 * @dip: the directory 1873 * @index: the hash table offset in the directory 1874 * @len: the number of pointers to this leaf 1875 * @leaf_no: the leaf number 1876 * @leaf_bh: buffer_head for the starting leaf 1877 * last_dealloc: 1 if this is the final dealloc for the leaf, else 0 1878 * 1879 * Returns: errno 1880 */ 1881 1882static int leaf_dealloc(struct gfs2_inode *dip, u32 index, u32 len, 1883 u64 leaf_no, struct buffer_head *leaf_bh, 1884 int last_dealloc) 1885{ 1886 struct gfs2_sbd *sdp = GFS2_SB(&dip->i_inode); 1887 struct gfs2_leaf *tmp_leaf; 1888 struct gfs2_rgrp_list rlist; 1889 struct buffer_head *bh, *dibh; 1890 u64 blk, nblk; 1891 unsigned int rg_blocks = 0, l_blocks = 0; 1892 char *ht; 1893 unsigned int x, size = len * sizeof(u64); 1894 int error; 1895 1896 error = gfs2_rindex_update(sdp); 1897 if (error) 1898 return error; 1899 1900 memset(&rlist, 0, sizeof(struct gfs2_rgrp_list)); 1901 1902 ht = kzalloc(size, GFP_NOFS | __GFP_NOWARN); 1903 if (ht == NULL) 1904 ht = __vmalloc(size, GFP_NOFS | __GFP_NOWARN | __GFP_ZERO, 1905 PAGE_KERNEL); 1906 if (!ht) 1907 return -ENOMEM; 1908 1909 error = gfs2_quota_hold(dip, NO_UID_QUOTA_CHANGE, NO_GID_QUOTA_CHANGE); 1910 if (error) 1911 goto out; 1912 1913 /* Count the number of leaves */ 1914 bh = leaf_bh; 1915 1916 for (blk = leaf_no; blk; blk = nblk) { 1917 if (blk != leaf_no) { 1918 error = get_leaf(dip, blk, &bh); 1919 if (error) 1920 goto out_rlist; 1921 } 1922 tmp_leaf = (struct gfs2_leaf *)bh->b_data; 1923 nblk = be64_to_cpu(tmp_leaf->lf_next); 1924 if (blk != leaf_no) 1925 brelse(bh); 1926 1927 gfs2_rlist_add(dip, &rlist, blk); 1928 l_blocks++; 1929 } 1930 1931 gfs2_rlist_alloc(&rlist, LM_ST_EXCLUSIVE); 1932 1933 for (x = 0; x < rlist.rl_rgrps; x++) { 1934 struct gfs2_rgrpd *rgd; 1935 rgd = rlist.rl_ghs[x].gh_gl->gl_object; 1936 rg_blocks += rgd->rd_length; 1937 } 1938 1939 error = gfs2_glock_nq_m(rlist.rl_rgrps, rlist.rl_ghs); 1940 if (error) 1941 goto out_rlist; 1942 1943 error = gfs2_trans_begin(sdp, 1944 rg_blocks + (DIV_ROUND_UP(size, sdp->sd_jbsize) + 1) + 1945 RES_DINODE + RES_STATFS + RES_QUOTA, l_blocks); 1946 if (error) 1947 goto out_rg_gunlock; 1948 1949 bh = leaf_bh; 1950 1951 for (blk = leaf_no; blk; blk = nblk) { 1952 if (blk != leaf_no) { 1953 error = get_leaf(dip, blk, &bh); 1954 if (error) 1955 goto out_end_trans; 1956 } 1957 tmp_leaf = (struct gfs2_leaf *)bh->b_data; 1958 nblk = be64_to_cpu(tmp_leaf->lf_next); 1959 if (blk != leaf_no) 1960 brelse(bh); 1961 1962 gfs2_free_meta(dip, blk, 1); 1963 gfs2_add_inode_blocks(&dip->i_inode, -1); 1964 } 1965 1966 error = gfs2_dir_write_data(dip, ht, index * sizeof(u64), size); 1967 if (error != size) { 1968 if (error >= 0) 1969 error = -EIO; 1970 goto out_end_trans; 1971 } 1972 1973 error = gfs2_meta_inode_buffer(dip, &dibh); 1974 if (error) 1975 goto out_end_trans; 1976 1977 gfs2_trans_add_meta(dip->i_gl, dibh); 1978 /* On the last dealloc, make this a regular file in case we crash. 1979 (We don't want to free these blocks a second time.) */ 1980 if (last_dealloc) 1981 dip->i_inode.i_mode = S_IFREG; 1982 gfs2_dinode_out(dip, dibh->b_data); 1983 brelse(dibh); 1984 1985out_end_trans: 1986 gfs2_trans_end(sdp); 1987out_rg_gunlock: 1988 gfs2_glock_dq_m(rlist.rl_rgrps, rlist.rl_ghs); 1989out_rlist: 1990 gfs2_rlist_free(&rlist); 1991 gfs2_quota_unhold(dip); 1992out: 1993 kvfree(ht); 1994 return error; 1995} 1996 1997/** 1998 * gfs2_dir_exhash_dealloc - free all the leaf blocks in a directory 1999 * @dip: the directory 2000 * 2001 * Dealloc all on-disk directory leaves to FREEMETA state 2002 * Change on-disk inode type to "regular file" 2003 * 2004 * Returns: errno 2005 */ 2006 2007int gfs2_dir_exhash_dealloc(struct gfs2_inode *dip) 2008{ 2009 struct buffer_head *bh; 2010 struct gfs2_leaf *leaf; 2011 u32 hsize, len; 2012 u32 index = 0, next_index; 2013 __be64 *lp; 2014 u64 leaf_no; 2015 int error = 0, last; 2016 2017 hsize = 1 << dip->i_depth; 2018 2019 lp = gfs2_dir_get_hash_table(dip); 2020 if (IS_ERR(lp)) 2021 return PTR_ERR(lp); 2022 2023 while (index < hsize) { 2024 leaf_no = be64_to_cpu(lp[index]); 2025 if (leaf_no) { 2026 error = get_leaf(dip, leaf_no, &bh); 2027 if (error) 2028 goto out; 2029 leaf = (struct gfs2_leaf *)bh->b_data; 2030 len = 1 << (dip->i_depth - be16_to_cpu(leaf->lf_depth)); 2031 2032 next_index = (index & ~(len - 1)) + len; 2033 last = ((next_index >= hsize) ? 1 : 0); 2034 error = leaf_dealloc(dip, index, len, leaf_no, bh, 2035 last); 2036 brelse(bh); 2037 if (error) 2038 goto out; 2039 index = next_index; 2040 } else 2041 index++; 2042 } 2043 2044 if (index != hsize) { 2045 gfs2_consist_inode(dip); 2046 error = -EIO; 2047 } 2048 2049out: 2050 2051 return error; 2052} 2053 2054/** 2055 * gfs2_diradd_alloc_required - find if adding entry will require an allocation 2056 * @ip: the file being written to 2057 * @filname: the filename that's going to be added 2058 * @da: The structure to return dir alloc info 2059 * 2060 * Returns: 0 if ok, -ve on error 2061 */ 2062 2063int gfs2_diradd_alloc_required(struct inode *inode, const struct qstr *name, 2064 struct gfs2_diradd *da) 2065{ 2066 struct gfs2_inode *ip = GFS2_I(inode); 2067 struct gfs2_sbd *sdp = GFS2_SB(inode); 2068 const unsigned int extra = sizeof(struct gfs2_dinode) - sizeof(struct gfs2_leaf); 2069 struct gfs2_dirent *dent; 2070 struct buffer_head *bh; 2071 2072 da->nr_blocks = 0; 2073 da->bh = NULL; 2074 da->dent = NULL; 2075 2076 dent = gfs2_dirent_search(inode, name, gfs2_dirent_find_space, &bh); 2077 if (!dent) { 2078 da->nr_blocks = sdp->sd_max_dirres; 2079 if (!(ip->i_diskflags & GFS2_DIF_EXHASH) && 2080 (GFS2_DIRENT_SIZE(name->len) < extra)) 2081 da->nr_blocks = 1; 2082 return 0; 2083 } 2084 if (IS_ERR(dent)) 2085 return PTR_ERR(dent); 2086 2087 if (da->save_loc) { 2088 da->bh = bh; 2089 da->dent = dent; 2090 } else { 2091 brelse(bh); 2092 } 2093 return 0; 2094} 2095 2096