root/fs/orangefs/dir.c

/* [<][>][^][v][top][bottom][index][help] */

DEFINITIONS

This source file includes the following definitions.
  1. do_readdir
  2. parse_readdir
  3. orangefs_dir_more
  4. fill_from_part
  5. orangefs_dir_fill
  6. orangefs_dir_llseek
  7. orangefs_dir_iterate
  8. orangefs_dir_open
  9. orangefs_dir_release

   1 // SPDX-License-Identifier: GPL-2.0
   2 /*
   3  * Copyright 2017 Omnibond Systems, L.L.C.
   4  */
   5 
   6 #include "protocol.h"
   7 #include "orangefs-kernel.h"
   8 #include "orangefs-bufmap.h"
   9 
/*
 * Header of one "part": a block of directory data received from the
 * userspace daemon.  The header is written over the start of the
 * trailer buffer by parse_readdir(), so the part and its data share one
 * allocation.
 */
struct orangefs_dir_part {
        /* Next part in the singly linked list, or NULL for the last one. */
        struct orangefs_dir_part *next;
        /* Trailer size minus the readdir response header size, in bytes. */
        size_t len;
};
  14 
/* Per-open-file directory state, stored in file->private_data. */
struct orangefs_dir {
        /*
         * Token sent with each readdir request and replaced by the one
         * in the response; starts at ORANGEFS_ITERATE_START and is
         * compared against ORANGEFS_ITERATE_END to detect completion.
         */
        __u64 token;
        /* Head of the list of parts received so far, or NULL. */
        struct orangefs_dir_part *part;
        /* Position just past the last part received so far. */
        loff_t end;
        /* Sticky error: once non-zero, iterate returns it immediately. */
        int error;
};
  21 
  22 #define PART_SHIFT (24)
  23 #define PART_SIZE (1<<24)
  24 #define PART_MASK (~(PART_SIZE - 1))
  25 
  26 /*
  27  * There can be up to 512 directory entries.  Each entry is encoded as
  28  * follows:
  29  * 4 bytes: string size (n)
  30  * n bytes: string
  31  * 1 byte: trailing zero
  32  * padding to 8 bytes
  33  * 16 bytes: khandle
  34  * padding to 8 bytes
  35  *
  36  * The trailer_buf starts with a struct orangefs_readdir_response_s
  37  * which must be skipped to get to the directory data.
  38  *
  39  * The data which is received from the userspace daemon is termed a
  40  * part and is stored in a linked list in case more than one part is
  41  * needed for a large directory.
  42  *
  43  * The position pointer (ctx->pos) encodes the part and offset on which
  44  * to begin reading at.  Bits above PART_SHIFT encode the part and bits
  45  * below PART_SHIFT encode the offset.  Parts are stored in a linked
  46  * list which grows as data is received from the server.  The overhead
  47  * associated with managing the list is presumed to be small compared to
  48  * the overhead of communicating with the server.
  49  *
  50  * As data is received from the server, it is placed at the end of the
  51  * part list.  Data is parsed from the current position as it is needed.
  52  * When data is determined to be corrupt, it is either because the
  53  * userspace component has sent back corrupt data or because the file
  54  * pointer has been moved to an invalid location.  Since the two cannot
  55  * be differentiated, return EIO.
  56  *
   57  * Part zero is synthesized to contain `.' and `..'.  Part one is the
  58  * first part of the part list.
  59  */
  60 
  61 static int do_readdir(struct orangefs_inode_s *oi,
  62     struct orangefs_dir *od, struct dentry *dentry,
  63     struct orangefs_kernel_op_s *op)
  64 {
  65         struct orangefs_readdir_response_s *resp;
  66         int bufi, r;
  67 
  68         /*
  69          * Despite the badly named field, readdir does not use shared
  70          * memory.  However, there are a limited number of readdir
  71          * slots, which must be allocated here.  This flag simply tells
  72          * the op scheduler to return the op here for retry.
  73          */
  74         op->uses_shared_memory = 1;
  75         op->upcall.req.readdir.refn = oi->refn;
  76         op->upcall.req.readdir.token = od->token;
  77         op->upcall.req.readdir.max_dirent_count =
  78             ORANGEFS_MAX_DIRENT_COUNT_READDIR;
  79 
  80 again:
  81         bufi = orangefs_readdir_index_get();
  82         if (bufi < 0) {
  83                 od->error = bufi;
  84                 return bufi;
  85         }
  86 
  87         op->upcall.req.readdir.buf_index = bufi;
  88 
  89         r = service_operation(op, "orangefs_readdir",
  90             get_interruptible_flag(dentry->d_inode));
  91 
  92         orangefs_readdir_index_put(bufi);
  93 
  94         if (op_state_purged(op)) {
  95                 if (r == -EAGAIN) {
  96                         vfree(op->downcall.trailer_buf);
  97                         goto again;
  98                 } else if (r == -EIO) {
  99                         vfree(op->downcall.trailer_buf);
 100                         od->error = r;
 101                         return r;
 102                 }
 103         }
 104 
 105         if (r < 0) {
 106                 vfree(op->downcall.trailer_buf);
 107                 od->error = r;
 108                 return r;
 109         } else if (op->downcall.status) {
 110                 vfree(op->downcall.trailer_buf);
 111                 od->error = op->downcall.status;
 112                 return op->downcall.status;
 113         }
 114 
 115         /*
 116          * The maximum size is size per entry times the 512 entries plus
 117          * the header.  This is well under the limit.
 118          */
 119         if (op->downcall.trailer_size > PART_SIZE) {
 120                 vfree(op->downcall.trailer_buf);
 121                 od->error = -EIO;
 122                 return -EIO;
 123         }
 124 
 125         resp = (struct orangefs_readdir_response_s *)
 126             op->downcall.trailer_buf;
 127         od->token = resp->token;
 128         return 0;
 129 }
 130 
 131 static int parse_readdir(struct orangefs_dir *od,
 132     struct orangefs_kernel_op_s *op)
 133 {
 134         struct orangefs_dir_part *part, *new;
 135         size_t count;
 136 
 137         count = 1;
 138         part = od->part;
 139         while (part) {
 140                 count++;
 141                 if (part->next)
 142                         part = part->next;
 143                 else
 144                         break;
 145         }
 146 
 147         new = (void *)op->downcall.trailer_buf;
 148         new->next = NULL;
 149         new->len = op->downcall.trailer_size -
 150             sizeof(struct orangefs_readdir_response_s);
 151         if (!od->part)
 152                 od->part = new;
 153         else
 154                 part->next = new;
 155         count++;
 156         od->end = count << PART_SHIFT;
 157 
 158         return 0;
 159 }
 160 
 161 static int orangefs_dir_more(struct orangefs_inode_s *oi,
 162     struct orangefs_dir *od, struct dentry *dentry)
 163 {
 164         struct orangefs_kernel_op_s *op;
 165         int r;
 166 
 167         op = op_alloc(ORANGEFS_VFS_OP_READDIR);
 168         if (!op) {
 169                 od->error = -ENOMEM;
 170                 return -ENOMEM;
 171         }
 172         r = do_readdir(oi, od, dentry, op);
 173         if (r) {
 174                 od->error = r;
 175                 goto out;
 176         }
 177         r = parse_readdir(od, op);
 178         if (r) {
 179                 od->error = r;
 180                 goto out;
 181         }
 182 
 183         od->error = 0;
 184 out:
 185         op_release(op);
 186         return od->error;
 187 }
 188 
 189 static int fill_from_part(struct orangefs_dir_part *part,
 190     struct dir_context *ctx)
 191 {
 192         const int offset = sizeof(struct orangefs_readdir_response_s);
 193         struct orangefs_khandle *khandle;
 194         __u32 *len, padlen;
 195         loff_t i;
 196         char *s;
 197         i = ctx->pos & ~PART_MASK;
 198 
 199         /* The file offset from userspace is too large. */
 200         if (i > part->len)
 201                 return 1;
 202 
 203         /*
 204          * If the seek pointer is positioned just before an entry it
 205          * should find the next entry.
 206          */
 207         if (i % 8)
 208                 i = i + (8 - i%8)%8;
 209 
 210         while (i < part->len) {
 211                 if (part->len < i + sizeof *len)
 212                         break;
 213                 len = (void *)part + offset + i;
 214                 /*
 215                  * len is the size of the string itself.  padlen is the
 216                  * total size of the encoded string.
 217                  */
 218                 padlen = (sizeof *len + *len + 1) +
 219                     (8 - (sizeof *len + *len + 1)%8)%8;
 220                 if (part->len < i + padlen + sizeof *khandle)
 221                         goto next;
 222                 s = (void *)part + offset + i + sizeof *len;
 223                 if (s[*len] != 0)
 224                         goto next;
 225                 khandle = (void *)part + offset + i + padlen;
 226                 if (!dir_emit(ctx, s, *len,
 227                     orangefs_khandle_to_ino(khandle),
 228                     DT_UNKNOWN))
 229                         return 0;
 230                 i += padlen + sizeof *khandle;
 231                 i = i + (8 - i%8)%8;
 232                 BUG_ON(i > part->len);
 233                 ctx->pos = (ctx->pos & PART_MASK) | i;
 234                 continue;
 235 next:
 236                 i += 8;
 237         }
 238         return 1;
 239 }
 240 
 241 static int orangefs_dir_fill(struct orangefs_inode_s *oi,
 242     struct orangefs_dir *od, struct dentry *dentry,
 243     struct dir_context *ctx)
 244 {
 245         struct orangefs_dir_part *part;
 246         size_t count;
 247 
 248         count = ((ctx->pos & PART_MASK) >> PART_SHIFT) - 1;
 249 
 250         part = od->part;
 251         while (part->next && count) {
 252                 count--;
 253                 part = part->next;
 254         }
 255         /* This means the userspace file offset is invalid. */
 256         if (count) {
 257                 od->error = -EIO;
 258                 return -EIO;
 259         }
 260 
 261         while (part && part->len) {
 262                 int r;
 263                 r = fill_from_part(part, ctx);
 264                 if (r < 0) {
 265                         od->error = r;
 266                         return r;
 267                 } else if (r == 0) {
 268                         /* Userspace buffer is full. */
 269                         break;
 270                 } else {
 271                         /*
 272                          * The part ran out of data.  Move to the next
 273                          * part. */
 274                         ctx->pos = (ctx->pos & PART_MASK) +
 275                             (1 << PART_SHIFT);
 276                         part = part->next;
 277                 }
 278         }
 279         return 0;
 280 }
 281 
 282 static loff_t orangefs_dir_llseek(struct file *file, loff_t offset,
 283     int whence)
 284 {
 285         struct orangefs_dir *od = file->private_data;
 286         /*
 287          * Delete the stored data so userspace sees new directory
 288          * entries.
 289          */
 290         if (!whence && offset < od->end) {
 291                 struct orangefs_dir_part *part = od->part;
 292                 while (part) {
 293                         struct orangefs_dir_part *next = part->next;
 294                         vfree(part);
 295                         part = next;
 296                 }
 297                 od->token = ORANGEFS_ITERATE_START;
 298                 od->part = NULL;
 299                 od->end = 1 << PART_SHIFT;
 300         }
 301         return default_llseek(file, offset, whence);
 302 }
 303 
 304 static int orangefs_dir_iterate(struct file *file,
 305     struct dir_context *ctx)
 306 {
 307         struct orangefs_inode_s *oi;
 308         struct orangefs_dir *od;
 309         struct dentry *dentry;
 310         int r;
 311 
 312         dentry = file->f_path.dentry;
 313         oi = ORANGEFS_I(dentry->d_inode);
 314         od = file->private_data;
 315 
 316         if (od->error)
 317                 return od->error;
 318 
 319         if (ctx->pos == 0) {
 320                 if (!dir_emit_dot(file, ctx))
 321                         return 0;
 322                 ctx->pos++;
 323         }
 324         if (ctx->pos == 1) {
 325                 if (!dir_emit_dotdot(file, ctx))
 326                         return 0;
 327                 ctx->pos = 1 << PART_SHIFT;
 328         }
 329 
 330         /*
 331          * The seek position is in the first synthesized part but is not
 332          * valid.
 333          */
 334         if ((ctx->pos & PART_MASK) == 0)
 335                 return -EIO;
 336 
 337         r = 0;
 338 
 339         /*
 340          * Must read more if the user has sought past what has been read
 341          * so far.  Stop a user who has sought past the end.
 342          */
 343         while (od->token != ORANGEFS_ITERATE_END &&
 344             ctx->pos > od->end) {
 345                 r = orangefs_dir_more(oi, od, dentry);
 346                 if (r)
 347                         return r;
 348         }
 349         if (od->token == ORANGEFS_ITERATE_END && ctx->pos > od->end)
 350                 return -EIO;
 351 
 352         /* Then try to fill if there's any left in the buffer. */
 353         if (ctx->pos < od->end) {
 354                 r = orangefs_dir_fill(oi, od, dentry, ctx);
 355                 if (r)
 356                         return r;
 357         }
 358 
 359         /* Finally get some more and try to fill. */
 360         if (od->token != ORANGEFS_ITERATE_END) {
 361                 r = orangefs_dir_more(oi, od, dentry);
 362                 if (r)
 363                         return r;
 364                 r = orangefs_dir_fill(oi, od, dentry, ctx);
 365         }
 366 
 367         return r;
 368 }
 369 
 370 static int orangefs_dir_open(struct inode *inode, struct file *file)
 371 {
 372         struct orangefs_dir *od;
 373         file->private_data = kmalloc(sizeof(struct orangefs_dir),
 374             GFP_KERNEL);
 375         if (!file->private_data)
 376                 return -ENOMEM;
 377         od = file->private_data;
 378         od->token = ORANGEFS_ITERATE_START;
 379         od->part = NULL;
 380         od->end = 1 << PART_SHIFT;
 381         od->error = 0;
 382         return 0;
 383 }
 384 
 385 static int orangefs_dir_release(struct inode *inode, struct file *file)
 386 {
 387         struct orangefs_dir *od = file->private_data;
 388         struct orangefs_dir_part *part = od->part;
 389         while (part) {
 390                 struct orangefs_dir_part *next = part->next;
 391                 vfree(part);
 392                 part = next;
 393         }
 394         kfree(od);
 395         return 0;
 396 }
 397 
 398 const struct file_operations orangefs_dir_operations = {
 399         .llseek = orangefs_dir_llseek,
 400         .read = generic_read_dir,
 401         .iterate = orangefs_dir_iterate,
 402         .open = orangefs_dir_open,
 403         .release = orangefs_dir_release
 404 };

/* [<][>][^][v][top][bottom][index][help] */