root/fs/ceph/quota.c

/* [<][>][^][v][top][bottom][index][help] */

DEFINITIONS

This source file includes the following definitions.
  1. ceph_adjust_quota_realms_count
  2. ceph_has_realms_with_quotas
  3. ceph_handle_quota
  4. find_quotarealm_inode
  5. lookup_quotarealm_inode
  6. ceph_cleanup_quotarealms_inodes
  7. get_quota_realm
  8. ceph_quota_is_same_realm
  9. check_quota_exceeded
  10. ceph_quota_is_max_files_exceeded
  11. ceph_quota_is_max_bytes_exceeded
  12. ceph_quota_is_max_bytes_approaching
  13. ceph_quota_update_statfs

   1 // SPDX-License-Identifier: GPL-2.0
   2 /*
   3  * quota.c - CephFS quota
   4  *
   5  * Copyright (C) 2017-2018 SUSE
   6  */
   7 
   8 #include <linux/statfs.h>
   9 
  10 #include "super.h"
  11 #include "mds_client.h"
  12 
  13 void ceph_adjust_quota_realms_count(struct inode *inode, bool inc)
  14 {
  15         struct ceph_mds_client *mdsc = ceph_inode_to_client(inode)->mdsc;
  16         if (inc)
  17                 atomic64_inc(&mdsc->quotarealms_count);
  18         else
  19                 atomic64_dec(&mdsc->quotarealms_count);
  20 }
  21 
  22 static inline bool ceph_has_realms_with_quotas(struct inode *inode)
  23 {
  24         struct ceph_mds_client *mdsc = ceph_inode_to_client(inode)->mdsc;
  25         struct super_block *sb = mdsc->fsc->sb;
  26 
  27         if (atomic64_read(&mdsc->quotarealms_count) > 0)
  28                 return true;
  29         /* if root is the real CephFS root, we don't have quota realms */
  30         if (sb->s_root->d_inode &&
  31             (sb->s_root->d_inode->i_ino == CEPH_INO_ROOT))
  32                 return false;
  33         /* otherwise, we can't know for sure */
  34         return true;
  35 }
  36 
  37 void ceph_handle_quota(struct ceph_mds_client *mdsc,
  38                        struct ceph_mds_session *session,
  39                        struct ceph_msg *msg)
  40 {
  41         struct super_block *sb = mdsc->fsc->sb;
  42         struct ceph_mds_quota *h = msg->front.iov_base;
  43         struct ceph_vino vino;
  44         struct inode *inode;
  45         struct ceph_inode_info *ci;
  46 
  47         if (msg->front.iov_len < sizeof(*h)) {
  48                 pr_err("%s corrupt message mds%d len %d\n", __func__,
  49                        session->s_mds, (int)msg->front.iov_len);
  50                 ceph_msg_dump(msg);
  51                 return;
  52         }
  53 
  54         /* increment msg sequence number */
  55         mutex_lock(&session->s_mutex);
  56         session->s_seq++;
  57         mutex_unlock(&session->s_mutex);
  58 
  59         /* lookup inode */
  60         vino.ino = le64_to_cpu(h->ino);
  61         vino.snap = CEPH_NOSNAP;
  62         inode = ceph_find_inode(sb, vino);
  63         if (!inode) {
  64                 pr_warn("Failed to find inode %llu\n", vino.ino);
  65                 return;
  66         }
  67         ci = ceph_inode(inode);
  68 
  69         spin_lock(&ci->i_ceph_lock);
  70         ci->i_rbytes = le64_to_cpu(h->rbytes);
  71         ci->i_rfiles = le64_to_cpu(h->rfiles);
  72         ci->i_rsubdirs = le64_to_cpu(h->rsubdirs);
  73         __ceph_update_quota(ci, le64_to_cpu(h->max_bytes),
  74                             le64_to_cpu(h->max_files));
  75         spin_unlock(&ci->i_ceph_lock);
  76 
  77         /* avoid calling iput_final() in dispatch thread */
  78         ceph_async_iput(inode);
  79 }
  80 
  81 static struct ceph_quotarealm_inode *
  82 find_quotarealm_inode(struct ceph_mds_client *mdsc, u64 ino)
  83 {
  84         struct ceph_quotarealm_inode *qri = NULL;
  85         struct rb_node **node, *parent = NULL;
  86 
  87         mutex_lock(&mdsc->quotarealms_inodes_mutex);
  88         node = &(mdsc->quotarealms_inodes.rb_node);
  89         while (*node) {
  90                 parent = *node;
  91                 qri = container_of(*node, struct ceph_quotarealm_inode, node);
  92 
  93                 if (ino < qri->ino)
  94                         node = &((*node)->rb_left);
  95                 else if (ino > qri->ino)
  96                         node = &((*node)->rb_right);
  97                 else
  98                         break;
  99         }
 100         if (!qri || (qri->ino != ino)) {
 101                 /* Not found, create a new one and insert it */
 102                 qri = kmalloc(sizeof(*qri), GFP_KERNEL);
 103                 if (qri) {
 104                         qri->ino = ino;
 105                         qri->inode = NULL;
 106                         qri->timeout = 0;
 107                         mutex_init(&qri->mutex);
 108                         rb_link_node(&qri->node, parent, node);
 109                         rb_insert_color(&qri->node, &mdsc->quotarealms_inodes);
 110                 } else
 111                         pr_warn("Failed to alloc quotarealms_inode\n");
 112         }
 113         mutex_unlock(&mdsc->quotarealms_inodes_mutex);
 114 
 115         return qri;
 116 }
 117 
 118 /*
 119  * This function will try to lookup a realm inode which isn't visible in the
 120  * filesystem mountpoint.  A list of these kind of inodes (not visible) is
 121  * maintained in the mdsc and freed only when the filesystem is umounted.
 122  *
 123  * Note that these inodes are kept in this list even if the lookup fails, which
 124  * allows to prevent useless lookup requests.
 125  */
 126 static struct inode *lookup_quotarealm_inode(struct ceph_mds_client *mdsc,
 127                                              struct super_block *sb,
 128                                              struct ceph_snap_realm *realm)
 129 {
 130         struct ceph_quotarealm_inode *qri;
 131         struct inode *in;
 132 
 133         qri = find_quotarealm_inode(mdsc, realm->ino);
 134         if (!qri)
 135                 return NULL;
 136 
 137         mutex_lock(&qri->mutex);
 138         if (qri->inode && ceph_is_any_caps(qri->inode)) {
 139                 /* A request has already returned the inode */
 140                 mutex_unlock(&qri->mutex);
 141                 return qri->inode;
 142         }
 143         /* Check if this inode lookup has failed recently */
 144         if (qri->timeout &&
 145             time_before_eq(jiffies, qri->timeout)) {
 146                 mutex_unlock(&qri->mutex);
 147                 return NULL;
 148         }
 149         if (qri->inode) {
 150                 /* get caps */
 151                 int ret = __ceph_do_getattr(qri->inode, NULL,
 152                                             CEPH_STAT_CAP_INODE, true);
 153                 if (ret >= 0)
 154                         in = qri->inode;
 155                 else
 156                         in = ERR_PTR(ret);
 157         }  else {
 158                 in = ceph_lookup_inode(sb, realm->ino);
 159         }
 160 
 161         if (IS_ERR(in)) {
 162                 dout("Can't lookup inode %llx (err: %ld)\n",
 163                      realm->ino, PTR_ERR(in));
 164                 qri->timeout = jiffies + msecs_to_jiffies(60 * 1000); /* XXX */
 165         } else {
 166                 qri->timeout = 0;
 167                 qri->inode = in;
 168         }
 169         mutex_unlock(&qri->mutex);
 170 
 171         return in;
 172 }
 173 
 174 void ceph_cleanup_quotarealms_inodes(struct ceph_mds_client *mdsc)
 175 {
 176         struct ceph_quotarealm_inode *qri;
 177         struct rb_node *node;
 178 
 179         /*
 180          * It should now be safe to clean quotarealms_inode tree without holding
 181          * mdsc->quotarealms_inodes_mutex...
 182          */
 183         mutex_lock(&mdsc->quotarealms_inodes_mutex);
 184         while (!RB_EMPTY_ROOT(&mdsc->quotarealms_inodes)) {
 185                 node = rb_first(&mdsc->quotarealms_inodes);
 186                 qri = rb_entry(node, struct ceph_quotarealm_inode, node);
 187                 rb_erase(node, &mdsc->quotarealms_inodes);
 188                 iput(qri->inode);
 189                 kfree(qri);
 190         }
 191         mutex_unlock(&mdsc->quotarealms_inodes_mutex);
 192 }
 193 
/*
 * This function walks through the snaprealm for an inode and returns the
 * ceph_snap_realm for the first snaprealm that has quotas set (either max_files
 * or max_bytes).  If the root is reached, return the root ceph_snap_realm
 * instead.
 *
 * Note that the caller is responsible for calling ceph_put_snap_realm() on the
 * returned realm.
 *
 * Callers of this function need to hold mdsc->snap_rwsem.  However, if there's
 * a need to do an inode lookup, this rwsem will be temporarily dropped.  Hence
 * the 'retry' argument: if rwsem needs to be dropped and 'retry' is 'false'
 * this function will return -EAGAIN; otherwise, the snaprealms walk-through
 * will be restarted.
 *
 * NULL is returned (with no realm reference held) when the inode is a
 * snapshot inode, when it has no i_snap_realm, when a realm inode exists but
 * cannot be grabbed, or when looking up a missing realm inode fails.
 */
static struct ceph_snap_realm *get_quota_realm(struct ceph_mds_client *mdsc,
                                               struct inode *inode, bool retry)
{
        struct ceph_inode_info *ci = NULL;
        struct ceph_snap_realm *realm, *next;
        struct inode *in;
        bool has_quota;

        /* only head (non-snapshot) inodes are considered */
        if (ceph_snap(inode) != CEPH_NOSNAP)
                return NULL;

restart:
        realm = ceph_inode(inode)->i_snap_realm;
        if (realm)
                ceph_get_snap_realm(mdsc, realm);
        else
                pr_err_ratelimited("get_quota_realm: ino (%llx.%llx) "
                                   "null i_snap_realm\n", ceph_vinop(inode));
        while (realm) {
                bool has_inode;

                /* take a reference on the realm's inode, if it has one */
                spin_lock(&realm->inodes_with_caps_lock);
                has_inode = realm->inode;
                in = has_inode ? igrab(realm->inode) : NULL;
                spin_unlock(&realm->inodes_with_caps_lock);
                /* the realm has an inode but igrab() failed: give up */
                if (has_inode && !in)
                        break;
                if (!in) {
                        /*
                         * The realm has no inode attached: look it up with
                         * snap_rwsem dropped.  Since the rwsem was released,
                         * the walk cannot simply continue from here.
                         */
                        up_read(&mdsc->snap_rwsem);
                        in = lookup_quotarealm_inode(mdsc, inode->i_sb, realm);
                        down_read(&mdsc->snap_rwsem);
                        if (IS_ERR_OR_NULL(in))
                                break;
                        ceph_put_snap_realm(mdsc, realm);
                        /* rwsem is held again here, as the caller expects */
                        if (!retry)
                                return ERR_PTR(-EAGAIN);
                        goto restart;
                }

                ci = ceph_inode(in);
                has_quota = __ceph_has_any_quota(ci);
                /* avoid calling iput_final() while holding mdsc->snap_rwsem */
                ceph_async_iput(in);

                /* stop at the first realm with a quota, or at the top realm */
                next = realm->parent;
                if (has_quota || !next)
                       return realm;

                /* grab the parent before dropping the current realm */
                ceph_get_snap_realm(mdsc, next);
                ceph_put_snap_realm(mdsc, realm);
                realm = next;
        }
        /* error exit from the loop: drop the reference still held */
        if (realm)
                ceph_put_snap_realm(mdsc, realm);

        return NULL;
}
 266 
 267 bool ceph_quota_is_same_realm(struct inode *old, struct inode *new)
 268 {
 269         struct ceph_mds_client *mdsc = ceph_inode_to_client(old)->mdsc;
 270         struct ceph_snap_realm *old_realm, *new_realm;
 271         bool is_same;
 272 
 273 restart:
 274         /*
 275          * We need to lookup 2 quota realms atomically, i.e. with snap_rwsem.
 276          * However, get_quota_realm may drop it temporarily.  By setting the
 277          * 'retry' parameter to 'false', we'll get -EAGAIN if the rwsem was
 278          * dropped and we can then restart the whole operation.
 279          */
 280         down_read(&mdsc->snap_rwsem);
 281         old_realm = get_quota_realm(mdsc, old, true);
 282         new_realm = get_quota_realm(mdsc, new, false);
 283         if (PTR_ERR(new_realm) == -EAGAIN) {
 284                 up_read(&mdsc->snap_rwsem);
 285                 if (old_realm)
 286                         ceph_put_snap_realm(mdsc, old_realm);
 287                 goto restart;
 288         }
 289         is_same = (old_realm == new_realm);
 290         up_read(&mdsc->snap_rwsem);
 291 
 292         if (old_realm)
 293                 ceph_put_snap_realm(mdsc, old_realm);
 294         if (new_realm)
 295                 ceph_put_snap_realm(mdsc, new_realm);
 296 
 297         return is_same;
 298 }
 299 
/* Selects which quota check check_quota_exceeded() runs on each realm. */
enum quota_check_op {
        QUOTA_CHECK_MAX_FILES_OP,       /* check quota max_files limit */
        QUOTA_CHECK_MAX_BYTES_OP,       /* check quota max_bytes limit */
        QUOTA_CHECK_MAX_BYTES_APPROACHING_OP    /* check if quota max_bytes
                                                   limit is approaching */
};
 306 
/*
 * check_quota_exceeded() will walk up the snaprealm hierarchy and, for each
 * realm, it will execute quota check operation defined by the 'op' parameter.
 * The snaprealm walk is interrupted if the quota check detects that the quota
 * is exceeded or if the root inode is reached.
 *
 * @inode: inode whose realm chain is checked; snapshot inodes are never
 *         considered to exceed a quota
 * @op:    which check to run (see enum quota_check_op)
 * @delta: amount being added, used by the two max_bytes checks
 *
 * Takes mdsc->snap_rwsem for reading; the rwsem is dropped temporarily when a
 * realm inode has to be looked up, after which the walk restarts from the
 * inode's own realm.
 *
 * Returns true if some realm in the chain fails the selected check, false
 * otherwise (including when the walk has to be abandoned).
 */
static bool check_quota_exceeded(struct inode *inode, enum quota_check_op op,
                                 loff_t delta)
{
        struct ceph_mds_client *mdsc = ceph_inode_to_client(inode)->mdsc;
        struct ceph_inode_info *ci;
        struct ceph_snap_realm *realm, *next;
        struct inode *in;
        u64 max, rvalue;
        bool exceeded = false;

        if (ceph_snap(inode) != CEPH_NOSNAP)
                return false;

        down_read(&mdsc->snap_rwsem);
restart:
        realm = ceph_inode(inode)->i_snap_realm;
        if (realm)
                ceph_get_snap_realm(mdsc, realm);
        else
                pr_err_ratelimited("check_quota_exceeded: ino (%llx.%llx) "
                                   "null i_snap_realm\n", ceph_vinop(inode));
        while (realm) {
                bool has_inode;

                /* take a reference on the realm's inode, if it has one */
                spin_lock(&realm->inodes_with_caps_lock);
                has_inode = realm->inode;
                in = has_inode ? igrab(realm->inode) : NULL;
                spin_unlock(&realm->inodes_with_caps_lock);
                /* the realm has an inode but igrab() failed: give up */
                if (has_inode && !in)
                        break;
                if (!in) {
                        /*
                         * The realm has no inode attached: look it up with
                         * snap_rwsem dropped, then start the walk over.
                         */
                        up_read(&mdsc->snap_rwsem);
                        in = lookup_quotarealm_inode(mdsc, inode->i_sb, realm);
                        down_read(&mdsc->snap_rwsem);
                        if (IS_ERR_OR_NULL(in))
                                break;
                        ceph_put_snap_realm(mdsc, realm);
                        goto restart;
                }
                ci = ceph_inode(in);
                /* snapshot the limit and current usage under the inode lock */
                spin_lock(&ci->i_ceph_lock);
                if (op == QUOTA_CHECK_MAX_FILES_OP) {
                        max = ci->i_max_files;
                        rvalue = ci->i_rfiles + ci->i_rsubdirs;
                } else {
                        max = ci->i_max_bytes;
                        rvalue = ci->i_rbytes;
                }
                spin_unlock(&ci->i_ceph_lock);
                /* in every case a limit of zero means "no limit set" */
                switch (op) {
                case QUOTA_CHECK_MAX_FILES_OP:
                        /* files + subdirs already at (or over) the limit */
                        exceeded = (max && (rvalue >= max));
                        break;
                case QUOTA_CHECK_MAX_BYTES_OP:
                        /* adding delta bytes would go over the limit */
                        exceeded = (max && (rvalue + delta > max));
                        break;
                case QUOTA_CHECK_MAX_BYTES_APPROACHING_OP:
                        if (max) {
                                if (rvalue >= max)
                                        exceeded = true;
                                else {
                                        /*
                                         * when we're writing more than 1/16th
                                         * of the available space
                                         */
                                        exceeded =
                                                (((max - rvalue) >> 4) < delta);
                                }
                        }
                        break;
                default:
                        /* Shouldn't happen */
                        pr_warn("Invalid quota check op (%d)\n", op);
                        exceeded = true; /* Just break the loop */
                }
                /* avoid calling iput_final() while holding mdsc->snap_rwsem */
                ceph_async_iput(in);

                next = realm->parent;
                if (exceeded || !next)
                        break;
                /* grab the parent before dropping the current realm */
                ceph_get_snap_realm(mdsc, next);
                ceph_put_snap_realm(mdsc, realm);
                realm = next;
        }
        /* every exit from the loop leaves at most one realm reference held */
        if (realm)
                ceph_put_snap_realm(mdsc, realm);
        up_read(&mdsc->snap_rwsem);

        return exceeded;
}
 404 
 405 /*
 406  * ceph_quota_is_max_files_exceeded - check if we can create a new file
 407  * @inode:      directory where a new file is being created
 408  *
 409  * This functions returns true is max_files quota allows a new file to be
 410  * created.  It is necessary to walk through the snaprealm hierarchy (until the
 411  * FS root) to check all realms with quotas set.
 412  */
 413 bool ceph_quota_is_max_files_exceeded(struct inode *inode)
 414 {
 415         if (!ceph_has_realms_with_quotas(inode))
 416                 return false;
 417 
 418         WARN_ON(!S_ISDIR(inode->i_mode));
 419 
 420         return check_quota_exceeded(inode, QUOTA_CHECK_MAX_FILES_OP, 0);
 421 }
 422 
 423 /*
 424  * ceph_quota_is_max_bytes_exceeded - check if we can write to a file
 425  * @inode:      inode being written
 426  * @newsize:    new size if write succeeds
 427  *
 428  * This functions returns true is max_bytes quota allows a file size to reach
 429  * @newsize; it returns false otherwise.
 430  */
 431 bool ceph_quota_is_max_bytes_exceeded(struct inode *inode, loff_t newsize)
 432 {
 433         loff_t size = i_size_read(inode);
 434 
 435         if (!ceph_has_realms_with_quotas(inode))
 436                 return false;
 437 
 438         /* return immediately if we're decreasing file size */
 439         if (newsize <= size)
 440                 return false;
 441 
 442         return check_quota_exceeded(inode, QUOTA_CHECK_MAX_BYTES_OP, (newsize - size));
 443 }
 444 
 445 /*
 446  * ceph_quota_is_max_bytes_approaching - check if we're reaching max_bytes
 447  * @inode:      inode being written
 448  * @newsize:    new size if write succeeds
 449  *
 450  * This function returns true if the new file size @newsize will be consuming
 451  * more than 1/16th of the available quota space; it returns false otherwise.
 452  */
 453 bool ceph_quota_is_max_bytes_approaching(struct inode *inode, loff_t newsize)
 454 {
 455         loff_t size = ceph_inode(inode)->i_reported_size;
 456 
 457         if (!ceph_has_realms_with_quotas(inode))
 458                 return false;
 459 
 460         /* return immediately if we're decreasing file size */
 461         if (newsize <= size)
 462                 return false;
 463 
 464         return check_quota_exceeded(inode, QUOTA_CHECK_MAX_BYTES_APPROACHING_OP,
 465                                     (newsize - size));
 466 }
 467 
 468 /*
 469  * ceph_quota_update_statfs - if root has quota update statfs with quota status
 470  * @fsc:        filesystem client instance
 471  * @buf:        statfs to update
 472  *
 473  * If the mounted filesystem root has max_bytes quota set, update the filesystem
 474  * statistics with the quota status.
 475  *
 476  * This function returns true if the stats have been updated, false otherwise.
 477  */
 478 bool ceph_quota_update_statfs(struct ceph_fs_client *fsc, struct kstatfs *buf)
 479 {
 480         struct ceph_mds_client *mdsc = fsc->mdsc;
 481         struct ceph_inode_info *ci;
 482         struct ceph_snap_realm *realm;
 483         struct inode *in;
 484         u64 total = 0, used, free;
 485         bool is_updated = false;
 486 
 487         down_read(&mdsc->snap_rwsem);
 488         realm = get_quota_realm(mdsc, d_inode(fsc->sb->s_root), true);
 489         up_read(&mdsc->snap_rwsem);
 490         if (!realm)
 491                 return false;
 492 
 493         spin_lock(&realm->inodes_with_caps_lock);
 494         in = realm->inode ? igrab(realm->inode) : NULL;
 495         spin_unlock(&realm->inodes_with_caps_lock);
 496         if (in) {
 497                 ci = ceph_inode(in);
 498                 spin_lock(&ci->i_ceph_lock);
 499                 if (ci->i_max_bytes) {
 500                         total = ci->i_max_bytes >> CEPH_BLOCK_SHIFT;
 501                         used = ci->i_rbytes >> CEPH_BLOCK_SHIFT;
 502                         /* It is possible for a quota to be exceeded.
 503                          * Report 'zero' in that case
 504                          */
 505                         free = total > used ? total - used : 0;
 506                 }
 507                 spin_unlock(&ci->i_ceph_lock);
 508                 if (total) {
 509                         buf->f_blocks = total;
 510                         buf->f_bfree = free;
 511                         buf->f_bavail = free;
 512                         is_updated = true;
 513                 }
 514                 iput(in);
 515         }
 516         ceph_put_snap_realm(mdsc, realm);
 517 
 518         return is_updated;
 519 }
 520 

/* [<][>][^][v][top][bottom][index][help] */