/*
 * Lockless hierarchical page accounting & limiting
 *
 * Copyright (C) 2014 Red Hat, Inc., Johannes Weiner
 */

#include <linux/page_counter.h>
#include <linux/atomic.h>
#include <linux/kernel.h>
#include <linux/string.h>
#include <linux/sched.h>
#include <linux/bug.h>
#include <asm/page.h>

/**
 * page_counter_cancel - take pages out of the local counter
 * @counter: counter
 * @nr_pages: number of pages to cancel
 */
void page_counter_cancel(struct page_counter *counter, unsigned long nr_pages)
{
	long new;

	new = atomic_long_sub_return(nr_pages, &counter->count);
	/* More uncharges than charges? */
	WARN_ON_ONCE(new < 0);
}

/**
 * page_counter_charge - hierarchically charge pages
 * @counter: counter
 * @nr_pages: number of pages to charge
 *
 * NOTE: This does not consider any configured counter limits.
 */
void page_counter_charge(struct page_counter *counter, unsigned long nr_pages)
{
	struct page_counter *c;

	for (c = counter; c; c = c->parent) {
		long new;

		new = atomic_long_add_return(nr_pages, &c->count);
		/*
		 * This is indeed racy, but we can live with some
		 * inaccuracy in the watermark.
		 */
		if (new > c->watermark)
			c->watermark = new;
	}
}

/**
 * page_counter_try_charge - try to hierarchically charge pages
 * @counter: counter
 * @nr_pages: number of pages to charge
 * @fail: points first counter to hit its limit, if any
 *
 * Returns 0 on success, or -ENOMEM and @fail if the counter or one of
 * its ancestors has hit its configured limit.
 */
int page_counter_try_charge(struct page_counter *counter,
			    unsigned long nr_pages,
			    struct page_counter **fail)
{
	struct page_counter *c;

	for (c = counter; c; c = c->parent) {
		long new;
		/*
		 * Charge speculatively to avoid an expensive CAS.  If
		 * a bigger charge fails, it might falsely lock out a
		 * racing smaller charge and send it into reclaim
		 * early, but the error is limited to the difference
		 * between the two sizes, which is less than 2M/4M in
		 * case of a THP locking out a regular page charge.
		 *
		 * The atomic_long_add_return() implies a full memory
		 * barrier between incrementing the count and reading
		 * the limit.  When racing with page_counter_limit(),
		 * we either see the new limit or the setter sees the
		 * counter has changed and retries.
		 */
		new = atomic_long_add_return(nr_pages, &c->count);
		if (new > c->limit) {
			atomic_long_sub(nr_pages, &c->count);
			/*
			 * This is racy, but we can live with some
			 * inaccuracy in the failcnt.
			 */
			c->failcnt++;
			*fail = c;
			goto failed;
		}
		/*
		 * Just like with failcnt, we can live with some
		 * inaccuracy in the watermark.
		 */
		if (new > c->watermark)
			c->watermark = new;
	}
	return 0;

failed:
	for (c = counter; c != *fail; c = c->parent)
		page_counter_cancel(c, nr_pages);

	return -ENOMEM;
}

/**
 * page_counter_uncharge - hierarchically uncharge pages
 * @counter: counter
 * @nr_pages: number of pages to uncharge
 */
void page_counter_uncharge(struct page_counter *counter, unsigned long nr_pages)
{
	struct page_counter *c;

	for (c = counter; c; c = c->parent)
		page_counter_cancel(c, nr_pages);
}

/**
 * page_counter_limit - limit the number of pages allowed
 * @counter: counter
 * @limit: limit to set
 *
 * Returns 0 on success, -EBUSY if the current number of pages on the
 * counter already exceeds the specified limit.
 *
 * The caller must serialize invocations on the same counter.
 */
int page_counter_limit(struct page_counter *counter, unsigned long limit)
{
	for (;;) {
		unsigned long old;
		long count;

		/*
		 * Update the limit while making sure that it's not
		 * below the concurrently-changing counter value.
		 *
		 * The xchg implies two full memory barriers before
		 * and after, so the read-swap-read is ordered and
		 * ensures coherency with page_counter_try_charge():
		 * that function modifies the count before checking
		 * the limit, so if it sees the old limit, we see the
		 * modified counter and retry.
		 */
		count = atomic_long_read(&counter->count);

		if (count > limit)
			return -EBUSY;

		old = xchg(&counter->limit, limit);

		if (atomic_long_read(&counter->count) <= count)
			return 0;

		counter->limit = old;
		cond_resched();
	}
}

/**
 * page_counter_memparse - memparse() for page counter limits
 * @buf: string to parse
 * @max: string meaning maximum possible value
 * @nr_pages: returns the result in number of pages
 *
 * Returns -EINVAL, or 0 and @nr_pages on success.  @nr_pages will be
 * limited to %PAGE_COUNTER_MAX.
 */
int page_counter_memparse(const char *buf, const char *max,
			  unsigned long *nr_pages)
{
	char *end;
	u64 bytes;

	if (!strcmp(buf, max)) {
		*nr_pages = PAGE_COUNTER_MAX;
		return 0;
	}

	bytes = memparse(buf, &end);
	if (*end != '\0')
		return -EINVAL;

	*nr_pages = min(bytes / PAGE_SIZE, (u64)PAGE_COUNTER_MAX);

	return 0;
}
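
#if 0	/* Illustrative sketch only; not part of the original file. */
/*
 * A minimal example of how a caller might pair the try-charge and
 * uncharge operations on a small counter hierarchy, assuming the
 * page_counter_init() helper from <linux/page_counter.h>.  The names
 * demo_parent, demo_child and demo_use_pages() are hypothetical;
 * real users such as the memory controller embed their page_counters
 * in their own structures.
 */
static struct page_counter demo_parent, demo_child;

static int demo_use_pages(unsigned long nr_pages)
{
	struct page_counter *fail;

	page_counter_init(&demo_parent, NULL);
	page_counter_init(&demo_child, &demo_parent);

	/*
	 * Fails with -ENOMEM if the child or its parent is at its
	 * limit; @fail then points at the counter that refused.
	 */
	if (page_counter_try_charge(&demo_child, nr_pages, &fail))
		return -ENOMEM;

	/* ... use the pages ... */

	/* Uncharging walks the same parent chain back down. */
	page_counter_uncharge(&demo_child, nr_pages);
	return 0;
}
#endif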