// SPDX-License-Identifier: GPL-2.0
/*
 * Lockless hierarchical page accounting & limiting
 *
 * Copyright (C) 2014 Red Hat, Inc., Johannes Weiner
 */

#include <linux/page_counter.h>
#include <linux/atomic.h>
#include <linux/kernel.h>
#include <linux/string.h>
#include <linux/sched.h>
#include <linux/bug.h>
#include <asm/page.h>

static void propagate_protected_usage(struct page_counter *c,
				      unsigned long usage)
{
	unsigned long protected, old_protected;
	long delta;

	if (!c->parent)
		return;

	if (c->min || atomic_long_read(&c->min_usage)) {
		if (usage <= c->min)
			protected = usage;
		else
			protected = 0;

		old_protected = atomic_long_xchg(&c->min_usage, protected);
		delta = protected - old_protected;
		if (delta)
			atomic_long_add(delta, &c->parent->children_min_usage);
	}

	if (c->low || atomic_long_read(&c->low_usage)) {
		if (usage <= c->low)
			protected = usage;
		else
			protected = 0;

		old_protected = atomic_long_xchg(&c->low_usage, protected);
		delta = protected - old_protected;
		if (delta)
			atomic_long_add(delta, &c->parent->children_low_usage);
	}
}

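/*
 * Illustrative example of the propagation above: with c->min == 100
 * pages, a child whose usage grows from 80 to 120 pages first reports
 * 80 protected pages into the parent's children_min_usage, then, once
 * usage exceeds the 100-page floor, swaps in 0 and takes those 80 pages
 * back out.  Parents therefore only accumulate protection from children
 * that are currently within their own min/low thresholds.
 */
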
/**
 * page_counter_cancel - take pages out of the local counter
 * @counter: counter
 * @nr_pages: number of pages to cancel
 */
void page_counter_cancel(struct page_counter *counter, unsigned long nr_pages)
{
	long new;

	new = atomic_long_sub_return(nr_pages, &counter->usage);
	propagate_protected_usage(counter, new);
	/* More uncharges than charges? */
	WARN_ON_ONCE(new < 0);
}

/**
 * page_counter_charge - hierarchically charge pages
 * @counter: counter
 * @nr_pages: number of pages to charge
 *
 * NOTE: This does not consider any configured counter limits.
 */
void page_counter_charge(struct page_counter *counter, unsigned long nr_pages)
{
	struct page_counter *c;

	for (c = counter; c; c = c->parent) {
		long new;

		new = atomic_long_add_return(nr_pages, &c->usage);
		propagate_protected_usage(c, new);
		/*
		 * This is indeed racy, but we can live with some
		 * inaccuracy in the watermark.
		 */
		if (new > c->watermark)
			c->watermark = new;
	}
}

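/*
 * Minimal usage sketch (hypothetical caller; "res" is an assumed
 * page_counter embedded in some accounting structure): a forced charge
 * is normally balanced by an uncharge of the same size later on.
 *
 *	page_counter_charge(&res, nr_pages);
 *	...consume the pages...
 *	page_counter_uncharge(&res, nr_pages);
 */
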
/**
 * page_counter_try_charge - try to hierarchically charge pages
 * @counter: counter
 * @nr_pages: number of pages to charge
 * @fail: points first counter to hit its limit, if any
 *
 * Returns %true on success, or %false and @fail if the counter or one
 * of its ancestors has hit its configured limit.
 */
bool page_counter_try_charge(struct page_counter *counter,
			     unsigned long nr_pages,
			     struct page_counter **fail)
{
	struct page_counter *c;

	for (c = counter; c; c = c->parent) {
		long new;
		/*
		 * Charge speculatively to avoid an expensive CAS.  If
		 * a bigger charge fails, it might falsely lock out a
		 * racing smaller charge and send it into reclaim
		 * early, but the error is limited to the difference
		 * between the two sizes, which is less than 2M/4M in
		 * case of a THP locking out a regular page charge.
		 *
		 * The atomic_long_add_return() implies a full memory
		 * barrier between incrementing the count and reading
		 * the limit.  When racing with page_counter_set_max(),
		 * we either see the new limit or the setter sees the
		 * counter has changed and retries.
		 */
		new = atomic_long_add_return(nr_pages, &c->usage);
		if (new > c->max) {
			atomic_long_sub(nr_pages, &c->usage);
			propagate_protected_usage(c, new);
			/*
			 * This is racy, but we can live with some
			 * inaccuracy in the failcnt.
			 */
			c->failcnt++;
			*fail = c;
			goto failed;
		}
		propagate_protected_usage(c, new);
		/*
		 * Just like with failcnt, we can live with some
		 * inaccuracy in the watermark.
		 */
		if (new > c->watermark)
			c->watermark = new;
	}
	return true;

failed:
	for (c = counter; c != *fail; c = c->parent)
		page_counter_cancel(c, nr_pages);

	return false;
}

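/*
 * Minimal usage sketch (hypothetical caller; "res" and reclaim_from()
 * are assumed names): on failure, @fail identifies the counter whose
 * limit was hit, so reclaim can be targeted before retrying.
 *
 *	struct page_counter *fail;
 *
 *	if (!page_counter_try_charge(&res, nr_pages, &fail)) {
 *		reclaim_from(fail, nr_pages);
 *		return -ENOMEM;
 *	}
 */
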
/**
 * page_counter_uncharge - hierarchically uncharge pages
 * @counter: counter
 * @nr_pages: number of pages to uncharge
 */
void page_counter_uncharge(struct page_counter *counter, unsigned long nr_pages)
{
	struct page_counter *c;

	for (c = counter; c; c = c->parent)
		page_counter_cancel(c, nr_pages);
}

/**
 * page_counter_set_max - set the maximum number of pages allowed
 * @counter: counter
 * @nr_pages: limit to set
 *
 * Returns 0 on success, -EBUSY if the current number of pages on the
 * counter already exceeds the specified limit.
 *
 * The caller must serialize invocations on the same counter.
 */
int page_counter_set_max(struct page_counter *counter, unsigned long nr_pages)
{
	for (;;) {
		unsigned long old;
		long usage;

		/*
		 * Update the limit while making sure that it's not
		 * below the concurrently-changing counter value.
		 *
		 * The xchg implies two full memory barriers before
		 * and after, so the read-swap-read is ordered and
		 * ensures coherency with page_counter_try_charge():
		 * that function modifies the count before checking
		 * the limit, so if it sees the old limit, we see the
		 * modified counter and retry.
		 */
		usage = atomic_long_read(&counter->usage);

		if (usage > nr_pages)
			return -EBUSY;

		old = xchg(&counter->max, nr_pages);

		if (atomic_long_read(&counter->usage) <= usage)
			return 0;

		counter->max = old;
		cond_resched();
	}
}

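/*
 * Worked example of the retry in page_counter_set_max(): usage is 90
 * pages and the limit is being lowered to 100 while a racing charge
 * tries to push usage to 110.  Either the charger observes the new max,
 * backs out and fails, or it succeeded against the old max, in which
 * case the re-read of usage sees growth past the 90-page snapshot, the
 * old limit is restored and the loop retries, now returning -EBUSY
 * because 110 > 100.
 */
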
/**
 * page_counter_set_min - set the amount of protected memory
 * @counter: counter
 * @nr_pages: value to set
 *
 * The caller must serialize invocations on the same counter.
 */
void page_counter_set_min(struct page_counter *counter, unsigned long nr_pages)
{
	struct page_counter *c;

	counter->min = nr_pages;

	for (c = counter; c; c = c->parent)
		propagate_protected_usage(c, atomic_long_read(&c->usage));
}

/**
 * page_counter_set_low - set the amount of protected memory
 * @counter: counter
 * @nr_pages: value to set
 *
 * The caller must serialize invocations on the same counter.
 */
void page_counter_set_low(struct page_counter *counter, unsigned long nr_pages)
{
	struct page_counter *c;

	counter->low = nr_pages;

	for (c = counter; c; c = c->parent)
		propagate_protected_usage(c, atomic_long_read(&c->usage));
}

/**
 * page_counter_memparse - memparse() for page counter limits
 * @buf: string to parse
 * @max: string meaning maximum possible value
 * @nr_pages: returns the result in number of pages
 *
 * Returns -EINVAL, or 0 and @nr_pages on success.  @nr_pages will be
 * limited to %PAGE_COUNTER_MAX.
 */
int page_counter_memparse(const char *buf, const char *max,
			  unsigned long *nr_pages)
{
	char *end;
	u64 bytes;

	if (!strcmp(buf, max)) {
		*nr_pages = PAGE_COUNTER_MAX;
		return 0;
	}

	bytes = memparse(buf, &end);
	if (*end != '\0')
		return -EINVAL;

	*nr_pages = min(bytes / PAGE_SIZE, (u64)PAGE_COUNTER_MAX);

	return 0;
}
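
/*
 * Minimal usage sketch (hypothetical limit-file write handler; "res" is
 * an assumed page_counter): parse the user string ("max" or a byte
 * value) and install it as the new ceiling.
 *
 *	unsigned long nr_pages;
 *	int err;
 *
 *	err = page_counter_memparse(buf, "max", &nr_pages);
 *	if (err)
 *		return err;
 *	return page_counter_set_max(&res, nr_pages);
 */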