// SPDX-License-Identifier: GPL-2.0
/*
 * Lockless hierarchical page accounting & limiting
 *
 * Copyright (C) 2014 Red Hat, Inc., Johannes Weiner
 */

#include <linux/page_counter.h>
#include <linux/atomic.h>
#include <linux/kernel.h>
#include <linux/string.h>
#include <linux/sched.h>
#include <linux/bug.h>
#include <asm/page.h>
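
/*
 * propagate_protected_usage - propagate protected usage to the parent
 * @c: counter whose usage changed
 * @usage: the new usage value of @c
 *
 * Track how much of @c's usage is covered by its min/low protections:
 * all of it while usage stays within the protected amount, none once
 * usage exceeds it. Any change in the protected amount is carried
 * into the parent's children_min_usage/children_low_usage aggregates.
 */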
static void propagate_protected_usage(struct page_counter *c,
				      unsigned long usage)
{
	unsigned long protected, old_protected;
	long delta;

	if (!c->parent)
		return;

	if (c->min || atomic_long_read(&c->min_usage)) {
		if (usage <= c->min)
			protected = usage;
		else
			protected = 0;

		old_protected = atomic_long_xchg(&c->min_usage, protected);
		delta = protected - old_protected;
		if (delta)
			atomic_long_add(delta, &c->parent->children_min_usage);
	}

	if (c->low || atomic_long_read(&c->low_usage)) {
		if (usage <= c->low)
			protected = usage;
		else
			protected = 0;

		old_protected = atomic_long_xchg(&c->low_usage, protected);
		delta = protected - old_protected;
		if (delta)
			atomic_long_add(delta, &c->parent->children_low_usage);
	}
}

/**
 * page_counter_cancel - take pages out of the local counter
 * @counter: counter
 * @nr_pages: number of pages to cancel
 */
void page_counter_cancel(struct page_counter *counter, unsigned long nr_pages)
{
	long new;

	new = atomic_long_sub_return(nr_pages, &counter->usage);
	propagate_protected_usage(counter, new);
	/* More uncharges than charges? */
	WARN_ON_ONCE(new < 0);
}

/**
 * page_counter_charge - hierarchically charge pages
 * @counter: counter
 * @nr_pages: number of pages to charge
 *
 * NOTE: This does not consider any configured counter limits.
 */
void page_counter_charge(struct page_counter *counter, unsigned long nr_pages)
{
	struct page_counter *c;

	for (c = counter; c; c = c->parent) {
		long new;

		new = atomic_long_add_return(nr_pages, &c->usage);
		propagate_protected_usage(c, new);
		/*
		 * This is indeed racy, but we can live with some
		 * inaccuracy in the watermark.
		 */
		if (new > c->watermark)
			c->watermark = new;
	}
}
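
/*
 * Illustrative sketch (not a kernel API; assumes a caller with a
 * two-level hierarchy): page_counter_charge() bypasses limits,
 * propagates the charge to every ancestor, and ratchets the
 * watermark. page_counter_init() and page_counter_read() are the
 * inline helpers from <linux/page_counter.h>.
 */
static __maybe_unused void example_force_charge(void)
{
	struct page_counter parent, child;

	page_counter_init(&parent, NULL);
	page_counter_init(&child, &parent);

	/* charges land in the child and in all of its ancestors */
	page_counter_charge(&child, 32);

	WARN_ON(page_counter_read(&child) != 32);
	WARN_ON(page_counter_read(&parent) != 32);
	/* the watermark remembers the highest usage ever observed */
	WARN_ON(child.watermark != 32);
}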

/**
 * page_counter_try_charge - try to hierarchically charge pages
 * @counter: counter
 * @nr_pages: number of pages to charge
 * @fail: points to the first counter that hit its limit, if any
 *
 * Returns %true on success, or %false and @fail if the counter or one
 * of its ancestors has hit its configured limit.
 */
bool page_counter_try_charge(struct page_counter *counter,
			     unsigned long nr_pages,
			     struct page_counter **fail)
{
	struct page_counter *c;

	for (c = counter; c; c = c->parent) {
		long new;
		/*
		 * Charge speculatively to avoid an expensive CAS. If
		 * a bigger charge fails, it might falsely lock out a
		 * racing smaller charge and send it into reclaim
		 * early, but the error is limited to the difference
		 * between the two sizes, which is less than 2M/4M in
		 * case of a THP locking out a regular page charge.
		 *
		 * The atomic_long_add_return() implies a full memory
		 * barrier between incrementing the count and reading
		 * the limit. When racing with page_counter_set_max(),
		 * we either see the new limit or the setter sees the
		 * counter has changed and retries.
		 */
		new = atomic_long_add_return(nr_pages, &c->usage);
		if (new > c->max) {
			atomic_long_sub(nr_pages, &c->usage);
			propagate_protected_usage(c, new);
			/*
			 * This is racy, but we can live with some
			 * inaccuracy in the failcnt.
			 */
			c->failcnt++;
			*fail = c;
			goto failed;
		}
		propagate_protected_usage(c, new);
		/*
		 * Just like with failcnt, we can live with some
		 * inaccuracy in the watermark.
		 */
		if (new > c->watermark)
			c->watermark = new;
	}
	return true;

failed:
	for (c = counter; c != *fail; c = c->parent)
		page_counter_cancel(c, nr_pages);

	return false;
}
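
/*
 * Illustrative sketch of the typical try-charge pattern in a
 * hypothetical controller: on failure, @fail names the counter that
 * hit its limit, which a real caller would use to target reclaim.
 */
static __maybe_unused int example_try_charge(struct page_counter *counter,
					     unsigned long nr_pages)
{
	struct page_counter *fail;

	if (page_counter_try_charge(counter, nr_pages, &fail))
		return 0;

	/* reclaim would be driven against the hierarchy of @fail here */
	return -ENOMEM;
}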

/**
 * page_counter_uncharge - hierarchically uncharge pages
 * @counter: counter
 * @nr_pages: number of pages to uncharge
 */
void page_counter_uncharge(struct page_counter *counter, unsigned long nr_pages)
{
	struct page_counter *c;

	for (c = counter; c; c = c->parent)
		page_counter_cancel(c, nr_pages);
}

/**
 * page_counter_set_max - set the maximum number of pages allowed
 * @counter: counter
 * @nr_pages: limit to set
 *
 * Returns 0 on success, -EBUSY if the current number of pages on the
 * counter already exceeds the specified limit.
 *
 * The caller must serialize invocations on the same counter.
 */
int page_counter_set_max(struct page_counter *counter, unsigned long nr_pages)
{
	for (;;) {
		unsigned long old;
		long usage;

		/*
		 * Update the limit while making sure that it's not
		 * below the concurrently-changing counter value.
		 *
		 * The xchg implies two full memory barriers before
		 * and after, so the read-swap-read is ordered and
		 * ensures coherency with page_counter_try_charge():
		 * that function modifies the count before checking
		 * the limit, so if it sees the old limit, we see the
		 * modified counter and retry.
		 */
		usage = atomic_long_read(&counter->usage);

		if (usage > nr_pages)
			return -EBUSY;

		old = xchg(&counter->max, nr_pages);

		if (atomic_long_read(&counter->usage) <= usage)
			return 0;

		counter->max = old;
		cond_resched();
	}
}
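
/*
 * Illustrative sketch: shrinking a limit below current usage returns
 * -EBUSY, so callers reclaim and retry. example_reclaim_pages() is a
 * hypothetical stand-in for a controller's reclaim routine.
 */
static unsigned long example_reclaim_pages(struct page_counter *counter,
					   unsigned long nr_pages)
{
	/* hypothetical: try to uncharge up to @nr_pages from @counter */
	return 0;
}

static __maybe_unused int example_shrink_max(struct page_counter *counter,
					     unsigned long new_max)
{
	while (page_counter_set_max(counter, new_max) == -EBUSY) {
		long excess = atomic_long_read(&counter->usage) - (long)new_max;

		/* usage dropped concurrently; the next attempt succeeds */
		if (excess <= 0)
			continue;
		/* give up when reclaim can no longer make progress */
		if (!example_reclaim_pages(counter, excess))
			return -EBUSY;
	}
	return 0;
}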

/**
 * page_counter_set_min - set the amount of hard memory protection
 * @counter: counter
 * @nr_pages: value to set
 *
 * The caller must serialize invocations on the same counter.
 */
void page_counter_set_min(struct page_counter *counter, unsigned long nr_pages)
{
	struct page_counter *c;

	counter->min = nr_pages;

	for (c = counter; c; c = c->parent)
		propagate_protected_usage(c, atomic_long_read(&c->usage));
}

/**
 * page_counter_set_low - set the amount of best-effort memory protection
 * @counter: counter
 * @nr_pages: value to set
 *
 * The caller must serialize invocations on the same counter.
 */
void page_counter_set_low(struct page_counter *counter, unsigned long nr_pages)
{
	struct page_counter *c;

	counter->low = nr_pages;

	for (c = counter; c; c = c->parent)
		propagate_protected_usage(c, atomic_long_read(&c->usage));
}
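
/*
 * Illustrative sketch: setting a protection recomputes the protected
 * usage tracking up the hierarchy. While usage stays within the
 * protected amount, all of it counts as protected in the parent's
 * aggregate.
 */
static __maybe_unused void example_protection(void)
{
	struct page_counter parent, child;

	page_counter_init(&parent, NULL);
	page_counter_init(&child, &parent);

	page_counter_charge(&child, 16);
	page_counter_set_min(&child, 64);

	/* usage (16) <= min (64), so all 16 pages count as protected */
	WARN_ON(atomic_long_read(&parent.children_min_usage) != 16);
}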

/**
 * page_counter_memparse - memparse() for page counter limits
 * @buf: string to parse
 * @max: string meaning maximum possible value
 * @nr_pages: returns the result in number of pages
 *
 * Returns -EINVAL on a malformed string, or 0 with @nr_pages set on
 * success. @nr_pages will be capped at %PAGE_COUNTER_MAX.
 */
int page_counter_memparse(const char *buf, const char *max,
			  unsigned long *nr_pages)
{
	char *end;
	u64 bytes;

	if (!strcmp(buf, max)) {
		*nr_pages = PAGE_COUNTER_MAX;
		return 0;
	}

	bytes = memparse(buf, &end);
	if (*end != '\0')
		return -EINVAL;

	*nr_pages = min(bytes / PAGE_SIZE, (u64)PAGE_COUNTER_MAX);

	return 0;
}
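
/*
 * Illustrative sketch: the common write-handler pattern that parses a
 * cgroup-style limit string and applies it. "512M" becomes 131072
 * pages with 4K pages; "max" selects PAGE_COUNTER_MAX (no limit).
 */
static __maybe_unused int example_write_limit(struct page_counter *counter,
					      const char *buf)
{
	unsigned long nr_pages;
	int err;

	err = page_counter_memparse(buf, "max", &nr_pages);
	if (err)
		return err;

	return page_counter_set_max(counter, nr_pages);
}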