Branch data Line data Source code
1 : : // SPDX-License-Identifier: GPL-2.0-only
2 : : /*
3 : : * mm/percpu-vm.c - vmalloc area based chunk allocation
4 : : *
5 : : * Copyright (C) 2010 SUSE Linux Products GmbH
6 : : * Copyright (C) 2010 Tejun Heo <tj@kernel.org>
7 : : *
8 : : * Chunks are mapped into vmalloc areas and populated page by page.
9 : : * This is the default chunk allocator.
10 : : */
11 : :
12 : 0 : static struct page *pcpu_chunk_page(struct pcpu_chunk *chunk,
13 : : unsigned int cpu, int page_idx)
14 : : {
15 : : /* must not be used on pre-mapped chunk */
16 : 0 : WARN_ON(chunk->immutable);
17 : :
18 : 0 : return vmalloc_to_page((void *)pcpu_chunk_addr(chunk, cpu, page_idx));
19 : : }
20 : :
21 : : /**
22 : : * pcpu_get_pages - get temp pages array
23 : : *
24 : : * Returns pointer to array of pointers to struct page which can be indexed
25 : : * with pcpu_page_idx(). Note that there is only one array and accesses
26 : : * should be serialized by pcpu_alloc_mutex.
27 : : *
28 : : * RETURNS:
29 : : * Pointer to temp pages array on success.
30 : : */
31 : 3 : static struct page **pcpu_get_pages(void)
32 : : {
33 : : static struct page **pages;
34 : 3 : size_t pages_size = pcpu_nr_units * pcpu_unit_pages * sizeof(pages[0]);
35 : :
36 : : lockdep_assert_held(&pcpu_alloc_mutex);
37 : :
38 : 3 : if (!pages)
39 : 3 : pages = pcpu_mem_zalloc(pages_size, GFP_KERNEL);
40 : 3 : return pages;
41 : : }
42 : :
43 : : /**
44 : : * pcpu_free_pages - free pages which were allocated for @chunk
45 : : * @chunk: chunk pages were allocated for
46 : : * @pages: array of pages to be freed, indexed by pcpu_page_idx()
47 : : * @page_start: page index of the first page to be freed
48 : : * @page_end: page index of the last page to be freed + 1
49 : : *
50 : : * Free pages [@page_start and @page_end) in @pages for all units.
51 : : * The pages were allocated for @chunk.
52 : : */
53 : 0 : static void pcpu_free_pages(struct pcpu_chunk *chunk,
54 : : struct page **pages, int page_start, int page_end)
55 : : {
56 : : unsigned int cpu;
57 : : int i;
58 : :
59 : 0 : for_each_possible_cpu(cpu) {
60 : 0 : for (i = page_start; i < page_end; i++) {
61 : 0 : struct page *page = pages[pcpu_page_idx(cpu, i)];
62 : :
63 : 0 : if (page)
64 : 0 : __free_page(page);
65 : : }
66 : : }
67 : 0 : }
68 : :
69 : : /**
70 : : * pcpu_alloc_pages - allocates pages for @chunk
71 : : * @chunk: target chunk
72 : : * @pages: array to put the allocated pages into, indexed by pcpu_page_idx()
73 : : * @page_start: page index of the first page to be allocated
74 : : * @page_end: page index of the last page to be allocated + 1
75 : : * @gfp: allocation flags passed to the underlying allocator
76 : : *
77 : : * Allocate pages [@page_start,@page_end) into @pages for all units.
78 : : * The allocation is for @chunk. Percpu core doesn't care about the
79 : : * content of @pages and will pass it verbatim to pcpu_map_pages().
80 : : */
81 : 3 : static int pcpu_alloc_pages(struct pcpu_chunk *chunk,
82 : : struct page **pages, int page_start, int page_end,
83 : : gfp_t gfp)
84 : : {
85 : : unsigned int cpu, tcpu;
86 : : int i;
87 : :
88 : 3 : gfp |= __GFP_HIGHMEM;
89 : :
90 : 3 : for_each_possible_cpu(cpu) {
91 : 3 : for (i = page_start; i < page_end; i++) {
92 : 3 : struct page **pagep = &pages[pcpu_page_idx(cpu, i)];
93 : :
94 : 3 : *pagep = alloc_pages_node(cpu_to_node(cpu), gfp, 0);
95 : 3 : if (!*pagep)
96 : : goto err;
97 : : }
98 : : }
99 : : return 0;
100 : :
101 : : err:
102 : 0 : while (--i >= page_start)
103 : 0 : __free_page(pages[pcpu_page_idx(cpu, i)]);
104 : :
105 : 0 : for_each_possible_cpu(tcpu) {
106 : 0 : if (tcpu == cpu)
107 : : break;
108 : 0 : for (i = page_start; i < page_end; i++)
109 : 0 : __free_page(pages[pcpu_page_idx(tcpu, i)]);
110 : : }
111 : : return -ENOMEM;
112 : : }
113 : :
114 : : /**
115 : : * pcpu_pre_unmap_flush - flush cache prior to unmapping
116 : : * @chunk: chunk the regions to be flushed belongs to
117 : : * @page_start: page index of the first page to be flushed
118 : : * @page_end: page index of the last page to be flushed + 1
119 : : *
120 : : * Pages in [@page_start,@page_end) of @chunk are about to be
121 : : * unmapped. Flush cache. As each flushing trial can be very
122 : : * expensive, issue flush on the whole region at once rather than
123 : : * doing it for each cpu. This could be an overkill but is more
124 : : * scalable.
125 : : */
126 : : static void pcpu_pre_unmap_flush(struct pcpu_chunk *chunk,
127 : : int page_start, int page_end)
128 : : {
129 : : flush_cache_vunmap(
130 : : pcpu_chunk_addr(chunk, pcpu_low_unit_cpu, page_start),
131 : : pcpu_chunk_addr(chunk, pcpu_high_unit_cpu, page_end));
132 : : }
133 : :
134 : : static void __pcpu_unmap_pages(unsigned long addr, int nr_pages)
135 : : {
136 : 0 : unmap_kernel_range_noflush(addr, nr_pages << PAGE_SHIFT);
137 : : }
138 : :
139 : : /**
140 : : * pcpu_unmap_pages - unmap pages out of a pcpu_chunk
141 : : * @chunk: chunk of interest
142 : : * @pages: pages array which can be used to pass information to free
143 : : * @page_start: page index of the first page to unmap
144 : : * @page_end: page index of the last page to unmap + 1
145 : : *
146 : : * For each cpu, unmap pages [@page_start,@page_end) out of @chunk.
147 : : * Corresponding elements in @pages were cleared by the caller and can
148 : : * be used to carry information to pcpu_free_pages() which will be
149 : : * called after all unmaps are finished. The caller should call
150 : : * proper pre/post flush functions.
151 : : */
152 : 0 : static void pcpu_unmap_pages(struct pcpu_chunk *chunk,
153 : : struct page **pages, int page_start, int page_end)
154 : : {
155 : : unsigned int cpu;
156 : : int i;
157 : :
158 : 0 : for_each_possible_cpu(cpu) {
159 : 0 : for (i = page_start; i < page_end; i++) {
160 : : struct page *page;
161 : :
162 : 0 : page = pcpu_chunk_page(chunk, cpu, i);
163 : 0 : WARN_ON(!page);
164 : 0 : pages[pcpu_page_idx(cpu, i)] = page;
165 : : }
166 : 0 : __pcpu_unmap_pages(pcpu_chunk_addr(chunk, cpu, page_start),
167 : : page_end - page_start);
168 : : }
169 : 0 : }
170 : :
171 : : /**
172 : : * pcpu_post_unmap_tlb_flush - flush TLB after unmapping
173 : : * @chunk: pcpu_chunk the regions to be flushed belong to
174 : : * @page_start: page index of the first page to be flushed
175 : : * @page_end: page index of the last page to be flushed + 1
176 : : *
177 : : * Pages [@page_start,@page_end) of @chunk have been unmapped. Flush
178 : : * TLB for the regions. This can be skipped if the area is to be
179 : : * returned to vmalloc as vmalloc will handle TLB flushing lazily.
180 : : *
181 : : * As with pcpu_pre_unmap_flush(), TLB flushing also is done at once
182 : : * for the whole region.
183 : : */
184 : 0 : static void pcpu_post_unmap_tlb_flush(struct pcpu_chunk *chunk,
185 : : int page_start, int page_end)
186 : : {
187 : 0 : flush_tlb_kernel_range(
188 : : pcpu_chunk_addr(chunk, pcpu_low_unit_cpu, page_start),
189 : : pcpu_chunk_addr(chunk, pcpu_high_unit_cpu, page_end));
190 : 0 : }
191 : :
192 : : static int __pcpu_map_pages(unsigned long addr, struct page **pages,
193 : : int nr_pages)
194 : : {
195 : 3 : return map_kernel_range_noflush(addr, nr_pages << PAGE_SHIFT,
196 : : PAGE_KERNEL, pages);
197 : : }
198 : :
199 : : /**
200 : : * pcpu_map_pages - map pages into a pcpu_chunk
201 : : * @chunk: chunk of interest
202 : : * @pages: pages array containing pages to be mapped
203 : : * @page_start: page index of the first page to map
204 : : * @page_end: page index of the last page to map + 1
205 : : *
206 : : * For each cpu, map pages [@page_start,@page_end) into @chunk. The
207 : : * caller is responsible for calling pcpu_post_map_flush() after all
208 : : * mappings are complete.
209 : : *
210 : : * This function is responsible for setting up whatever is necessary for
211 : : * reverse lookup (addr -> chunk).
212 : : */
213 : 3 : static int pcpu_map_pages(struct pcpu_chunk *chunk,
214 : : struct page **pages, int page_start, int page_end)
215 : : {
216 : : unsigned int cpu, tcpu;
217 : : int i, err;
218 : :
219 : 3 : for_each_possible_cpu(cpu) {
220 : 3 : err = __pcpu_map_pages(pcpu_chunk_addr(chunk, cpu, page_start),
221 : 3 : &pages[pcpu_page_idx(cpu, page_start)],
222 : : page_end - page_start);
223 : 3 : if (err < 0)
224 : : goto err;
225 : :
226 : 3 : for (i = page_start; i < page_end; i++)
227 : 3 : pcpu_set_page_chunk(pages[pcpu_page_idx(cpu, i)],
228 : : chunk);
229 : : }
230 : : return 0;
231 : : err:
232 : 0 : for_each_possible_cpu(tcpu) {
233 : 0 : if (tcpu == cpu)
234 : : break;
235 : : __pcpu_unmap_pages(pcpu_chunk_addr(chunk, tcpu, page_start),
236 : : page_end - page_start);
237 : : }
238 : 0 : pcpu_post_unmap_tlb_flush(chunk, page_start, page_end);
239 : 0 : return err;
240 : : }
241 : :
242 : : /**
243 : : * pcpu_post_map_flush - flush cache after mapping
244 : : * @chunk: pcpu_chunk the regions to be flushed belong to
245 : : * @page_start: page index of the first page to be flushed
246 : : * @page_end: page index of the last page to be flushed + 1
247 : : *
248 : : * Pages [@page_start,@page_end) of @chunk have been mapped. Flush
249 : : * cache.
250 : : *
251 : : * As with pcpu_pre_unmap_flush(), TLB flushing also is done at once
252 : : * for the whole region.
253 : : */
254 : : static void pcpu_post_map_flush(struct pcpu_chunk *chunk,
255 : : int page_start, int page_end)
256 : : {
257 : : flush_cache_vmap(
258 : : pcpu_chunk_addr(chunk, pcpu_low_unit_cpu, page_start),
259 : : pcpu_chunk_addr(chunk, pcpu_high_unit_cpu, page_end));
260 : : }
261 : :
262 : : /**
263 : : * pcpu_populate_chunk - populate and map an area of a pcpu_chunk
264 : : * @chunk: chunk of interest
265 : : * @page_start: the start page
266 : : * @page_end: the end page
267 : : * @gfp: allocation flags passed to the underlying memory allocator
268 : : *
269 : : * For each cpu, populate and map pages [@page_start,@page_end) into
270 : : * @chunk.
271 : : *
272 : : * CONTEXT:
273 : : * pcpu_alloc_mutex, does GFP_KERNEL allocation.
274 : : */
275 : 3 : static int pcpu_populate_chunk(struct pcpu_chunk *chunk,
276 : : int page_start, int page_end, gfp_t gfp)
277 : : {
278 : : struct page **pages;
279 : :
280 : 3 : pages = pcpu_get_pages();
281 : 3 : if (!pages)
282 : : return -ENOMEM;
283 : :
284 : 3 : if (pcpu_alloc_pages(chunk, pages, page_start, page_end, gfp))
285 : : return -ENOMEM;
286 : :
287 : 3 : if (pcpu_map_pages(chunk, pages, page_start, page_end)) {
288 : 0 : pcpu_free_pages(chunk, pages, page_start, page_end);
289 : 0 : return -ENOMEM;
290 : : }
291 : : pcpu_post_map_flush(chunk, page_start, page_end);
292 : :
293 : : return 0;
294 : : }
295 : :
/**
 * pcpu_depopulate_chunk - depopulate and unmap an area of a pcpu_chunk
 * @chunk: chunk to depopulate
 * @page_start: the start page
 * @page_end: the end page
 *
 * For each cpu, depopulate and unmap pages [@page_start,@page_end)
 * from @chunk.
 *
 * CONTEXT:
 * pcpu_alloc_mutex.
 */
static void pcpu_depopulate_chunk(struct pcpu_chunk *chunk,
				  int page_start, int page_end)
{
	/*
	 * Depopulation can only follow at least one successful
	 * population, so the temp pages array has to exist by now.
	 */
	struct page **pages = pcpu_get_pages();

	BUG_ON(!pages);

	/* cache flush first, then unmap; the unmap fills in @pages */
	pcpu_pre_unmap_flush(chunk, page_start, page_end);
	pcpu_unmap_pages(chunk, pages, page_start, page_end);

	/* no need to flush tlb, vmalloc will handle it lazily */

	/* finally release the pages collected by the unmap step */
	pcpu_free_pages(chunk, pages, page_start, page_end);
}
330 : :
331 : 3 : static struct pcpu_chunk *pcpu_create_chunk(gfp_t gfp)
332 : : {
333 : : struct pcpu_chunk *chunk;
334 : : struct vm_struct **vms;
335 : :
336 : 3 : chunk = pcpu_alloc_chunk(gfp);
337 : 3 : if (!chunk)
338 : : return NULL;
339 : :
340 : 3 : vms = pcpu_get_vm_areas(pcpu_group_offsets, pcpu_group_sizes,
341 : : pcpu_nr_groups, pcpu_atom_size);
342 : 3 : if (!vms) {
343 : 0 : pcpu_free_chunk(chunk);
344 : 0 : return NULL;
345 : : }
346 : :
347 : 3 : chunk->data = vms;
348 : 3 : chunk->base_addr = vms[0]->addr - pcpu_group_offsets[0];
349 : :
350 : : pcpu_stats_chunk_alloc();
351 : 3 : trace_percpu_create_chunk(chunk->base_addr);
352 : :
353 : 3 : return chunk;
354 : : }
355 : :
356 : 0 : static void pcpu_destroy_chunk(struct pcpu_chunk *chunk)
357 : : {
358 : 0 : if (!chunk)
359 : 0 : return;
360 : :
361 : : pcpu_stats_chunk_dealloc();
362 : 0 : trace_percpu_destroy_chunk(chunk->base_addr);
363 : :
364 : 0 : if (chunk->data)
365 : 0 : pcpu_free_vm_areas(chunk->data, pcpu_nr_groups);
366 : 0 : pcpu_free_chunk(chunk);
367 : : }
368 : :
/* Translate @addr to its struct page by way of vmalloc_to_page(). */
static struct page *pcpu_addr_to_page(void *addr)
{
	struct page *page = vmalloc_to_page(addr);

	return page;
}
373 : :
374 : : static int __init pcpu_verify_alloc_info(const struct pcpu_alloc_info *ai)
375 : : {
376 : : /* no extra restriction */
377 : : return 0;
378 : : }
|