// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * Copyright (C) 2010-2017 Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
 *
 * membarrier system call
 */
#include "sched.h"

/*
 * Bitmask made from an "or" of all commands within enum membarrier_cmd,
 * except MEMBARRIER_CMD_QUERY.
 */
#ifdef CONFIG_ARCH_HAS_MEMBARRIER_SYNC_CORE
#define MEMBARRIER_PRIVATE_EXPEDITED_SYNC_CORE_BITMASK                 \
        (MEMBARRIER_CMD_PRIVATE_EXPEDITED_SYNC_CORE                    \
        | MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED_SYNC_CORE)
#else
#define MEMBARRIER_PRIVATE_EXPEDITED_SYNC_CORE_BITMASK 0
#endif

#define MEMBARRIER_CMD_BITMASK                                         \
        (MEMBARRIER_CMD_GLOBAL | MEMBARRIER_CMD_GLOBAL_EXPEDITED       \
        | MEMBARRIER_CMD_REGISTER_GLOBAL_EXPEDITED                     \
        | MEMBARRIER_CMD_PRIVATE_EXPEDITED                             \
        | MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED                    \
        | MEMBARRIER_PRIVATE_EXPEDITED_SYNC_CORE_BITMASK)

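/*
 * IPI handler used by the expedited commands: issue a full memory barrier
 * on the interrupted CPU.
 */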
static void ipi_mb(void *info)
{
        smp_mb();       /* IPIs should be serializing but paranoid. */
}

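/*
 * IPI handler: if the interrupted CPU is still running @info's mm, copy that
 * mm's membarrier state into this runqueue's cached membarrier_state.
 */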
static void ipi_sync_rq_state(void *info)
{
        struct mm_struct *mm = (struct mm_struct *) info;

        if (current->mm != mm)
                return;
        this_cpu_write(runqueues.membarrier_state,
                       atomic_read(&mm->membarrier_state));
        /*
         * Issue a memory barrier after setting
         * MEMBARRIER_STATE_GLOBAL_EXPEDITED in the current runqueue to
         * guarantee that no memory access following registration is reordered
         * before registration.
         */
        smp_mb();
}

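/*
 * Called at exec time: membarrier registrations do not carry over exec, so
 * clear the membarrier state of the mm and of the current runqueue.
 */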
void membarrier_exec_mmap(struct mm_struct *mm)
{
        /*
         * Issue a memory barrier before clearing membarrier_state to
         * guarantee that no memory access prior to exec is reordered after
         * clearing this state.
         */
        smp_mb();
        atomic_set(&mm->membarrier_state, 0);
        /*
         * Keep the runqueue membarrier_state in sync with this mm
         * membarrier_state.
         */
        this_cpu_write(runqueues.membarrier_state, 0);
}

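/*
 * MEMBARRIER_CMD_GLOBAL_EXPEDITED: IPI every other online CPU whose runqueue
 * is marked MEMBARRIER_STATE_GLOBAL_EXPEDITED and is running a userspace
 * task, so a full memory barrier is issued on each of them before returning.
 */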
static int membarrier_global_expedited(void)
{
        int cpu;
        cpumask_var_t tmpmask;

        if (num_online_cpus() == 1)
                return 0;

        /*
         * Matches memory barriers around rq->curr modification in
         * scheduler.
         */
        smp_mb();       /* system call entry is not a mb. */

        if (!zalloc_cpumask_var(&tmpmask, GFP_KERNEL))
                return -ENOMEM;

        cpus_read_lock();
        rcu_read_lock();
        for_each_online_cpu(cpu) {
                struct task_struct *p;

                /*
                 * Skipping the current CPU is OK even though we can be
                 * migrated at any point. The current CPU, at the point
                 * where we read raw_smp_processor_id(), is ensured to
                 * be in program order with respect to the caller
                 * thread. Therefore, we can skip this CPU from the
                 * iteration.
                 */
                if (cpu == raw_smp_processor_id())
                        continue;

                if (!(READ_ONCE(cpu_rq(cpu)->membarrier_state) &
                    MEMBARRIER_STATE_GLOBAL_EXPEDITED))
                        continue;

                /*
                 * Skip the CPU if it runs a kernel thread. The scheduler
                 * leaves the prior task mm in place as an optimization when
                 * scheduling a kthread.
                 */
                p = rcu_dereference(cpu_rq(cpu)->curr);
                if (p->flags & PF_KTHREAD)
                        continue;

                __cpumask_set_cpu(cpu, tmpmask);
        }
        rcu_read_unlock();

        preempt_disable();
        smp_call_function_many(tmpmask, ipi_mb, NULL, 1);
        preempt_enable();

        free_cpumask_var(tmpmask);
        cpus_read_unlock();

        /*
         * Memory barrier on the caller thread _after_ we finished
         * waiting for the last IPI. Matches memory barriers around
         * rq->curr modification in scheduler.
         */
        smp_mb();       /* exit from system call is not a mb */
        return 0;
}

131 : :
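/*
 * MEMBARRIER_CMD_PRIVATE_EXPEDITED{,_SYNC_CORE}: IPI every online CPU that is
 * currently running a thread sharing the caller's mm. Requires that the
 * matching registration command was issued beforehand, otherwise -EPERM.
 */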
static int membarrier_private_expedited(int flags)
{
        int cpu;
        cpumask_var_t tmpmask;
        struct mm_struct *mm = current->mm;

        if (flags & MEMBARRIER_FLAG_SYNC_CORE) {
                if (!IS_ENABLED(CONFIG_ARCH_HAS_MEMBARRIER_SYNC_CORE))
                        return -EINVAL;
                if (!(atomic_read(&mm->membarrier_state) &
                    MEMBARRIER_STATE_PRIVATE_EXPEDITED_SYNC_CORE_READY))
                        return -EPERM;
        } else {
                if (!(atomic_read(&mm->membarrier_state) &
                    MEMBARRIER_STATE_PRIVATE_EXPEDITED_READY))
                        return -EPERM;
        }

        if (atomic_read(&mm->mm_users) == 1 || num_online_cpus() == 1)
                return 0;

        /*
         * Matches memory barriers around rq->curr modification in
         * scheduler.
         */
        smp_mb();       /* system call entry is not a mb. */

        if (!zalloc_cpumask_var(&tmpmask, GFP_KERNEL))
                return -ENOMEM;

        cpus_read_lock();
        rcu_read_lock();
        for_each_online_cpu(cpu) {
                struct task_struct *p;

                /*
                 * Skipping the current CPU is OK even though we can be
                 * migrated at any point. The current CPU, at the point
                 * where we read raw_smp_processor_id(), is ensured to
                 * be in program order with respect to the caller
                 * thread. Therefore, we can skip this CPU from the
                 * iteration.
                 */
                if (cpu == raw_smp_processor_id())
                        continue;
                p = rcu_dereference(cpu_rq(cpu)->curr);
                if (p && p->mm == mm)
                        __cpumask_set_cpu(cpu, tmpmask);
        }
        rcu_read_unlock();

        preempt_disable();
        smp_call_function_many(tmpmask, ipi_mb, NULL, 1);
        preempt_enable();

        free_cpumask_var(tmpmask);
        cpus_read_unlock();

        /*
         * Memory barrier on the caller thread _after_ we finished
         * waiting for the last IPI. Matches memory barriers around
         * rq->curr modification in scheduler.
         */
        smp_mb();       /* exit from system call is not a mb */

        return 0;
}

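/*
 * Propagate @mm's membarrier state to the runqueues of all CPUs currently
 * running a task that uses @mm, so that subsequent scheduler activity and
 * expedited commands observe the updated state.
 */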
static int sync_runqueues_membarrier_state(struct mm_struct *mm)
{
        int membarrier_state = atomic_read(&mm->membarrier_state);
        cpumask_var_t tmpmask;
        int cpu;

        if (atomic_read(&mm->mm_users) == 1 || num_online_cpus() == 1) {
                this_cpu_write(runqueues.membarrier_state, membarrier_state);

                /*
                 * For single mm user, we can simply issue a memory barrier
                 * after setting MEMBARRIER_STATE_GLOBAL_EXPEDITED in the
                 * mm and in the current runqueue to guarantee that no memory
                 * access following registration is reordered before
                 * registration.
                 */
                smp_mb();
                return 0;
        }

        if (!zalloc_cpumask_var(&tmpmask, GFP_KERNEL))
                return -ENOMEM;

        /*
         * For mm with multiple users, we need to ensure all future
         * scheduler executions will observe @mm's new membarrier
         * state.
         */
        synchronize_rcu();

        /*
         * For each cpu runqueue, if the task's mm matches @mm, ensure that all
         * @mm's membarrier state set bits are also set in the runqueue's
         * membarrier state. This ensures that a runqueue scheduling
         * between threads which are users of @mm has its membarrier state
         * updated.
         */
        cpus_read_lock();
        rcu_read_lock();
        for_each_online_cpu(cpu) {
                struct rq *rq = cpu_rq(cpu);
                struct task_struct *p;

                p = rcu_dereference(rq->curr);
                if (p && p->mm == mm)
                        __cpumask_set_cpu(cpu, tmpmask);
        }
        rcu_read_unlock();

        preempt_disable();
        smp_call_function_many(tmpmask, ipi_sync_rq_state, mm, 1);
        preempt_enable();

        free_cpumask_var(tmpmask);
        cpus_read_unlock();

        return 0;
}

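/*
 * MEMBARRIER_CMD_REGISTER_GLOBAL_EXPEDITED: mark the caller's mm as using the
 * global expedited command and sync that state to the runqueues before
 * declaring the registration ready.
 */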
static int membarrier_register_global_expedited(void)
{
        struct task_struct *p = current;
        struct mm_struct *mm = p->mm;
        int ret;

        if (atomic_read(&mm->membarrier_state) &
            MEMBARRIER_STATE_GLOBAL_EXPEDITED_READY)
                return 0;
        atomic_or(MEMBARRIER_STATE_GLOBAL_EXPEDITED, &mm->membarrier_state);
        ret = sync_runqueues_membarrier_state(mm);
        if (ret)
                return ret;
        atomic_or(MEMBARRIER_STATE_GLOBAL_EXPEDITED_READY,
                  &mm->membarrier_state);

        return 0;
}

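/*
 * MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED{,_SYNC_CORE}: record in the
 * caller's mm which private expedited commands may be used, sync that state
 * to the runqueues, then set the corresponding _READY bit.
 */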
static int membarrier_register_private_expedited(int flags)
{
        struct task_struct *p = current;
        struct mm_struct *mm = p->mm;
        int ready_state = MEMBARRIER_STATE_PRIVATE_EXPEDITED_READY,
            set_state = MEMBARRIER_STATE_PRIVATE_EXPEDITED,
            ret;

        if (flags & MEMBARRIER_FLAG_SYNC_CORE) {
                if (!IS_ENABLED(CONFIG_ARCH_HAS_MEMBARRIER_SYNC_CORE))
                        return -EINVAL;
                ready_state =
                        MEMBARRIER_STATE_PRIVATE_EXPEDITED_SYNC_CORE_READY;
        }

        /*
         * We need to consider threads belonging to different thread
         * groups, which use the same mm. (CLONE_VM but not
         * CLONE_THREAD).
         */
        if ((atomic_read(&mm->membarrier_state) & ready_state) == ready_state)
                return 0;
        if (flags & MEMBARRIER_FLAG_SYNC_CORE)
                set_state |= MEMBARRIER_STATE_PRIVATE_EXPEDITED_SYNC_CORE;
        atomic_or(set_state, &mm->membarrier_state);
        ret = sync_runqueues_membarrier_state(mm);
        if (ret)
                return ret;
        atomic_or(ready_state, &mm->membarrier_state);

        return 0;
}

/**
 * sys_membarrier - issue memory barriers on a set of threads
 * @cmd:   Takes command values defined in enum membarrier_cmd.
 * @flags: Currently needs to be 0. For future extensions.
 *
 * If this system call is not implemented, -ENOSYS is returned. If the
 * command specified does not exist, is not available on the running
 * kernel, or if the command argument is invalid, this system call
 * returns -EINVAL. For a given command, with flags argument set to 0,
 * if this system call returns -ENOSYS or -EINVAL, it is guaranteed to
 * always return the same value until reboot. In addition, it can return
 * -ENOMEM if there is not enough memory available to perform the system
 * call.
 *
 * All memory accesses performed in program order from each targeted thread
 * are guaranteed to be ordered with respect to sys_membarrier(). If we use
 * the semantic "barrier()" to represent a compiler barrier forcing memory
 * accesses to be performed in program order across the barrier, and
 * smp_mb() to represent explicit memory barriers forcing full memory
 * ordering across the barrier, we have the following ordering table for
 * each pair of barrier(), sys_membarrier() and smp_mb():
 *
 * The pair ordering is detailed as (O: ordered, X: not ordered):
 *
 *                        barrier()   smp_mb()   sys_membarrier()
 *        barrier()          X           X              O
 *        smp_mb()           X           O              O
 *        sys_membarrier()   O           O              O
 */
SYSCALL_DEFINE2(membarrier, int, cmd, int, flags)
{
        if (unlikely(flags))
                return -EINVAL;
        switch (cmd) {
        case MEMBARRIER_CMD_QUERY:
        {
                int cmd_mask = MEMBARRIER_CMD_BITMASK;

                if (tick_nohz_full_enabled())
                        cmd_mask &= ~MEMBARRIER_CMD_GLOBAL;
                return cmd_mask;
        }
        case MEMBARRIER_CMD_GLOBAL:
                /* MEMBARRIER_CMD_GLOBAL is not compatible with nohz_full. */
                if (tick_nohz_full_enabled())
                        return -EINVAL;
                if (num_online_cpus() > 1)
                        synchronize_rcu();
                return 0;
        case MEMBARRIER_CMD_GLOBAL_EXPEDITED:
                return membarrier_global_expedited();
        case MEMBARRIER_CMD_REGISTER_GLOBAL_EXPEDITED:
                return membarrier_register_global_expedited();
        case MEMBARRIER_CMD_PRIVATE_EXPEDITED:
                return membarrier_private_expedited(0);
        case MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED:
                return membarrier_register_private_expedited(0);
        case MEMBARRIER_CMD_PRIVATE_EXPEDITED_SYNC_CORE:
                return membarrier_private_expedited(MEMBARRIER_FLAG_SYNC_CORE);
        case MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED_SYNC_CORE:
                return membarrier_register_private_expedited(MEMBARRIER_FLAG_SYNC_CORE);
        default:
                return -EINVAL;
        }
}
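
/*
 * Illustrative userspace usage (a minimal sketch, not part of this file):
 * a process first registers the private expedited command for its mm, then
 * may use MEMBARRIER_CMD_PRIVATE_EXPEDITED whenever it needs a full memory
 * barrier on every thread sharing that mm, invoking the system call via
 * syscall(2):
 *
 *      #include <linux/membarrier.h>
 *      #include <sys/syscall.h>
 *      #include <unistd.h>
 *
 *      static int membarrier(int cmd, int flags)
 *      {
 *              return syscall(__NR_membarrier, cmd, flags);
 *      }
 *
 *      // Once, early in the process lifetime:
 *      //      membarrier(MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED, 0);
 *      // Later, on the slow side of an asymmetric fence:
 *      //      membarrier(MEMBARRIER_CMD_PRIVATE_EXPEDITED, 0);
 *
 * Skipping the registration step makes the expedited command fail with
 * -EPERM, as enforced in membarrier_private_expedited() above.
 */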