Branch data Line data Source code
1 : : /*
2 : : * Copyright © 2017 Intel Corporation
3 : : *
4 : : * Permission is hereby granted, free of charge, to any person obtaining a
5 : : * copy of this software and associated documentation files (the "Software"),
6 : : * to deal in the Software without restriction, including without limitation
7 : : * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 : : * and/or sell copies of the Software, and to permit persons to whom the
9 : : * Software is furnished to do so, subject to the following conditions:
10 : : *
11 : : * The above copyright notice and this permission notice (including the next
12 : : * paragraph) shall be included in all copies or substantial portions of the
13 : : * Software.
14 : : *
15 : : * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 : : * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 : : * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 : : * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 : : * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 : : * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 : : * IN THE SOFTWARE.
22 : : *
23 : : */
24 : :
25 : : #include <linux/slab.h>
26 : :
27 : : #include "i915_syncmap.h"
28 : :
29 : : #include "i915_gem.h" /* GEM_BUG_ON() */
30 : : #include "i915_selftest.h"
31 : :
32 : : #define SHIFT ilog2(KSYNCMAP)
33 : : #define MASK (KSYNCMAP - 1)
34 : :
35 : : /*
36 : : * struct i915_syncmap is a layer of a radixtree that maps a u64 fence
37 : : * context id to the last u32 fence seqno waited upon from that context.
38 : : * Unlike lib/radixtree it uses a parent pointer that allows traversal back to
39 : : * the root. This allows us to access the whole tree via a single pointer
40 : : * to the most recently used layer. We expect fence contexts to be dense
41 : : * and most reuse to be on the same i915_gem_context but on neighbouring
42 : : * engines (i.e. on adjacent contexts) and reuse the same leaf, a very
43 : : * effective lookup cache. If the new lookup is not on the same leaf, we
44 : : * expect it to be on the neighbouring branch.
45 : : *
46 : : * A leaf holds an array of u32 seqno, and has height 0. The bitmap field
47 : : * allows us to store whether a particular seqno is valid (i.e. allows us
48 : : * to distinguish unset from 0).
49 : : *
50 : : * A branch holds an array of layer pointers, and has height > 0, and always
51 : : * has at least 2 layers (either branches or leaves) below it.
52 : : *
53 : : * For example,
54 : : * for x in
55 : : * 0 1 2 0x10 0x11 0x200 0x201
56 : : * 0x500000 0x500001 0x503000 0x503001
57 : : * 0xE<<60:
58 : : * i915_syncmap_set(&sync, x, lower_32_bits(x));
59 : : * will build a tree like:
60 : : * 0xXXXXXXXXXXXXXXXX
61 : : * 0-> 0x0000000000XXXXXX
62 : : * | 0-> 0x0000000000000XXX
63 : : * | | 0-> 0x00000000000000XX
64 : : * | | | 0-> 0x000000000000000X 0:0, 1:1, 2:2
65 : : * | | | 1-> 0x000000000000001X 0:10, 1:11
66 : : * | | 2-> 0x000000000000020X 0:200, 1:201
67 : : * | 5-> 0x000000000050XXXX
68 : : * | 0-> 0x000000000050000X 0:500000, 1:500001
69 : : * | 3-> 0x000000000050300X 0:503000, 1:503001
70 : : * e-> 0xe00000000000000X e:e
71 : : */
72 : :
73 : : struct i915_syncmap {
74 : : u64 prefix;
75 : : unsigned int height;
76 : : unsigned int bitmap;
77 : : struct i915_syncmap *parent;
78 : : /*
79 : : * Following this header is an array of either seqno or child pointers:
80 : : * union {
81 : : * u32 seqno[KSYNCMAP];
82 : : * struct i915_syncmap *child[KSYNCMAP];
83 : : * };
84 : : */
85 : : };
86 : :
87 : : /**
88 : : * i915_syncmap_init -- initialise the #i915_syncmap
89 : : * @root: pointer to the #i915_syncmap
90 : : */
91 : 0 : void i915_syncmap_init(struct i915_syncmap **root)
92 : : {
93 : 0 : BUILD_BUG_ON_NOT_POWER_OF_2(KSYNCMAP);
94 : 0 : BUILD_BUG_ON_NOT_POWER_OF_2(SHIFT);
95 : 0 : BUILD_BUG_ON(KSYNCMAP > BITS_PER_TYPE((*root)->bitmap));
96 : 0 : *root = NULL;
97 : 0 : }
98 : :
99 : 0 : static inline u32 *__sync_seqno(struct i915_syncmap *p)
100 : : {
101 : 0 : GEM_BUG_ON(p->height);
102 : 0 : return (u32 *)(p + 1);
103 : : }
104 : :
105 : 0 : static inline struct i915_syncmap **__sync_child(struct i915_syncmap *p)
106 : : {
107 : 0 : GEM_BUG_ON(!p->height);
108 : 0 : return (struct i915_syncmap **)(p + 1);
109 : : }
110 : :
111 : : static inline unsigned int
112 : 0 : __sync_branch_idx(const struct i915_syncmap *p, u64 id)
113 : : {
114 : 0 : return (id >> p->height) & MASK;
115 : : }
116 : :
117 : : static inline unsigned int
118 : 0 : __sync_leaf_idx(const struct i915_syncmap *p, u64 id)
119 : : {
120 : 0 : GEM_BUG_ON(p->height);
121 : 0 : return id & MASK;
122 : : }
123 : :
124 : 0 : static inline u64 __sync_branch_prefix(const struct i915_syncmap *p, u64 id)
125 : : {
126 : 0 : return id >> p->height >> SHIFT;
127 : : }
128 : :
129 : 0 : static inline u64 __sync_leaf_prefix(const struct i915_syncmap *p, u64 id)
130 : : {
131 : 0 : GEM_BUG_ON(p->height);
132 : 0 : return id >> SHIFT;
133 : : }
134 : :
135 : 0 : static inline bool seqno_later(u32 a, u32 b)
136 : : {
137 : 0 : return (s32)(a - b) >= 0;
138 : : }
139 : :
140 : : /**
141 : : * i915_syncmap_is_later -- compare against the last know sync point
142 : : * @root: pointer to the #i915_syncmap
143 : : * @id: the context id (other timeline) we are synchronising to
144 : : * @seqno: the sequence number along the other timeline
145 : : *
146 : : * If we have already synchronised this @root timeline with another (@id) then
147 : : * we can omit any repeated or earlier synchronisation requests. If the two
148 : : * timelines are already coupled, we can also omit the dependency between the
149 : : * two as that is already known via the timeline.
150 : : *
151 : : * Returns true if the two timelines are already synchronised wrt to @seqno,
152 : : * false if not and the synchronisation must be emitted.
153 : : */
154 : 0 : bool i915_syncmap_is_later(struct i915_syncmap **root, u64 id, u32 seqno)
155 : : {
156 : 0 : struct i915_syncmap *p;
157 : 0 : unsigned int idx;
158 : :
159 : 0 : p = *root;
160 [ # # ]: 0 : if (!p)
161 : : return false;
162 : :
163 [ # # ]: 0 : if (likely(__sync_leaf_prefix(p, id) == p->prefix))
164 : 0 : goto found;
165 : :
166 : : /* First climb the tree back to a parent branch */
167 : 0 : do {
168 : 0 : p = p->parent;
169 [ # # ]: 0 : if (!p)
170 : : return false;
171 : :
172 [ # # ]: 0 : if (__sync_branch_prefix(p, id) == p->prefix)
173 : : break;
174 : : } while (1);
175 : :
176 : : /* And then descend again until we find our leaf */
177 : 0 : do {
178 [ # # ]: 0 : if (!p->height)
179 : : break;
180 : :
181 : 0 : p = __sync_child(p)[__sync_branch_idx(p, id)];
182 [ # # ]: 0 : if (!p)
183 : : return false;
184 : :
185 [ # # ]: 0 : if (__sync_branch_prefix(p, id) != p->prefix)
186 : : return false;
187 : : } while (1);
188 : :
189 : 0 : *root = p;
190 : 0 : found:
191 : 0 : idx = __sync_leaf_idx(p, id);
192 [ # # ]: 0 : if (!(p->bitmap & BIT(idx)))
193 : : return false;
194 : :
195 : 0 : return seqno_later(__sync_seqno(p)[idx], seqno);
196 : : }
197 : :
198 : : static struct i915_syncmap *
199 : 0 : __sync_alloc_leaf(struct i915_syncmap *parent, u64 id)
200 : : {
201 : 0 : struct i915_syncmap *p;
202 : :
203 : 0 : p = kmalloc(sizeof(*p) + KSYNCMAP * sizeof(u32), GFP_KERNEL);
204 [ # # ]: 0 : if (unlikely(!p))
205 : : return NULL;
206 : :
207 : 0 : p->parent = parent;
208 : 0 : p->height = 0;
209 : 0 : p->bitmap = 0;
210 : 0 : p->prefix = __sync_leaf_prefix(p, id);
211 : 0 : return p;
212 : : }
213 : :
214 : 0 : static inline void __sync_set_seqno(struct i915_syncmap *p, u64 id, u32 seqno)
215 : : {
216 : 0 : unsigned int idx = __sync_leaf_idx(p, id);
217 : :
218 : 0 : p->bitmap |= BIT(idx);
219 : 0 : __sync_seqno(p)[idx] = seqno;
220 : : }
221 : :
222 : 0 : static inline void __sync_set_child(struct i915_syncmap *p,
223 : : unsigned int idx,
224 : : struct i915_syncmap *child)
225 : : {
226 : 0 : p->bitmap |= BIT(idx);
227 : 0 : __sync_child(p)[idx] = child;
228 : : }
229 : :
230 : 0 : static noinline int __sync_set(struct i915_syncmap **root, u64 id, u32 seqno)
231 : : {
232 : 0 : struct i915_syncmap *p = *root;
233 : 0 : unsigned int idx;
234 : :
235 [ # # ]: 0 : if (!p) {
236 : 0 : p = __sync_alloc_leaf(NULL, id);
237 [ # # ]: 0 : if (unlikely(!p))
238 : : return -ENOMEM;
239 : :
240 : 0 : goto found;
241 : : }
242 : :
243 : : /* Caller handled the likely cached case */
244 : 0 : GEM_BUG_ON(__sync_leaf_prefix(p, id) == p->prefix);
245 : :
246 : : /* Climb back up the tree until we find a common prefix */
247 : 0 : do {
248 [ # # ]: 0 : if (!p->parent)
249 : : break;
250 : :
251 : 0 : p = p->parent;
252 : :
253 [ # # ]: 0 : if (__sync_branch_prefix(p, id) == p->prefix)
254 : : break;
255 : : } while (1);
256 : :
257 : : /*
258 : : * No shortcut, we have to descend the tree to find the right layer
259 : : * containing this fence.
260 : : *
261 : : * Each layer in the tree holds 16 (KSYNCMAP) pointers, either fences
262 : : * or lower layers. Leaf nodes (height = 0) contain the fences, all
263 : : * other nodes (height > 0) are internal layers that point to a lower
264 : : * node. Each internal layer has at least 2 descendents.
265 : : *
266 : : * Starting at the top, we check whether the current prefix matches. If
267 : : * it doesn't, we have gone past our target and need to insert a join
268 : : * into the tree, and a new leaf node for the target as a descendent
269 : : * of the join, as well as the original layer.
270 : : *
271 : : * The matching prefix means we are still following the right branch
272 : : * of the tree. If it has height 0, we have found our leaf and just
273 : : * need to replace the fence slot with ourselves. If the height is
274 : : * not zero, our slot contains the next layer in the tree (unless
275 : : * it is empty, in which case we can add ourselves as a new leaf).
276 : : * As descend the tree the prefix grows (and height decreases).
277 : : */
278 : 0 : do {
279 : 0 : struct i915_syncmap *next;
280 : :
281 [ # # ]: 0 : if (__sync_branch_prefix(p, id) != p->prefix) {
282 : 0 : unsigned int above;
283 : :
284 : : /* Insert a join above the current layer */
285 : 0 : next = kzalloc(sizeof(*next) + KSYNCMAP * sizeof(next),
286 : : GFP_KERNEL);
287 [ # # ]: 0 : if (unlikely(!next))
288 : : return -ENOMEM;
289 : :
290 : : /* Compute the height at which these two diverge */
291 [ # # ]: 0 : above = fls64(__sync_branch_prefix(p, id) ^ p->prefix);
292 : 0 : above = round_up(above, SHIFT);
293 : 0 : next->height = above + p->height;
294 : 0 : next->prefix = __sync_branch_prefix(next, id);
295 : :
296 : : /* Insert the join into the parent */
297 [ # # ]: 0 : if (p->parent) {
298 : 0 : idx = __sync_branch_idx(p->parent, id);
299 : 0 : __sync_child(p->parent)[idx] = next;
300 : 0 : GEM_BUG_ON(!(p->parent->bitmap & BIT(idx)));
301 : : }
302 : 0 : next->parent = p->parent;
303 : :
304 : : /* Compute the idx of the other branch, not our id! */
305 : 0 : idx = p->prefix >> (above - SHIFT) & MASK;
306 : 0 : __sync_set_child(next, idx, p);
307 : 0 : p->parent = next;
308 : :
309 : : /* Ascend to the join */
310 : 0 : p = next;
311 : : } else {
312 [ # # ]: 0 : if (!p->height)
313 : : break;
314 : : }
315 : :
316 : : /* Descend into the next layer */
317 : 0 : GEM_BUG_ON(!p->height);
318 : 0 : idx = __sync_branch_idx(p, id);
319 : 0 : next = __sync_child(p)[idx];
320 [ # # ]: 0 : if (!next) {
321 : 0 : next = __sync_alloc_leaf(p, id);
322 [ # # ]: 0 : if (unlikely(!next))
323 : : return -ENOMEM;
324 : :
325 : 0 : __sync_set_child(p, idx, next);
326 : 0 : p = next;
327 : 0 : break;
328 : : }
329 : :
330 : : p = next;
331 : : } while (1);
332 : :
333 : 0 : found:
334 : 0 : GEM_BUG_ON(p->prefix != __sync_leaf_prefix(p, id));
335 : 0 : __sync_set_seqno(p, id, seqno);
336 : 0 : *root = p;
337 : 0 : return 0;
338 : : }
339 : :
340 : : /**
341 : : * i915_syncmap_set -- mark the most recent syncpoint between contexts
342 : : * @root: pointer to the #i915_syncmap
343 : : * @id: the context id (other timeline) we have synchronised to
344 : : * @seqno: the sequence number along the other timeline
345 : : *
346 : : * When we synchronise this @root timeline with another (@id), we also know
347 : : * that we have synchronized with all previous seqno along that timeline. If
348 : : * we then have a request to synchronise with the same seqno or older, we can
349 : : * omit it, see i915_syncmap_is_later()
350 : : *
351 : : * Returns 0 on success, or a negative error code.
352 : : */
353 : 0 : int i915_syncmap_set(struct i915_syncmap **root, u64 id, u32 seqno)
354 : : {
355 : 0 : struct i915_syncmap *p = *root;
356 : :
357 : : /*
358 : : * We expect to be called in sequence following is_later(id), which
359 : : * should have preloaded the root for us.
360 : : */
361 [ # # # # ]: 0 : if (likely(p && __sync_leaf_prefix(p, id) == p->prefix)) {
362 : 0 : __sync_set_seqno(p, id, seqno);
363 : 0 : return 0;
364 : : }
365 : :
366 : 0 : return __sync_set(root, id, seqno);
367 : : }
368 : :
369 : 0 : static void __sync_free(struct i915_syncmap *p)
370 : : {
371 [ # # ]: 0 : if (p->height) {
372 : : unsigned int i;
373 : :
374 [ # # ]: 0 : while ((i = ffs(p->bitmap))) {
375 : 0 : p->bitmap &= ~0u << i;
376 : 0 : __sync_free(__sync_child(p)[i - 1]);
377 : : }
378 : : }
379 : :
380 : 0 : kfree(p);
381 : 0 : }
382 : :
383 : : /**
384 : : * i915_syncmap_free -- free all memory associated with the syncmap
385 : : * @root: pointer to the #i915_syncmap
386 : : *
387 : : * Either when the timeline is to be freed and we no longer need the sync
388 : : * point tracking, or when the fences are all known to be signaled and the
389 : : * sync point tracking is redundant, we can free the #i915_syncmap to recover
390 : : * its allocations.
391 : : *
392 : : * Will reinitialise the @root pointer so that the #i915_syncmap is ready for
393 : : * reuse.
394 : : */
395 : 0 : void i915_syncmap_free(struct i915_syncmap **root)
396 : : {
397 : 0 : struct i915_syncmap *p;
398 : :
399 : 0 : p = *root;
400 [ # # ]: 0 : if (!p)
401 : : return;
402 : :
403 [ # # ]: 0 : while (p->parent)
404 : : p = p->parent;
405 : :
406 : 0 : __sync_free(p);
407 : 0 : *root = NULL;
408 : : }
409 : :
410 : : #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
411 : : #include "selftests/i915_syncmap.c"
412 : : #endif
|