Branch data Line data Source code
1 : : /*
2 : : * Copyright © 2008-2015 Intel Corporation
3 : : *
4 : : * Permission is hereby granted, free of charge, to any person obtaining a
5 : : * copy of this software and associated documentation files (the "Software"),
6 : : * to deal in the Software without restriction, including without limitation
7 : : * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 : : * and/or sell copies of the Software, and to permit persons to whom the
9 : : * Software is furnished to do so, subject to the following conditions:
10 : : *
11 : : * The above copyright notice and this permission notice (including the next
12 : : * paragraph) shall be included in all copies or substantial portions of the
13 : : * Software.
14 : : *
15 : : * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 : : * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 : : * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 : : * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 : : * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 : : * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 : : * IN THE SOFTWARE.
22 : : *
23 : : */
24 : :
25 : : #include <linux/dma-fence-array.h>
26 : : #include <linux/irq_work.h>
27 : : #include <linux/prefetch.h>
28 : : #include <linux/sched.h>
29 : : #include <linux/sched/clock.h>
30 : : #include <linux/sched/signal.h>
31 : :
32 : : #include "gem/i915_gem_context.h"
33 : : #include "gt/intel_context.h"
34 : : #include "gt/intel_ring.h"
35 : : #include "gt/intel_rps.h"
36 : :
37 : : #include "i915_active.h"
38 : : #include "i915_drv.h"
39 : : #include "i915_globals.h"
40 : : #include "i915_trace.h"
41 : : #include "intel_pm.h"
42 : :
43 : : struct execute_cb {
44 : : struct list_head link;
45 : : struct irq_work work;
46 : : struct i915_sw_fence *fence;
47 : : void (*hook)(struct i915_request *rq, struct dma_fence *signal);
48 : : struct i915_request *signal;
49 : : };
50 : :
51 : : static struct i915_global_request {
52 : : struct i915_global base;
53 : : struct kmem_cache *slab_requests;
54 : : struct kmem_cache *slab_dependencies;
55 : : struct kmem_cache *slab_execute_cbs;
56 : : } global;
57 : :
58 : 0 : static const char *i915_fence_get_driver_name(struct dma_fence *fence)
59 : : {
60 [ # # ]: 0 : return dev_name(to_request(fence)->i915->drm.dev);
61 : : }
62 : :
63 : 0 : static const char *i915_fence_get_timeline_name(struct dma_fence *fence)
64 : : {
65 : 0 : const struct i915_gem_context *ctx;
66 : :
67 : : /*
68 : : * The timeline struct (as part of the ppgtt underneath a context)
69 : : * may be freed when the request is no longer in use by the GPU.
70 : : * We could extend the life of a context to beyond that of all
71 : : * fences, possibly keeping the hw resource around indefinitely,
72 : : * or we just give them a false name. Since
73 : : * dma_fence_ops.get_timeline_name is a debug feature, the occasional
74 : : * lie seems justifiable.
75 : : */
76 [ # # ]: 0 : if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags))
77 : : return "signaled";
78 : :
79 [ # # ]: 0 : ctx = i915_request_gem_context(to_request(fence));
80 [ # # ]: 0 : if (!ctx)
81 : : return "[" DRIVER_NAME "]";
82 : :
83 : 0 : return ctx->name;
84 : : }
85 : :
86 : 0 : static bool i915_fence_signaled(struct dma_fence *fence)
87 : : {
88 : 0 : return i915_request_completed(to_request(fence));
89 : : }
90 : :
91 : 0 : static bool i915_fence_enable_signaling(struct dma_fence *fence)
92 : : {
93 : 0 : return i915_request_enable_breadcrumb(to_request(fence));
94 : : }
95 : :
96 : 0 : static signed long i915_fence_wait(struct dma_fence *fence,
97 : : bool interruptible,
98 : : signed long timeout)
99 : : {
100 : 0 : return i915_request_wait(to_request(fence),
101 : : interruptible | I915_WAIT_PRIORITY,
102 : : timeout);
103 : : }
104 : :
105 : 0 : static void i915_fence_release(struct dma_fence *fence)
106 : : {
107 : 0 : struct i915_request *rq = to_request(fence);
108 : :
109 : : /*
 110                 :            :          * The request is put onto an RCU freelist (i.e. the address
 111                 :            :          * is immediately reused), so mark the fences as being freed now.
112 : : * Otherwise the debugobjects for the fences are only marked as
113 : : * freed when the slab cache itself is freed, and so we would get
114 : : * caught trying to reuse dead objects.
115 : : */
116 : 0 : i915_sw_fence_fini(&rq->submit);
117 : 0 : i915_sw_fence_fini(&rq->semaphore);
118 : :
119 : 0 : kmem_cache_free(global.slab_requests, rq);
120 : 0 : }
121 : :
122 : : const struct dma_fence_ops i915_fence_ops = {
123 : : .get_driver_name = i915_fence_get_driver_name,
124 : : .get_timeline_name = i915_fence_get_timeline_name,
125 : : .enable_signaling = i915_fence_enable_signaling,
126 : : .signaled = i915_fence_signaled,
127 : : .wait = i915_fence_wait,
128 : : .release = i915_fence_release,
129 : : };
130 : :
131 : 0 : static void irq_execute_cb(struct irq_work *wrk)
132 : : {
133 : 0 : struct execute_cb *cb = container_of(wrk, typeof(*cb), work);
134 : :
135 : 0 : i915_sw_fence_complete(cb->fence);
136 : 0 : kmem_cache_free(global.slab_execute_cbs, cb);
137 : 0 : }
138 : :
139 : 0 : static void irq_execute_cb_hook(struct irq_work *wrk)
140 : : {
141 : 0 : struct execute_cb *cb = container_of(wrk, typeof(*cb), work);
142 : :
143 : 0 : cb->hook(container_of(cb->fence, struct i915_request, submit),
144 : 0 : &cb->signal->fence);
145 : 0 : i915_request_put(cb->signal);
146 : :
147 : 0 : irq_execute_cb(wrk);
148 : 0 : }
149 : :
150 : 0 : static void __notify_execute_cb(struct i915_request *rq)
151 : : {
152 : 0 : struct execute_cb *cb;
153 : :
154 : 0 : lockdep_assert_held(&rq->lock);
155 : :
156 : 0 : if (list_empty(&rq->execute_cb))
157 : : return;
158 : :
159 [ # # # # ]: 0 : list_for_each_entry(cb, &rq->execute_cb, link)
160 : 0 : irq_work_queue(&cb->work);
161 : :
162 : : /*
163 : : * XXX Rollback on __i915_request_unsubmit()
164 : : *
165 : : * In the future, perhaps when we have an active time-slicing scheduler,
166 : : * it will be interesting to unsubmit parallel execution and remove
 167                 :            :          * busywaits from the GPU until their master is restarted. This is
 168                 :            :          * quite hairy: we have to carefully roll back the fence and do a
169 : : * preempt-to-idle cycle on the target engine, all the while the
170 : : * master execute_cb may refire.
171 : : */
172 : 0 : INIT_LIST_HEAD(&rq->execute_cb);
173 : : }
174 : :
175 : : static inline void
176 : 0 : remove_from_client(struct i915_request *request)
177 : : {
178 : 0 : struct drm_i915_file_private *file_priv;
179 : :
180 [ # # ]: 0 : if (!READ_ONCE(request->file_priv))
181 : : return;
182 : :
183 : 0 : rcu_read_lock();
184 : 0 : file_priv = xchg(&request->file_priv, NULL);
185 [ # # ]: 0 : if (file_priv) {
186 : 0 : spin_lock(&file_priv->mm.lock);
187 : 0 : list_del(&request->client_link);
188 : 0 : spin_unlock(&file_priv->mm.lock);
189 : : }
190 : 0 : rcu_read_unlock();
191 : : }
192 : :
193 : 0 : static void free_capture_list(struct i915_request *request)
194 : : {
195 : 0 : struct i915_capture_list *capture;
196 : :
197 : 0 : capture = fetch_and_zero(&request->capture_list);
198 [ # # ]: 0 : while (capture) {
199 : 0 : struct i915_capture_list *next = capture->next;
200 : :
201 : 0 : kfree(capture);
202 : 0 : capture = next;
203 : : }
204 : : }
205 : :
206 : 0 : static void remove_from_engine(struct i915_request *rq)
207 : : {
208 : 0 : struct intel_engine_cs *engine, *locked;
209 : :
210 : : /*
211 : : * Virtual engines complicate acquiring the engine timeline lock,
212 : : * as their rq->engine pointer is not stable until under that
213 : : * engine lock. The simple ploy we use is to take the lock then
214 : : * check that the rq still belongs to the newly locked engine.
215 : : */
216 : 0 : locked = READ_ONCE(rq->engine);
217 : 0 : spin_lock_irq(&locked->active.lock);
218 [ # # ]: 0 : while (unlikely(locked != (engine = READ_ONCE(rq->engine)))) {
219 : 0 : spin_unlock(&locked->active.lock);
220 : 0 : spin_lock(&engine->active.lock);
221 : 0 : locked = engine;
222 : : }
223 : 0 : list_del_init(&rq->sched.link);
224 : 0 : clear_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags);
225 : 0 : clear_bit(I915_FENCE_FLAG_HOLD, &rq->fence.flags);
226 : 0 : spin_unlock_irq(&locked->active.lock);
227 : 0 : }
228 : :
229 : 0 : bool i915_request_retire(struct i915_request *rq)
230 : : {
231 [ # # ]: 0 : if (!i915_request_completed(rq))
232 : : return false;
233 : :
234 : 0 : RQ_TRACE(rq, "\n");
235 : :
236 : 0 : GEM_BUG_ON(!i915_sw_fence_signaled(&rq->submit));
237 : 0 : trace_i915_request_retire(rq);
238 : :
239 : : /*
240 : : * We know the GPU must have read the request to have
241 : : * sent us the seqno + interrupt, so use the position
 242                 :            :          * of the tail of the request to update the last known position
243 : : * of the GPU head.
244 : : *
245 : : * Note this requires that we are always called in request
246 : : * completion order.
247 : : */
248 : 0 : GEM_BUG_ON(!list_is_first(&rq->link,
249 : : &i915_request_timeline(rq)->requests));
250 : 0 : rq->ring->head = rq->postfix;
251 : :
252 : : /*
253 : : * We only loosely track inflight requests across preemption,
254 : : * and so we may find ourselves attempting to retire a _completed_
255 : : * request that we have removed from the HW and put back on a run
256 : : * queue.
257 : : */
258 : 0 : remove_from_engine(rq);
259 : :
260 : 0 : spin_lock_irq(&rq->lock);
261 : 0 : i915_request_mark_complete(rq);
262 [ # # ]: 0 : if (!i915_request_signaled(rq))
263 : 0 : dma_fence_signal_locked(&rq->fence);
264 [ # # ]: 0 : if (test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT, &rq->fence.flags))
265 : 0 : i915_request_cancel_breadcrumb(rq);
266 [ # # ]: 0 : if (i915_request_has_waitboost(rq)) {
267 : 0 : GEM_BUG_ON(!atomic_read(&rq->engine->gt->rps.num_waiters));
268 : 0 : atomic_dec(&rq->engine->gt->rps.num_waiters);
269 : : }
270 [ # # ]: 0 : if (!test_bit(I915_FENCE_FLAG_ACTIVE, &rq->fence.flags)) {
271 : 0 : set_bit(I915_FENCE_FLAG_ACTIVE, &rq->fence.flags);
272 [ # # ]: 0 : __notify_execute_cb(rq);
273 : : }
274 : 0 : GEM_BUG_ON(!list_empty(&rq->execute_cb));
275 : 0 : spin_unlock_irq(&rq->lock);
276 : :
277 : 0 : remove_from_client(rq);
278 [ # # ]: 0 : list_del_rcu(&rq->link);
279 : :
280 [ # # ]: 0 : intel_context_exit(rq->context);
281 : 0 : intel_context_unpin(rq->context);
282 : :
283 : 0 : free_capture_list(rq);
284 : 0 : i915_sched_node_fini(&rq->sched);
285 : 0 : i915_request_put(rq);
286 : :
287 : 0 : return true;
288 : : }
289 : :
290 : 0 : void i915_request_retire_upto(struct i915_request *rq)
291 : : {
292 : 0 : struct intel_timeline * const tl = i915_request_timeline(rq);
293 : 0 : struct i915_request *tmp;
294 : :
295 : 0 : RQ_TRACE(rq, "\n");
296 : :
297 : 0 : GEM_BUG_ON(!i915_request_completed(rq));
298 : :
299 : 0 : do {
300 : 0 : tmp = list_first_entry(&tl->requests, typeof(*tmp), link);
301 [ # # # # ]: 0 : } while (i915_request_retire(tmp) && tmp != rq);
302 : 0 : }
303 : :
304 : : static int
305 : 0 : __await_execution(struct i915_request *rq,
306 : : struct i915_request *signal,
307 : : void (*hook)(struct i915_request *rq,
308 : : struct dma_fence *signal),
309 : : gfp_t gfp)
310 : : {
311 : 0 : struct execute_cb *cb;
312 : :
313 [ # # ]: 0 : if (i915_request_is_active(signal)) {
314 [ # # ]: 0 : if (hook)
315 : 0 : hook(rq, &signal->fence);
316 : 0 : return 0;
317 : : }
318 : :
319 : 0 : cb = kmem_cache_alloc(global.slab_execute_cbs, gfp);
320 [ # # ]: 0 : if (!cb)
321 : : return -ENOMEM;
322 : :
323 : 0 : cb->fence = &rq->submit;
324 : 0 : i915_sw_fence_await(cb->fence);
325 : 0 : init_irq_work(&cb->work, irq_execute_cb);
326 : :
327 [ # # ]: 0 : if (hook) {
328 : 0 : cb->hook = hook;
329 [ # # ]: 0 : cb->signal = i915_request_get(signal);
330 : 0 : cb->work.func = irq_execute_cb_hook;
331 : : }
332 : :
333 : 0 : spin_lock_irq(&signal->lock);
334 [ # # ]: 0 : if (i915_request_is_active(signal)) {
335 [ # # ]: 0 : if (hook) {
336 : 0 : hook(rq, &signal->fence);
337 : 0 : i915_request_put(signal);
338 : : }
339 : 0 : i915_sw_fence_complete(cb->fence);
340 : 0 : kmem_cache_free(global.slab_execute_cbs, cb);
341 : : } else {
342 : 0 : list_add_tail(&cb->link, &signal->execute_cb);
343 : : }
344 : 0 : spin_unlock_irq(&signal->lock);
345 : :
346 : : /* Copy across semaphore status as we need the same behaviour */
347 : 0 : rq->sched.flags |= signal->sched.flags;
348 : 0 : return 0;
349 : : }
350 : :
351 : 0 : bool __i915_request_submit(struct i915_request *request)
352 : : {
353 : 0 : struct intel_engine_cs *engine = request->engine;
354 : 0 : bool result = false;
355 : :
356 : 0 : RQ_TRACE(request, "\n");
357 : :
358 : 0 : GEM_BUG_ON(!irqs_disabled());
359 : 0 : lockdep_assert_held(&engine->active.lock);
360 : :
361 : : /*
362 : : * With the advent of preempt-to-busy, we frequently encounter
363 : : * requests that we have unsubmitted from HW, but left running
364 : : * until the next ack and so have completed in the meantime. On
365 : : * resubmission of that completed request, we can skip
366 : : * updating the payload, and execlists can even skip submitting
367 : : * the request.
368 : : *
369 : : * We must remove the request from the caller's priority queue,
370 : : * and the caller must only call us when the request is in their
371 : : * priority queue, under the active.lock. This ensures that the
372 : : * request has *not* yet been retired and we can safely move
373 : : * the request into the engine->active.list where it will be
 374                 :            :          * dropped upon retiring. (Otherwise if we resubmit a *retired*
375 : : * request, this would be a horrible use-after-free.)
376 : : */
377 [ # # ]: 0 : if (i915_request_completed(request))
378 : 0 : goto xfer;
379 : :
380 [ # # ]: 0 : if (intel_context_is_banned(request->context))
381 : 0 : i915_request_skip(request, -EIO);
382 : :
383 : : /*
384 : : * Are we using semaphores when the gpu is already saturated?
385 : : *
386 : : * Using semaphores incurs a cost in having the GPU poll a
387 : : * memory location, busywaiting for it to change. The continual
388 : : * memory reads can have a noticeable impact on the rest of the
389 : : * system with the extra bus traffic, stalling the cpu as it too
390 : : * tries to access memory across the bus (perf stat -e bus-cycles).
391 : : *
392 : : * If we installed a semaphore on this request and we only submit
393 : : * the request after the signaler completed, that indicates the
394 : : * system is overloaded and using semaphores at this time only
395 : : * increases the amount of work we are doing. If so, we disable
396 : : * further use of semaphores until we are idle again, whence we
397 : : * optimistically try again.
398 : : */
399 [ # # # # ]: 0 : if (request->sched.semaphores &&
400 : : i915_sw_fence_signaled(&request->semaphore))
401 : 0 : engine->saturated |= request->sched.semaphores;
402 : :
403 : 0 : engine->emit_fini_breadcrumb(request,
404 : 0 : request->ring->vaddr + request->postfix);
405 : :
406 : 0 : trace_i915_request_execute(request);
407 : 0 : engine->serial++;
408 : 0 : result = true;
409 : :
410 : 0 : xfer: /* We may be recursing from the signal callback of another i915 fence */
411 : 0 : spin_lock_nested(&request->lock, SINGLE_DEPTH_NESTING);
412 : :
413 [ # # ]: 0 : if (!test_and_set_bit(I915_FENCE_FLAG_ACTIVE, &request->fence.flags)) {
414 : 0 : list_move_tail(&request->sched.link, &engine->active.requests);
415 : 0 : clear_bit(I915_FENCE_FLAG_PQUEUE, &request->fence.flags);
416 : : }
417 : :
418 [ # # # # ]: 0 : if (test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT, &request->fence.flags) &&
419 [ # # ]: 0 : !test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &request->fence.flags) &&
420 : 0 : !i915_request_enable_breadcrumb(request))
421 : 0 : intel_engine_signal_breadcrumbs(engine);
422 : :
423 [ # # ]: 0 : __notify_execute_cb(request);
424 : :
425 : 0 : spin_unlock(&request->lock);
426 : :
427 : 0 : return result;
428 : : }
429 : :
430 : 0 : void i915_request_submit(struct i915_request *request)
431 : : {
432 : 0 : struct intel_engine_cs *engine = request->engine;
433 : 0 : unsigned long flags;
434 : :
435 : : /* Will be called from irq-context when using foreign fences. */
436 : 0 : spin_lock_irqsave(&engine->active.lock, flags);
437 : :
438 : 0 : __i915_request_submit(request);
439 : :
440 : 0 : spin_unlock_irqrestore(&engine->active.lock, flags);
441 : 0 : }
442 : :
443 : 0 : void __i915_request_unsubmit(struct i915_request *request)
444 : : {
445 : 0 : struct intel_engine_cs *engine = request->engine;
446 : :
447 : 0 : RQ_TRACE(request, "\n");
448 : :
449 : 0 : GEM_BUG_ON(!irqs_disabled());
450 : 0 : lockdep_assert_held(&engine->active.lock);
451 : :
452 : : /*
453 : : * Only unwind in reverse order, required so that the per-context list
454 : : * is kept in seqno/ring order.
455 : : */
456 : :
457 : : /* We may be recursing from the signal callback of another i915 fence */
458 : 0 : spin_lock_nested(&request->lock, SINGLE_DEPTH_NESTING);
459 : :
460 [ # # ]: 0 : if (test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT, &request->fence.flags))
461 : 0 : i915_request_cancel_breadcrumb(request);
462 : :
463 : 0 : GEM_BUG_ON(!test_bit(I915_FENCE_FLAG_ACTIVE, &request->fence.flags));
464 : 0 : clear_bit(I915_FENCE_FLAG_ACTIVE, &request->fence.flags);
465 : :
466 : 0 : spin_unlock(&request->lock);
467 : :
468 : : /* We've already spun, don't charge on resubmitting. */
469 [ # # # # ]: 0 : if (request->sched.semaphores && i915_request_started(request)) {
470 : 0 : request->sched.attr.priority |= I915_PRIORITY_NOSEMAPHORE;
471 : 0 : request->sched.semaphores = 0;
472 : : }
473 : :
474 : : /*
475 : : * We don't need to wake_up any waiters on request->execute, they
476 : : * will get woken by any other event or us re-adding this request
477 : : * to the engine timeline (__i915_request_submit()). The waiters
 478                 :            :          * should be quite adept at finding that the request now has a new
 479                 :            :          * global_seqno compared to the one they went to sleep on.
480 : : */
481 : 0 : }
482 : :
483 : 0 : void i915_request_unsubmit(struct i915_request *request)
484 : : {
485 : 0 : struct intel_engine_cs *engine = request->engine;
486 : 0 : unsigned long flags;
487 : :
488 : : /* Will be called from irq-context when using foreign fences. */
489 : 0 : spin_lock_irqsave(&engine->active.lock, flags);
490 : :
491 : 0 : __i915_request_unsubmit(request);
492 : :
493 : 0 : spin_unlock_irqrestore(&engine->active.lock, flags);
494 : 0 : }
495 : :
496 : : static int __i915_sw_fence_call
497 : 0 : submit_notify(struct i915_sw_fence *fence, enum i915_sw_fence_notify state)
498 : : {
499 : 0 : struct i915_request *request =
500 : 0 : container_of(fence, typeof(*request), submit);
501 : :
502 [ # # # ]: 0 : switch (state) {
503 : : case FENCE_COMPLETE:
504 [ # # ]: 0 : trace_i915_request_submit(request);
505 : :
506 [ # # ]: 0 : if (unlikely(fence->error))
507 : 0 : i915_request_skip(request, fence->error);
508 : :
509 : : /*
510 : : * We need to serialize use of the submit_request() callback
511 : : * with its hotplugging performed during an emergency
512 : : * i915_gem_set_wedged(). We use the RCU mechanism to mark the
513 : : * critical section in order to force i915_gem_set_wedged() to
514 : : * wait until the submit_request() is completed before
515 : : * proceeding.
516 : : */
517 : 0 : rcu_read_lock();
518 : 0 : request->engine->submit_request(request);
519 : 0 : rcu_read_unlock();
520 : : break;
521 : :
522 : : case FENCE_FREE:
523 : 0 : i915_request_put(request);
524 : : break;
525 : : }
526 : :
527 : 0 : return NOTIFY_DONE;
528 : : }
529 : :
530 : 0 : static void irq_semaphore_cb(struct irq_work *wrk)
531 : : {
532 : 0 : struct i915_request *rq =
533 : 0 : container_of(wrk, typeof(*rq), semaphore_work);
534 : :
535 : 0 : i915_schedule_bump_priority(rq, I915_PRIORITY_NOSEMAPHORE);
536 : 0 : i915_request_put(rq);
537 : 0 : }
538 : :
539 : : static int __i915_sw_fence_call
540 : 0 : semaphore_notify(struct i915_sw_fence *fence, enum i915_sw_fence_notify state)
541 : : {
542 : 0 : struct i915_request *rq = container_of(fence, typeof(*rq), semaphore);
543 : :
544 [ # # # ]: 0 : switch (state) {
545 : : case FENCE_COMPLETE:
546 [ # # ]: 0 : if (!(READ_ONCE(rq->sched.attr.priority) & I915_PRIORITY_NOSEMAPHORE)) {
547 [ # # ]: 0 : i915_request_get(rq);
548 : 0 : init_irq_work(&rq->semaphore_work, irq_semaphore_cb);
549 : 0 : irq_work_queue(&rq->semaphore_work);
550 : : }
551 : : break;
552 : :
553 : : case FENCE_FREE:
554 : 0 : i915_request_put(rq);
555 : : break;
556 : : }
557 : :
558 : 0 : return NOTIFY_DONE;
559 : : }
560 : :
561 : 0 : static void retire_requests(struct intel_timeline *tl)
562 : : {
563 : 0 : struct i915_request *rq, *rn;
564 : :
565 [ # # ]: 0 : list_for_each_entry_safe(rq, rn, &tl->requests, link)
566 [ # # ]: 0 : if (!i915_request_retire(rq))
567 : : break;
568 : 0 : }
569 : :
570 : : static noinline struct i915_request *
571 : 0 : request_alloc_slow(struct intel_timeline *tl, gfp_t gfp)
572 : : {
573 : 0 : struct i915_request *rq;
574 : :
575 [ # # ]: 0 : if (list_empty(&tl->requests))
576 : 0 : goto out;
577 : :
578 [ # # ]: 0 : if (!gfpflags_allow_blocking(gfp))
579 : 0 : goto out;
580 : :
581 : : /* Move our oldest request to the slab-cache (if not in use!) */
582 : 0 : rq = list_first_entry(&tl->requests, typeof(*rq), link);
583 : 0 : i915_request_retire(rq);
584 : :
585 : 0 : rq = kmem_cache_alloc(global.slab_requests,
586 : : gfp | __GFP_RETRY_MAYFAIL | __GFP_NOWARN);
587 [ # # ]: 0 : if (rq)
588 : : return rq;
589 : :
590 : : /* Ratelimit ourselves to prevent oom from malicious clients */
591 : 0 : rq = list_last_entry(&tl->requests, typeof(*rq), link);
592 : 0 : cond_synchronize_rcu(rq->rcustate);
593 : :
594 : : /* Retire our old requests in the hope that we free some */
595 : 0 : retire_requests(tl);
596 : :
597 : 0 : out:
598 : 0 : return kmem_cache_alloc(global.slab_requests, gfp);
599 : : }
600 : :
601 : 0 : static void __i915_request_ctor(void *arg)
602 : : {
603 : 0 : struct i915_request *rq = arg;
604 : :
605 : 0 : spin_lock_init(&rq->lock);
606 : 0 : i915_sched_node_init(&rq->sched);
607 : 0 : i915_sw_fence_init(&rq->submit, submit_notify);
608 : 0 : i915_sw_fence_init(&rq->semaphore, semaphore_notify);
609 : :
610 : 0 : dma_fence_init(&rq->fence, &i915_fence_ops, &rq->lock, 0, 0);
611 : :
612 : 0 : rq->file_priv = NULL;
613 : 0 : rq->capture_list = NULL;
614 : :
615 : 0 : INIT_LIST_HEAD(&rq->execute_cb);
616 : 0 : }
617 : :
618 : : struct i915_request *
619 : 0 : __i915_request_create(struct intel_context *ce, gfp_t gfp)
620 : : {
621 : 0 : struct intel_timeline *tl = ce->timeline;
622 : 0 : struct i915_request *rq;
623 : 0 : u32 seqno;
624 : 0 : int ret;
625 : :
626 [ # # ]: 0 : might_sleep_if(gfpflags_allow_blocking(gfp));
627 : :
628 : : /* Check that the caller provided an already pinned context */
629 : 0 : __intel_context_pin(ce);
630 : :
631 : : /*
632 : : * Beware: Dragons be flying overhead.
633 : : *
634 : : * We use RCU to look up requests in flight. The lookups may
635 : : * race with the request being allocated from the slab freelist.
636 : : * That is the request we are writing to here, may be in the process
637 : : * of being read by __i915_active_request_get_rcu(). As such,
638 : : * we have to be very careful when overwriting the contents. During
 639                 :            :          * the RCU lookup, we chase the request->engine pointer,
640 : : * read the request->global_seqno and increment the reference count.
641 : : *
642 : : * The reference count is incremented atomically. If it is zero,
643 : : * the lookup knows the request is unallocated and complete. Otherwise,
644 : : * it is either still in use, or has been reallocated and reset
645 : : * with dma_fence_init(). This increment is safe for release as we
 646                 :            :          * check that the request we have a reference to matches the active
647 : : * request.
648 : : *
649 : : * Before we increment the refcount, we chase the request->engine
650 : : * pointer. We must not call kmem_cache_zalloc() or else we set
651 : : * that pointer to NULL and cause a crash during the lookup. If
652 : : * we see the request is completed (based on the value of the
653 : : * old engine and seqno), the lookup is complete and reports NULL.
654 : : * If we decide the request is not completed (new engine or seqno),
655 : : * then we grab a reference and double check that it is still the
 656                 :            :          * active request - which it won't be - and restart the lookup.
657 : : *
658 : : * Do not use kmem_cache_zalloc() here!
659 : : */
660 : 0 : rq = kmem_cache_alloc(global.slab_requests,
661 : : gfp | __GFP_RETRY_MAYFAIL | __GFP_NOWARN);
662 [ # # ]: 0 : if (unlikely(!rq)) {
663 : 0 : rq = request_alloc_slow(tl, gfp);
664 [ # # ]: 0 : if (!rq) {
665 : 0 : ret = -ENOMEM;
666 : 0 : goto err_unreserve;
667 : : }
668 : : }
669 : :
670 : 0 : rq->i915 = ce->engine->i915;
671 : 0 : rq->context = ce;
672 : 0 : rq->engine = ce->engine;
673 : 0 : rq->ring = ce->ring;
674 : 0 : rq->execution_mask = ce->engine->mask;
675 : :
676 : 0 : kref_init(&rq->fence.refcount);
677 : 0 : rq->fence.flags = 0;
678 : 0 : rq->fence.error = 0;
679 : 0 : INIT_LIST_HEAD(&rq->fence.cb_list);
680 : :
681 : 0 : ret = intel_timeline_get_seqno(tl, rq, &seqno);
682 [ # # ]: 0 : if (ret)
683 : 0 : goto err_free;
684 : :
685 : 0 : rq->fence.context = tl->fence_context;
686 : 0 : rq->fence.seqno = seqno;
687 : :
688 : 0 : RCU_INIT_POINTER(rq->timeline, tl);
689 : 0 : RCU_INIT_POINTER(rq->hwsp_cacheline, tl->hwsp_cacheline);
690 : 0 : rq->hwsp_seqno = tl->hwsp_seqno;
691 : :
692 : 0 : rq->rcustate = get_state_synchronize_rcu(); /* acts as smp_mb() */
693 : :
694 : : /* We bump the ref for the fence chain */
695 [ # # ]: 0 : i915_sw_fence_reinit(&i915_request_get(rq)->submit);
696 [ # # ]: 0 : i915_sw_fence_reinit(&i915_request_get(rq)->semaphore);
697 : :
698 : 0 : i915_sched_node_reinit(&rq->sched);
699 : :
700 : : /* No zalloc, everything must be cleared after use */
701 : 0 : rq->batch = NULL;
702 : 0 : GEM_BUG_ON(rq->file_priv);
703 : 0 : GEM_BUG_ON(rq->capture_list);
704 : 0 : GEM_BUG_ON(!list_empty(&rq->execute_cb));
705 : :
706 : : /*
707 : : * Reserve space in the ring buffer for all the commands required to
708 : : * eventually emit this request. This is to guarantee that the
709 : : * i915_request_add() call can't fail. Note that the reserve may need
710 : : * to be redone if the request is not actually submitted straight
711 : : * away, e.g. because a GPU scheduler has deferred it.
712 : : *
713 : : * Note that due to how we add reserved_space to intel_ring_begin()
 714                 :            :          * we need to double our reserved space to ensure that if we need to wrap
715 : : * around inside i915_request_add() there is sufficient space at
716 : : * the beginning of the ring as well.
717 : : */
718 : 0 : rq->reserved_space =
719 : 0 : 2 * rq->engine->emit_fini_breadcrumb_dw * sizeof(u32);
720 : :
721 : : /*
722 : : * Record the position of the start of the request so that
723 : : * should we detect the updated seqno part-way through the
724 : : * GPU processing the request, we never over-estimate the
725 : : * position of the head.
726 : : */
727 : 0 : rq->head = rq->ring->emit;
728 : :
729 : 0 : ret = rq->engine->request_alloc(rq);
730 [ # # ]: 0 : if (ret)
731 : 0 : goto err_unwind;
732 : :
733 : 0 : rq->infix = rq->ring->emit; /* end of header; start of user payload */
734 : :
735 : 0 : intel_context_mark_active(ce);
736 : 0 : list_add_tail_rcu(&rq->link, &tl->requests);
737 : :
738 : 0 : return rq;
739 : :
740 : : err_unwind:
741 : 0 : ce->ring->emit = rq->head;
742 : :
743 : : /* Make sure we didn't add ourselves to external state before freeing */
744 : 0 : GEM_BUG_ON(!list_empty(&rq->sched.signalers_list));
745 : 0 : GEM_BUG_ON(!list_empty(&rq->sched.waiters_list));
746 : :
747 : 0 : err_free:
748 : 0 : kmem_cache_free(global.slab_requests, rq);
749 : 0 : err_unreserve:
750 : 0 : intel_context_unpin(ce);
751 : 0 : return ERR_PTR(ret);
752 : : }
753 : :
754 : : struct i915_request *
755 : 0 : i915_request_create(struct intel_context *ce)
756 : : {
757 : 0 : struct i915_request *rq;
758 : 0 : struct intel_timeline *tl;
759 : :
760 : 0 : tl = intel_context_timeline_lock(ce);
761 [ # # ]: 0 : if (IS_ERR(tl))
762 : : return ERR_CAST(tl);
763 : :
764 : : /* Move our oldest request to the slab-cache (if not in use!) */
765 : 0 : rq = list_first_entry(&tl->requests, typeof(*rq), link);
766 [ # # ]: 0 : if (!list_is_last(&rq->link, &tl->requests))
767 : 0 : i915_request_retire(rq);
768 : :
769 [ # # ]: 0 : intel_context_enter(ce);
770 : 0 : rq = __i915_request_create(ce, GFP_KERNEL);
771 [ # # ]: 0 : intel_context_exit(ce); /* active reference transferred to request */
772 [ # # ]: 0 : if (IS_ERR(rq))
773 : 0 : goto err_unlock;
774 : :
775 : : /* Check that we do not interrupt ourselves with a new request */
776 : : rq->cookie = lockdep_pin_lock(&tl->mutex);
777 : :
778 : : return rq;
779 : :
780 : : err_unlock:
781 : 0 : intel_context_timeline_unlock(tl);
782 : 0 : return rq;
783 : : }
784 : :
785 : : static int
786 : 0 : i915_request_await_start(struct i915_request *rq, struct i915_request *signal)
787 : : {
788 : 0 : struct dma_fence *fence;
789 : 0 : int err;
790 : :
791 [ # # ]: 0 : if (i915_request_timeline(rq) == rcu_access_pointer(signal->timeline))
792 : : return 0;
793 : :
794 [ # # ]: 0 : if (i915_request_started(signal))
795 : : return 0;
796 : :
797 : 0 : fence = NULL;
798 : 0 : rcu_read_lock();
799 : 0 : spin_lock_irq(&signal->lock);
800 : 0 : do {
801 : 0 : struct list_head *pos = READ_ONCE(signal->link.prev);
802 : 0 : struct i915_request *prev;
803 : :
804 : : /* Confirm signal has not been retired, the link is valid */
805 [ # # ]: 0 : if (unlikely(i915_request_started(signal)))
806 : : break;
807 : :
808 : : /* Is signal the earliest request on its timeline? */
809 [ # # ]: 0 : if (pos == &rcu_dereference(signal->timeline)->requests)
810 : : break;
811 : :
812 : : /*
813 : : * Peek at the request before us in the timeline. That
814 : : * request will only be valid before it is retired, so
815 : : * after acquiring a reference to it, confirm that it is
816 : : * still part of the signaler's timeline.
817 : : */
818 : 0 : prev = list_entry(pos, typeof(*prev), link);
819 [ # # ]: 0 : if (!i915_request_get_rcu(prev))
820 : : break;
821 : :
822 : : /* After the strong barrier, confirm prev is still attached */
823 [ # # ]: 0 : if (unlikely(READ_ONCE(prev->link.next) != &signal->link)) {
824 : 0 : i915_request_put(prev);
825 : : break;
826 : : }
827 : :
828 : : fence = &prev->fence;
829 : 0 : } while (0);
830 : 0 : spin_unlock_irq(&signal->lock);
831 : 0 : rcu_read_unlock();
832 [ # # ]: 0 : if (!fence)
833 : : return 0;
834 : :
835 : 0 : err = 0;
836 [ # # ]: 0 : if (!intel_timeline_sync_is_later(i915_request_timeline(rq), fence))
837 : 0 : err = i915_sw_fence_await_dma_fence(&rq->submit,
838 : : fence, 0,
839 : : I915_FENCE_GFP);
840 : 0 : dma_fence_put(fence);
841 : :
842 : 0 : return err;
843 : : }
844 : :
845 : : static intel_engine_mask_t
846 : 0 : already_busywaiting(struct i915_request *rq)
847 : : {
848 : : /*
849 : : * Polling a semaphore causes bus traffic, delaying other users of
850 : : * both the GPU and CPU. We want to limit the impact on others,
851 : : * while taking advantage of early submission to reduce GPU
852 : : * latency. Therefore we restrict ourselves to not using more
853 : : * than one semaphore from each source, and not using a semaphore
854 : : * if we have detected the engine is saturated (i.e. would not be
855 : : * submitted early and cause bus traffic reading an already passed
856 : : * semaphore).
857 : : *
858 : : * See the are-we-too-late? check in __i915_request_submit().
859 : : */
860 : 0 : return rq->sched.semaphores | rq->engine->saturated;
861 : : }
862 : :
863 : : static int
864 : 0 : __emit_semaphore_wait(struct i915_request *to,
865 : : struct i915_request *from,
866 : : u32 seqno)
867 : : {
868 : 0 : const int has_token = INTEL_GEN(to->i915) >= 12;
869 : 0 : u32 hwsp_offset;
870 : 0 : int len, err;
871 : 0 : u32 *cs;
872 : :
873 : 0 : GEM_BUG_ON(INTEL_GEN(to->i915) < 8);
874 : :
875 : : /* We need to pin the signaler's HWSP until we are finished reading. */
876 : 0 : err = intel_timeline_read_hwsp(from, to, &hwsp_offset);
877 [ # # ]: 0 : if (err)
878 : : return err;
879 : :
880 : 0 : len = 4;
881 [ # # ]: 0 : if (has_token)
882 : 0 : len += 2;
883 : :
884 : 0 : cs = intel_ring_begin(to, len);
885 [ # # ]: 0 : if (IS_ERR(cs))
886 : 0 : return PTR_ERR(cs);
887 : :
888 : : /*
889 : : * Using greater-than-or-equal here means we have to worry
890 : : * about seqno wraparound. To side step that issue, we swap
 891                 :            :          * the timeline HWSP upon wrapping, so that anyone listening
 892                 :            :          * for the old (pre-wrap) values does not see much smaller
 893                 :            :          * (post-wrap) values than expected (and so wait forever).
895 : : */
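                     :            : 	/*
                     :            : 	 * i.e. have the GPU poll the dword at hwsp_offset until it reads
                     :            : 	 * back a value greater than or equal to seqno.
                     :            : 	 */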
896 : 0 : *cs++ = (MI_SEMAPHORE_WAIT |
897 : : MI_SEMAPHORE_GLOBAL_GTT |
898 : : MI_SEMAPHORE_POLL |
899 : 0 : MI_SEMAPHORE_SAD_GTE_SDD) +
900 : : has_token;
901 : 0 : *cs++ = seqno;
902 : 0 : *cs++ = hwsp_offset;
903 : 0 : *cs++ = 0;
904 [ # # ]: 0 : if (has_token) {
905 : 0 : *cs++ = 0;
906 : 0 : *cs++ = MI_NOOP;
907 : : }
908 : :
909 : : intel_ring_advance(to, cs);
910 : : return 0;
911 : : }
912 : :
913 : : static int
914 : 0 : emit_semaphore_wait(struct i915_request *to,
915 : : struct i915_request *from,
916 : : gfp_t gfp)
917 : : {
918 : : /* Just emit the first semaphore we see as request space is limited. */
919 [ # # ]: 0 : if (already_busywaiting(to) & from->engine->mask)
920 : 0 : goto await_fence;
921 : :
922 [ # # ]: 0 : if (i915_request_await_start(to, from) < 0)
923 : 0 : goto await_fence;
924 : :
925 : : /* Only submit our spinner after the signaler is running! */
926 [ # # ]: 0 : if (__await_execution(to, from, NULL, gfp))
927 : 0 : goto await_fence;
928 : :
929 [ # # ]: 0 : if (__emit_semaphore_wait(to, from, from->fence.seqno))
930 : 0 : goto await_fence;
931 : :
932 : 0 : to->sched.semaphores |= from->engine->mask;
933 : 0 : to->sched.flags |= I915_SCHED_HAS_SEMAPHORE_CHAIN;
934 : 0 : return 0;
935 : :
936 : 0 : await_fence:
937 : 0 : return i915_sw_fence_await_dma_fence(&to->submit,
938 : : &from->fence, 0,
939 : : I915_FENCE_GFP);
940 : : }
941 : :
942 : : static int
943 : 0 : i915_request_await_request(struct i915_request *to, struct i915_request *from)
944 : : {
945 : 0 : int ret;
946 : :
947 : 0 : GEM_BUG_ON(to == from);
948 : 0 : GEM_BUG_ON(to->timeline == from->timeline);
949 : :
950 [ # # ]: 0 : if (i915_request_completed(from))
951 : : return 0;
952 : :
953 [ # # ]: 0 : if (to->engine->schedule) {
954 : 0 : ret = i915_sched_node_add_dependency(&to->sched, &from->sched);
955 [ # # ]: 0 : if (ret < 0)
956 : : return ret;
957 : : }
958 : :
959 [ # # ]: 0 : if (to->engine == from->engine)
960 : 0 : ret = i915_sw_fence_await_sw_fence_gfp(&to->submit,
961 : : &from->submit,
962 : : I915_FENCE_GFP);
963 [ # # ]: 0 : else if (intel_context_use_semaphores(to->context))
964 : 0 : ret = emit_semaphore_wait(to, from, I915_FENCE_GFP);
965 : : else
966 : 0 : ret = i915_sw_fence_await_dma_fence(&to->submit,
967 : : &from->fence, 0,
968 : : I915_FENCE_GFP);
969 [ # # ]: 0 : if (ret < 0)
970 : : return ret;
971 : :
972 [ # # ]: 0 : if (to->sched.flags & I915_SCHED_HAS_SEMAPHORE_CHAIN) {
973 : 0 : ret = i915_sw_fence_await_dma_fence(&to->semaphore,
974 : : &from->fence, 0,
975 : : I915_FENCE_GFP);
976 : 0 : if (ret < 0)
977 : : return ret;
978 : : }
979 : :
980 : : return 0;
981 : : }
982 : :
983 : : int
984 : 0 : i915_request_await_dma_fence(struct i915_request *rq, struct dma_fence *fence)
985 : : {
986 : 0 : struct dma_fence **child = &fence;
987 : 0 : unsigned int nchild = 1;
988 : 0 : int ret;
989 : :
990 : : /*
991 : : * Note that if the fence-array was created in signal-on-any mode,
992 : : * we should *not* decompose it into its individual fences. However,
993 : : * we don't currently store which mode the fence-array is operating
994 : : * in. Fortunately, the only user of signal-on-any is private to
995 : : * amdgpu and we should not see any incoming fence-array from
996 : : * sync-file being in signal-on-any mode.
997 : : */
998 [ # # ]: 0 : if (dma_fence_is_array(fence)) {
999 : 0 : struct dma_fence_array *array = to_dma_fence_array(fence);
1000 : :
1001 : 0 : child = array->fences;
1002 : 0 : nchild = array->num_fences;
1003 : 0 : GEM_BUG_ON(!nchild);
1004 : : }
1005 : :
1006 : 0 : do {
1007 : 0 : fence = *child++;
1008 [ # # ]: 0 : if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags)) {
1009 : 0 : i915_sw_fence_set_error_once(&rq->submit, fence->error);
1010 : 0 : continue;
1011 : : }
1012 : :
1013 : : /*
1014 : : * Requests on the same timeline are explicitly ordered, along
1015 : : * with their dependencies, by i915_request_add() which ensures
1016 : : * that requests are submitted in-order through each ring.
1017 : : */
1018 [ # # ]: 0 : if (fence->context == rq->fence.context)
1019 : 0 : continue;
1020 : :
1021 : : /* Squash repeated waits to the same timelines */
1022 [ # # # # ]: 0 : if (fence->context &&
1023 : 0 : intel_timeline_sync_is_later(i915_request_timeline(rq),
1024 : : fence))
1025 : 0 : continue;
1026 : :
1027 [ # # ]: 0 : if (dma_fence_is_i915(fence))
1028 : 0 : ret = i915_request_await_request(rq, to_request(fence));
1029 : : else
1030 : 0 : ret = i915_sw_fence_await_dma_fence(&rq->submit, fence,
1031 [ # # ]: 0 : fence->context ? I915_FENCE_TIMEOUT : 0,
1032 : : I915_FENCE_GFP);
1033 [ # # ]: 0 : if (ret < 0)
1034 : 0 : return ret;
1035 : :
1036 : : /* Record the latest fence used against each timeline */
1037 [ # # ]: 0 : if (fence->context)
1038 : 0 : intel_timeline_sync_set(i915_request_timeline(rq),
1039 : : fence);
1040 [ # # ]: 0 : } while (--nchild);
1041 : :
1042 : : return 0;
1043 : : }
1044 : :
1045 : 0 : static bool intel_timeline_sync_has_start(struct intel_timeline *tl,
1046 : : struct dma_fence *fence)
1047 : : {
1048 : 0 : return __intel_timeline_sync_is_later(tl,
1049 : : fence->context,
1050 : : fence->seqno - 1);
1051 : : }
1052 : :
1053 : 0 : static int intel_timeline_sync_set_start(struct intel_timeline *tl,
1054 : : const struct dma_fence *fence)
1055 : : {
1056 : 0 : return __intel_timeline_sync_set(tl, fence->context, fence->seqno - 1);
1057 : : }
1058 : :
1059 : : static int
1060 : 0 : __i915_request_await_execution(struct i915_request *to,
1061 : : struct i915_request *from,
1062 : : void (*hook)(struct i915_request *rq,
1063 : : struct dma_fence *signal))
1064 : : {
1065 : 0 : int err;
1066 : :
1067 : : /* Submit both requests at the same time */
1068 : 0 : err = __await_execution(to, from, hook, I915_FENCE_GFP);
1069 [ # # ]: 0 : if (err)
1070 : : return err;
1071 : :
 1072                :            :                 /* Squash repeated dependencies to the same timelines */
1073 [ # # ]: 0 : if (intel_timeline_sync_has_start(i915_request_timeline(to),
1074 : : &from->fence))
1075 : : return 0;
1076 : :
1077 : : /* Ensure both start together [after all semaphores in signal] */
1078 [ # # ]: 0 : if (intel_engine_has_semaphores(to->engine))
1079 : 0 : err = __emit_semaphore_wait(to, from, from->fence.seqno - 1);
1080 : : else
1081 : 0 : err = i915_request_await_start(to, from);
1082 [ # # ]: 0 : if (err < 0)
1083 : : return err;
1084 : :
1085 : : /* Couple the dependency tree for PI on this exposed to->fence */
1086 [ # # ]: 0 : if (to->engine->schedule) {
1087 : 0 : err = i915_sched_node_add_dependency(&to->sched, &from->sched);
1088 [ # # ]: 0 : if (err < 0)
1089 : : return err;
1090 : : }
1091 : :
1092 : 0 : return intel_timeline_sync_set_start(i915_request_timeline(to),
1093 : : &from->fence);
1094 : : }
1095 : :
1096 : : int
1097 : 0 : i915_request_await_execution(struct i915_request *rq,
1098 : : struct dma_fence *fence,
1099 : : void (*hook)(struct i915_request *rq,
1100 : : struct dma_fence *signal))
1101 : : {
1102 : 0 : struct dma_fence **child = &fence;
1103 : 0 : unsigned int nchild = 1;
1104 : 0 : int ret;
1105 : :
1106 [ # # ]: 0 : if (dma_fence_is_array(fence)) {
1107 : 0 : struct dma_fence_array *array = to_dma_fence_array(fence);
1108 : :
1109 : : /* XXX Error for signal-on-any fence arrays */
1110 : :
1111 : 0 : child = array->fences;
1112 : 0 : nchild = array->num_fences;
1113 : 0 : GEM_BUG_ON(!nchild);
1114 : : }
1115 : :
1116 : 0 : do {
1117 : 0 : fence = *child++;
1118 [ # # ]: 0 : if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags)) {
1119 : 0 : i915_sw_fence_set_error_once(&rq->submit, fence->error);
1120 : 0 : continue;
1121 : : }
1122 : :
1123 : : /*
1124 : : * We don't squash repeated fence dependencies here as we
1125 : : * want to run our callback in all cases.
1126 : : */
1127 : :
1128 [ # # ]: 0 : if (dma_fence_is_i915(fence))
1129 : 0 : ret = __i915_request_await_execution(rq,
1130 : : to_request(fence),
1131 : : hook);
1132 : : else
1133 : 0 : ret = i915_sw_fence_await_dma_fence(&rq->submit, fence,
1134 : : I915_FENCE_TIMEOUT,
1135 : : GFP_KERNEL);
1136 [ # # ]: 0 : if (ret < 0)
1137 : 0 : return ret;
1138 [ # # ]: 0 : } while (--nchild);
1139 : :
1140 : : return 0;
1141 : : }
1142 : :
1143 : : /**
1144 : : * i915_request_await_object - set this request to (async) wait upon a bo
1145 : : * @to: request we are wishing to use
1146 : : * @obj: object which may be in use on another ring.
1147 : : * @write: whether the wait is on behalf of a writer
1148 : : *
1149 : : * This code is meant to abstract object synchronization with the GPU.
1150 : : * Conceptually we serialise writes between engines inside the GPU.
1151 : : * We only allow one engine to write into a buffer at any time, but
1152 : : * multiple readers. To ensure each has a coherent view of memory, we must:
1153 : : *
1154 : : * - If there is an outstanding write request to the object, the new
1155 : : * request must wait for it to complete (either CPU or in hw, requests
1156 : : * on the same ring will be naturally ordered).
1157 : : *
1158 : : * - If we are a write request (pending_write_domain is set), the new
1159 : : * request must wait for outstanding read requests to complete.
1160 : : *
1161 : : * Returns 0 if successful, else propagates up the lower layer error.
1162 : : */
1163 : : int
1164 : 0 : i915_request_await_object(struct i915_request *to,
1165 : : struct drm_i915_gem_object *obj,
1166 : : bool write)
1167 : : {
1168 : 0 : struct dma_fence *excl;
1169 : 0 : int ret = 0;
1170 : :
1171 [ # # ]: 0 : if (write) {
1172 : 0 : struct dma_fence **shared;
1173 : 0 : unsigned int count, i;
1174 : :
1175 : 0 : ret = dma_resv_get_fences_rcu(obj->base.resv,
1176 : : &excl, &count, &shared);
1177 [ # # ]: 0 : if (ret)
1178 : 0 : return ret;
1179 : :
1180 [ # # ]: 0 : for (i = 0; i < count; i++) {
1181 : 0 : ret = i915_request_await_dma_fence(to, shared[i]);
1182 [ # # ]: 0 : if (ret)
1183 : : break;
1184 : :
1185 : 0 : dma_fence_put(shared[i]);
1186 : : }
1187 : :
1188 [ # # ]: 0 : for (; i < count; i++)
1189 : 0 : dma_fence_put(shared[i]);
1190 : 0 : kfree(shared);
1191 : : } else {
1192 [ # # ]: 0 : excl = dma_resv_get_excl_rcu(obj->base.resv);
1193 : : }
1194 : :
1195 [ # # ]: 0 : if (excl) {
1196 [ # # ]: 0 : if (ret == 0)
1197 : 0 : ret = i915_request_await_dma_fence(to, excl);
1198 : :
1199 : 0 : dma_fence_put(excl);
1200 : : }
1201 : :
1202 : : return ret;
1203 : : }
1204 : :
1205 : 0 : void i915_request_skip(struct i915_request *rq, int error)
1206 : : {
1207 : 0 : void *vaddr = rq->ring->vaddr;
1208 : 0 : u32 head;
1209 : :
1210 : 0 : GEM_BUG_ON(!IS_ERR_VALUE((long)error));
1211 : 0 : dma_fence_set_error(&rq->fence, error);
1212 : :
1213 [ # # ]: 0 : if (rq->infix == rq->postfix)
1214 : : return;
1215 : :
1216 : : /*
1217 : : * As this request likely depends on state from the lost
1218 : : * context, clear out all the user operations leaving the
1219 : : * breadcrumb at the end (so we get the fence notifications).
1220 : : */
1221 : 0 : head = rq->infix;
1222 [ # # ]: 0 : if (rq->postfix < head) {
1223 : 0 : memset(vaddr + head, 0, rq->ring->size - head);
1224 : 0 : head = 0;
1225 : : }
1226 : 0 : memset(vaddr + head, 0, rq->postfix - head);
1227 : 0 : rq->infix = rq->postfix;
1228 : : }
1229 : :
1230 : : static struct i915_request *
1231 : 0 : __i915_request_add_to_timeline(struct i915_request *rq)
1232 : : {
1233 : 0 : struct intel_timeline *timeline = i915_request_timeline(rq);
1234 : 0 : struct i915_request *prev;
1235 : :
1236 : : /*
1237 : : * Dependency tracking and request ordering along the timeline
 1238                :            :          * are special cased so that we can eliminate redundant ordering
1239 : : * operations while building the request (we know that the timeline
1240 : : * itself is ordered, and here we guarantee it).
1241 : : *
1242 : : * As we know we will need to emit tracking along the timeline,
1243 : : * we embed the hooks into our request struct -- at the cost of
1244 : : * having to have specialised no-allocation interfaces (which will
1245 : : * be beneficial elsewhere).
1246 : : *
1247 : : * A second benefit to open-coding i915_request_await_request is
1248 : : * that we can apply a slight variant of the rules specialised
1249 : : * for timelines that jump between engines (such as virtual engines).
1250 : : * If we consider the case of virtual engine, we must emit a dma-fence
1251 : : * to prevent scheduling of the second request until the first is
1252 : : * complete (to maximise our greedy late load balancing) and this
1253 : : * precludes optimising to use semaphores serialisation of a single
1254 : : * timeline across engines.
1255 : : */
1256 : 0 : prev = to_request(__i915_active_fence_set(&timeline->last_request,
1257 : : &rq->fence));
1258 [ # # # # ]: 0 : if (prev && !i915_request_completed(prev)) {
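                     :            : 		/* one bit set in the combined masks: both requests run on the same engine */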
1259 [ # # # # ]: 0 : if (is_power_of_2(prev->engine->mask | rq->engine->mask))
1260 : 0 : i915_sw_fence_await_sw_fence(&rq->submit,
1261 : : &prev->submit,
1262 : : &rq->submitq);
1263 : : else
1264 : 0 : __i915_sw_fence_await_dma_fence(&rq->submit,
1265 : : &prev->fence,
1266 : : &rq->dmaq);
1267 [ # # ]: 0 : if (rq->engine->schedule)
1268 : 0 : __i915_sched_node_add_dependency(&rq->sched,
1269 : : &prev->sched,
1270 : : &rq->dep,
1271 : : 0);
1272 : : }
1273 : :
1274 : : /*
1275 : : * Make sure that no request gazumped us - if it was allocated after
1276 : : * our i915_request_alloc() and called __i915_request_add() before
1277 : : * us, the timeline will hold its seqno which is later than ours.
1278 : : */
1279 : 0 : GEM_BUG_ON(timeline->seqno != rq->fence.seqno);
1280 : :
1281 : 0 : return prev;
1282 : : }
1283 : :
1284 : : /*
 1285                :            :  * NB: This function is not allowed to fail. Doing so would mean the
1286 : : * request is not being tracked for completion but the work itself is
1287 : : * going to happen on the hardware. This would be a Bad Thing(tm).
1288 : : */
1289 : 0 : struct i915_request *__i915_request_commit(struct i915_request *rq)
1290 : : {
1291 : 0 : struct intel_engine_cs *engine = rq->engine;
1292 : 0 : struct intel_ring *ring = rq->ring;
1293 : 0 : u32 *cs;
1294 : :
1295 : 0 : RQ_TRACE(rq, "\n");
1296 : :
1297 : : /*
1298 : : * To ensure that this call will not fail, space for its emissions
1299 : : * should already have been reserved in the ring buffer. Let the ring
1300 : : * know that it is time to use that space up.
1301 : : */
1302 : 0 : GEM_BUG_ON(rq->reserved_space > ring->space);
1303 : 0 : rq->reserved_space = 0;
1304 : 0 : rq->emitted_jiffies = jiffies;
1305 : :
1306 : : /*
1307 : : * Record the position of the start of the breadcrumb so that
1308 : : * should we detect the updated seqno part-way through the
1309 : : * GPU processing the request, we never over-estimate the
1310 : : * position of the ring's HEAD.
1311 : : */
1312 : 0 : cs = intel_ring_begin(rq, engine->emit_fini_breadcrumb_dw);
1313 : 0 : GEM_BUG_ON(IS_ERR(cs));
1314 : 0 : rq->postfix = intel_ring_offset(rq, cs);
1315 : :
1316 : 0 : return __i915_request_add_to_timeline(rq);
1317 : : }
1318 : :
1319 : 0 : void __i915_request_queue(struct i915_request *rq,
1320 : : const struct i915_sched_attr *attr)
1321 : : {
1322 : : /*
1323 : : * Let the backend know a new request has arrived that may need
1324 : : * to adjust the existing execution schedule due to a high priority
1325 : : * request - i.e. we may want to preempt the current request in order
1326 : : * to run a high priority dependency chain *before* we can execute this
1327 : : * request.
1328 : : *
1329 : : * This is called before the request is ready to run so that we can
1330 : : * decide whether to preempt the entire chain so that it is ready to
1331 : : * run at the earliest possible convenience.
1332 : : */
1333 [ # # # # ]: 0 : if (attr && rq->engine->schedule)
1334 : 0 : rq->engine->schedule(rq, attr);
1335 : 0 : i915_sw_fence_commit(&rq->semaphore);
1336 : 0 : i915_sw_fence_commit(&rq->submit);
1337 : 0 : }
1338 : :
1339 : 0 : void i915_request_add(struct i915_request *rq)
1340 : : {
1341 : 0 : struct intel_timeline * const tl = i915_request_timeline(rq);
1342 : 0 : struct i915_sched_attr attr = {};
1343 : 0 : struct i915_request *prev;
1344 : :
1345 : 0 : lockdep_assert_held(&tl->mutex);
1346 : 0 : lockdep_unpin_lock(&tl->mutex, rq->cookie);
1347 : :
1348 : 0 : trace_i915_request_add(rq);
1349 : :
1350 : 0 : prev = __i915_request_commit(rq);
1351 : :
1352 [ # # ]: 0 : if (rcu_access_pointer(rq->context->gem_context))
1353 : 0 : attr = i915_request_gem_context(rq)->sched;
1354 : :
1355 : : /*
1356 : : * Boost actual workloads past semaphores!
1357 : : *
1358 : : * With semaphores we spin on one engine waiting for another,
1359 : : * simply to reduce the latency of starting our work when
1360 : : * the signaler completes. However, if there is any other
1361 : : * work that we could be doing on this engine instead, that
1362 : : * is better utilisation and will reduce the overall duration
1363 : : * of the current work. To avoid PI boosting a semaphore
1364 : : * far in the distance past over useful work, we keep a history
1365 : : * of any semaphore use along our dependency chain.
1366 : : */
1367 [ # # ]: 0 : if (!(rq->sched.flags & I915_SCHED_HAS_SEMAPHORE_CHAIN))
1368 : 0 : attr.priority |= I915_PRIORITY_NOSEMAPHORE;
1369 : :
1370 : : /*
1371 : : * Boost priorities to new clients (new request flows).
1372 : : *
1373 : : * Allow interactive/synchronous clients to jump ahead of
1374 : : * the bulk clients. (FQ_CODEL)
1375 : : */
1376 [ # # ]: 0 : if (list_empty(&rq->sched.signalers_list))
1377 : 0 : attr.priority |= I915_PRIORITY_WAIT;
1378 : :
1379 : 0 : local_bh_disable();
1380 : 0 : __i915_request_queue(rq, &attr);
1381 : 0 : local_bh_enable(); /* Kick the execlists tasklet if just scheduled */
1382 : :
1383 : : /*
1384 : : * In typical scenarios, we do not expect the previous request on
1385 : : * the timeline to be still tracked by timeline->last_request if it
1386 : : * has been completed. If the completed request is still here, that
1387 : : * implies that request retirement is a long way behind submission,
1388 : : * suggesting that we haven't been retiring frequently enough from
1389 : : * the combination of retire-before-alloc, waiters and the background
1390 : : * retirement worker. So if the last request on this timeline was
1391 : : * already completed, do a catch up pass, flushing the retirement queue
1392 : : * up to this client. Since we have now moved the heaviest operations
1393 : : * during retirement onto secondary workers, such as freeing objects
1394 : : * or contexts, retiring a bunch of requests is mostly list management
1395 : : * (and cache misses), and so we should not be overly penalizing this
 1396                :            :          * client by performing excess work, though we may still be performing
1397 : : * work on behalf of others -- but instead we should benefit from
1398 : : * improved resource management. (Well, that's the theory at least.)
1399 : : */
1400 [ # # # # ]: 0 : if (prev &&
1401 [ # # ]: 0 : i915_request_completed(prev) &&
1402 [ # # ]: 0 : rcu_access_pointer(prev->timeline) == tl)
1403 : 0 : i915_request_retire_upto(prev);
1404 : :
1405 : 0 : mutex_unlock(&tl->mutex);
1406 : 0 : }
1407 : :
1408 : 0 : static unsigned long local_clock_us(unsigned int *cpu)
1409 : : {
1410 : 0 : unsigned long t;
1411 : :
1412 : : /*
1413 : : * Cheaply and approximately convert from nanoseconds to microseconds.
1414 : : * The result and subsequent calculations are also defined in the same
1415 : : * approximate microseconds units. The principal source of timing
1416 : : * error here is from the simple truncation.
1417 : : *
 1418                :            :          * Note that local_clock() is only defined wrt the current CPU;
1419 : : * the comparisons are no longer valid if we switch CPUs. Instead of
1420 : : * blocking preemption for the entire busywait, we can detect the CPU
1421 : : * switch and use that as indicator of system load and a reason to
1422 : : * stop busywaiting, see busywait_stop().
1423 : : */
1424 : 0 : *cpu = get_cpu();
1425 : 0 : t = local_clock() >> 10;
1426 : 0 : put_cpu();
1427 : :
1428 : 0 : return t;
1429 : : }
1430 : :
1431 : 0 : static bool busywait_stop(unsigned long timeout, unsigned int cpu)
1432 : : {
1433 : 0 : unsigned int this_cpu;
1434 : :
1435 [ # # ]: 0 : if (time_after(local_clock_us(&this_cpu), timeout))
1436 : : return true;
1437 : :
1438 : 0 : return this_cpu != cpu;
1439 : : }
1440 : :
1441 : 0 : static bool __i915_spin_request(const struct i915_request * const rq,
1442 : : int state, unsigned long timeout_us)
1443 : : {
1444 : 0 : unsigned int cpu;
1445 : :
1446 : : /*
1447 : : * Only wait for the request if we know it is likely to complete.
1448 : : *
1449 : : * We don't track the timestamps around requests, nor the average
1450 : : * request length, so we do not have a good indicator that this
1451 : : * request will complete within the timeout. What we do know is the
1452 : : * order in which requests are executed by the context and so we can
1453 : : * tell if the request has been started. If the request is not even
1454 : : * running yet, it is a fair assumption that it will not complete
1455 : : * within our relatively short timeout.
1456 : : */
1457 [ # # ]: 0 : if (!i915_request_is_running(rq))
1458 : : return false;
1459 : :
1460 : : /*
1461 : : * When waiting for high frequency requests, e.g. during synchronous
1462 : : * rendering split between the CPU and GPU, the finite amount of time
1463 : : * required to set up the irq and wait upon it limits the response
1464 : : * rate. By busywaiting on the request completion for a short while we
 1465                :            :          * can service the high frequency waits as quickly as possible. However,
1466 : : * if it is a slow request, we want to sleep as quickly as possible.
1467 : : * The tradeoff between waiting and sleeping is roughly the time it
1468 : : * takes to sleep on a request, on the order of a microsecond.
1469 : : */
1470 : :
1471 : 0 : timeout_us += local_clock_us(&cpu);
1472 : 0 : do {
1473 [ # # ]: 0 : if (i915_request_completed(rq))
1474 : : return true;
1475 : :
1476 [ # # ]: 0 : if (signal_pending_state(state, current))
1477 : : break;
1478 : :
1479 [ # # ]: 0 : if (busywait_stop(timeout_us, cpu))
1480 : : break;
1481 : :
1482 : 0 : cpu_relax();
1483 [ # # ]: 0 : } while (!need_resched());
1484 : :
1485 : : return false;
1486 : : }
1487 : :
1488 : : struct request_wait {
1489 : : struct dma_fence_cb cb;
1490 : : struct task_struct *tsk;
1491 : : };
1492 : :
1493 : 0 : static void request_wait_wake(struct dma_fence *fence, struct dma_fence_cb *cb)
1494 : : {
1495 : 0 : struct request_wait *wait = container_of(cb, typeof(*wait), cb);
1496 : :
1497 : 0 : wake_up_process(wait->tsk);
1498 : 0 : }
1499 : :
1500 : : /**
1501 : : * i915_request_wait - wait until execution of request has finished
1502 : : * @rq: the request to wait upon
1503 : : * @flags: how to wait
1504 : : * @timeout: how long to wait in jiffies
1505 : : *
1506 : : * i915_request_wait() waits for the request to be completed, for a
1507 : : * maximum of @timeout jiffies (with MAX_SCHEDULE_TIMEOUT implying an
1508 : : * unbounded wait).
1509 : : *
1510 : : * Returns the remaining time (in jiffies) if the request completed, which may
1511 : : * be zero or -ETIME if the request is unfinished after the timeout expires.
 1512                :            :  * May return -EINTR if called with I915_WAIT_INTERRUPTIBLE and a signal is
1513 : : * pending before the request completes.
1514 : : */
1515 : 0 : long i915_request_wait(struct i915_request *rq,
1516 : : unsigned int flags,
1517 : : long timeout)
1518 : : {
1519 : 0 : const int state = flags & I915_WAIT_INTERRUPTIBLE ?
1520 : 0 : TASK_INTERRUPTIBLE : TASK_UNINTERRUPTIBLE;
1521 : 0 : struct request_wait wait;
1522 : :
1523 : 0 : might_sleep();
1524 : 0 : GEM_BUG_ON(timeout < 0);
1525 : :
1526 [ # # ]: 0 : if (dma_fence_is_signaled(&rq->fence))
1527 : : return timeout;
1528 : :
1529 [ # # ]: 0 : if (!timeout)
1530 : : return -ETIME;
1531 : :
1532 : 0 : trace_i915_request_wait_begin(rq, flags);
1533 : :
1534 : : /*
1535 : : * We must never wait on the GPU while holding a lock as we
1536 : : * may need to perform a GPU reset. So while we don't need to
1537 : : * serialise wait/reset with an explicit lock, we do want
1538 : : * lockdep to detect potential dependency cycles.
1539 : : */
1540 : 0 : mutex_acquire(&rq->engine->gt->reset.mutex.dep_map, 0, 0, _THIS_IP_);
1541 : :
1542 : : /*
1543 : : * Optimistic spin before touching IRQs.
1544 : : *
1545 : : * We may use a rather large value here to offset the penalty of
1546 : : * switching away from the active task. Frequently, the client will
1547 : : * wait upon an old swapbuffer to throttle itself to remain within a
1548 : : * frame of the gpu. If the client is running in lockstep with the gpu,
1549 : : * then it should not be waiting long at all, and a sleep now will incur
1550 : : * extra scheduler latency in producing the next frame. To try to
1551 : : * avoid adding the cost of enabling/disabling the interrupt to the
1552 : : * short wait, we first spin to see if the request would have completed
1553 : : * in the time taken to setup the interrupt.
1554 : : *
 1555                :            :          * We need up to 5us to enable the irq, and up to 20us to hide the
1556 : : * scheduler latency of a context switch, ignoring the secondary
1557 : : * impacts from a context switch such as cache eviction.
1558 : : *
1559 : : * The scheme used for low-latency IO is called "hybrid interrupt
1560 : : * polling". The suggestion there is to sleep until just before you
1561 : : * expect to be woken by the device interrupt and then poll for its
1562 : : * completion. That requires having a good predictor for the request
1563 : : * duration, which we currently lack.
1564 : : */
1565 [ # # ]: 0 : if (IS_ACTIVE(CONFIG_DRM_I915_SPIN_REQUEST) &&
1566 : 0 : __i915_spin_request(rq, state, CONFIG_DRM_I915_SPIN_REQUEST)) {
1567 : 0 : dma_fence_signal(&rq->fence);
1568 : 0 : goto out;
1569 : : }
1570 : :
1571 : : /*
1572 : : * This client is about to stall waiting for the GPU. In many cases
1573 : : * this is undesirable and limits the throughput of the system, as
1574 : : * many clients cannot continue processing user input/output whilst
1575 : : * blocked. RPS autotuning may take tens of milliseconds to respond
1576 : : * to the GPU load and thus incurs additional latency for the client.
1577 : : * We can circumvent that by promoting the GPU frequency to maximum
1578 : : * before we sleep. This makes the GPU throttle up much more quickly
1579 : : * (good for benchmarks and user experience, e.g. window animations),
1580 : : * but at a cost of spending more power processing the workload
1581 : : * (bad for battery).
1582 : : */
1583 [ # # ]: 0 : if (flags & I915_WAIT_PRIORITY) {
1584 [ # # # # ]: 0 : if (!i915_request_started(rq) && INTEL_GEN(rq->i915) >= 6)
1585 : 0 : intel_rps_boost(rq);
1586 : 0 : i915_schedule_bump_priority(rq, I915_PRIORITY_WAIT);
1587 : : }
1588 : :
1589 : 0 : wait.tsk = current;
1590 [ # # ]: 0 : if (dma_fence_add_callback(&rq->fence, &wait.cb, request_wait_wake))
1591 : 0 : goto out;
1592 : :
1593 : 0 : for (;;) {
1594 : 0 : set_current_state(state);
1595 : :
1596 [ # # ]: 0 : if (i915_request_completed(rq)) {
1597 : 0 : dma_fence_signal(&rq->fence);
1598 : 0 : break;
1599 : : }
1600 : :
1601 [ # # ]: 0 : if (signal_pending_state(state, current)) {
1602 : : timeout = -ERESTARTSYS;
1603 : : break;
1604 : : }
1605 : :
1606 [ # # ]: 0 : if (!timeout) {
1607 : : timeout = -ETIME;
1608 : : break;
1609 : : }
1610 : :
1611 : 0 : intel_engine_flush_submission(rq->engine);
1612 : 0 : timeout = io_schedule_timeout(timeout);
1613 : : }
1614 : 0 : __set_current_state(TASK_RUNNING);
1615 : :
1616 : 0 : dma_fence_remove_callback(&rq->fence, &wait.cb);
1617 : :
1618 : 0 : out:
1619 : 0 : mutex_release(&rq->engine->gt->reset.mutex.dep_map, _THIS_IP_);
1620 : 0 : trace_i915_request_wait_end(rq);
1621 : 0 : return timeout;
1622 : : }
1623 : :
1624 : : #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
1625 : : #include "selftests/mock_request.c"
1626 : : #include "selftests/i915_request.c"
1627 : : #endif
1628 : :
1629 : 0 : static void i915_global_request_shrink(void)
1630 : : {
1631 : 0 : kmem_cache_shrink(global.slab_dependencies);
1632 : 0 : kmem_cache_shrink(global.slab_execute_cbs);
1633 : 0 : kmem_cache_shrink(global.slab_requests);
1634 : 0 : }
1635 : :
1636 : 0 : static void i915_global_request_exit(void)
1637 : : {
1638 : 0 : kmem_cache_destroy(global.slab_dependencies);
1639 : 0 : kmem_cache_destroy(global.slab_execute_cbs);
1640 : 0 : kmem_cache_destroy(global.slab_requests);
1641 : 0 : }
1642 : :
1643 : : static struct i915_global_request global = { {
1644 : : .shrink = i915_global_request_shrink,
1645 : : .exit = i915_global_request_exit,
1646 : : } };
1647 : :
1648 : 13 : int __init i915_global_request_init(void)
1649 : : {
1650 : 26 : global.slab_requests =
1651 : 13 : kmem_cache_create("i915_request",
1652 : : sizeof(struct i915_request),
1653 : : __alignof__(struct i915_request),
1654 : : SLAB_HWCACHE_ALIGN |
1655 : : SLAB_RECLAIM_ACCOUNT |
1656 : : SLAB_TYPESAFE_BY_RCU,
1657 : : __i915_request_ctor);
1658 [ + - ]: 13 : if (!global.slab_requests)
1659 : : return -ENOMEM;
1660 : :
1661 : 13 : global.slab_execute_cbs = KMEM_CACHE(execute_cb,
1662 : : SLAB_HWCACHE_ALIGN |
1663 : : SLAB_RECLAIM_ACCOUNT |
1664 : : SLAB_TYPESAFE_BY_RCU);
1665 [ - + ]: 13 : if (!global.slab_execute_cbs)
1666 : 0 : goto err_requests;
1667 : :
1668 : 13 : global.slab_dependencies = KMEM_CACHE(i915_dependency,
1669 : : SLAB_HWCACHE_ALIGN |
1670 : : SLAB_RECLAIM_ACCOUNT);
1671 [ - + ]: 13 : if (!global.slab_dependencies)
1672 : 0 : goto err_execute_cbs;
1673 : :
1674 : 13 : i915_global_register(&global.base);
1675 : 13 : return 0;
1676 : :
1677 : : err_execute_cbs:
1678 : 0 : kmem_cache_destroy(global.slab_execute_cbs);
1679 : 0 : err_requests:
1680 : 0 : kmem_cache_destroy(global.slab_requests);
1681 : 0 : return -ENOMEM;
1682 : : }