Branch data Line data Source code
1 : : /*
2 : : * SPDX-License-Identifier: MIT
3 : : *
4 : : * Copyright © 2019 Intel Corporation
5 : : */
6 : :
7 : : #include "i915_drv.h"
8 : :
9 : : #include "intel_context.h"
10 : : #include "intel_engine.h"
11 : : #include "intel_engine_heartbeat.h"
12 : : #include "intel_engine_pm.h"
13 : : #include "intel_engine_pool.h"
14 : : #include "intel_gt.h"
15 : : #include "intel_gt_pm.h"
16 : : #include "intel_rc6.h"
17 : : #include "intel_ring.h"
18 : :
19 : 0 : static int __engine_unpark(struct intel_wakeref *wf)
20 : : {
21 : 0 : struct intel_engine_cs *engine =
22 : 0 : container_of(wf, typeof(*engine), wakeref);
23 : 0 : struct intel_context *ce;
24 : 0 : void *map;
25 : :
26 : 0 : ENGINE_TRACE(engine, "\n");
27 : :
28 : 0 : intel_gt_pm_get(engine->gt);
29 : :
30 : : /* Pin the default state for fast resets from atomic context. */
31 : 0 : map = NULL;
32 [ # # ]: 0 : if (engine->default_state)
33 : 0 : map = i915_gem_object_pin_map(engine->default_state,
34 : : I915_MAP_WB);
35 [ # # # # ]: 0 : if (!IS_ERR_OR_NULL(map))
36 : 0 : engine->pinned_default_state = map;
37 : :
38 : : /* Discard stale context state from across idling */
39 : 0 : ce = engine->kernel_context;
40 [ # # ]: 0 : if (ce) {
41 : 0 : GEM_BUG_ON(test_bit(CONTEXT_VALID_BIT, &ce->flags));
42 : :
43 : : /* First poison the image to verify we never fully trust it */
44 : 0 : if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM) && ce->state) {
45 : : struct drm_i915_gem_object *obj = ce->state->obj;
46 : : int type = i915_coherent_map_type(engine->i915);
47 : :
48 : : map = i915_gem_object_pin_map(obj, type);
49 : : if (!IS_ERR(map)) {
50 : : memset(map, CONTEXT_REDZONE, obj->base.size);
51 : : i915_gem_object_flush_map(obj);
52 : : i915_gem_object_unpin_map(obj);
53 : : }
54 : : }
55 : :
56 : 0 : ce->ops->reset(ce);
57 : : }
58 : :
59 [ # # ]: 0 : if (engine->unpark)
60 : 0 : engine->unpark(engine);
61 : :
62 : 0 : intel_engine_unpark_heartbeat(engine);
63 : 0 : return 0;
64 : : }
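
/*
 * The poison pass in __engine_unpark() above fills the stale context image
 * with CONTEXT_REDZONE so debug builds never silently trust a saved image
 * from across idling. Below is a minimal standalone userspace sketch of the
 * same redzone idea; all names (REDZONE_BYTE, poison_buffer,
 * first_dirty_byte) are hypothetical and this is not the i915 helpers.
 */

#include <stdint.h>
#include <stdio.h>
#include <string.h>

#define REDZONE_BYTE 0x5a	/* arbitrary poison value for this sketch */

/* Fill a buffer with a known poison pattern before handing it out. */
static void poison_buffer(void *buf, size_t len)
{
	memset(buf, REDZONE_BYTE, len);
}

/*
 * Return the offset of the first byte that no longer holds the poison,
 * or (size_t)-1 if the buffer is still untouched.
 */
static size_t first_dirty_byte(const void *buf, size_t len)
{
	const uint8_t *p = buf;
	size_t i;

	for (i = 0; i < len; i++)
		if (p[i] != REDZONE_BYTE)
			return i;
	return (size_t)-1;
}

int main(void)
{
	uint8_t image[64];

	poison_buffer(image, sizeof(image));
	image[10] = 0xff;	/* simulate something writing into the image */

	printf("first dirty byte at %zu\n", first_dirty_byte(image, sizeof(image)));
	return 0;
}
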
65 : :
66 : : #if IS_ENABLED(CONFIG_LOCKDEP)
67 : :
68 : : static inline unsigned long __timeline_mark_lock(struct intel_context *ce)
69 : : {
70 : : unsigned long flags;
71 : :
72 : : local_irq_save(flags);
73 : : mutex_acquire(&ce->timeline->mutex.dep_map, 2, 0, _THIS_IP_);
74 : :
75 : : return flags;
76 : : }
77 : :
78 : : static inline void __timeline_mark_unlock(struct intel_context *ce,
79 : : unsigned long flags)
80 : : {
81 : : mutex_release(&ce->timeline->mutex.dep_map, _THIS_IP_);
82 : : local_irq_restore(flags);
83 : : }
84 : :
85 : : #else
86 : :
87 : 0 : static inline unsigned long __timeline_mark_lock(struct intel_context *ce)
88 : : {
89 : 0 : return 0;
90 : : }
91 : :
92 : : static inline void __timeline_mark_unlock(struct intel_context *ce,
93 : : unsigned long flags)
94 : : {
95 : : }
96 : :
97 : : #endif /* !IS_ENABLED(CONFIG_LOCKDEP) */
98 : :
99 : 0 : static void duration(struct dma_fence *fence, struct dma_fence_cb *cb)
100 : : {
101 [ # # ]: 0 : struct i915_request *rq = to_request(fence);
102 : :
103 : 0 : ewma__engine_latency_add(&rq->engine->latency,
104 [ # # ]: 0 : ktime_us_delta(rq->fence.timestamp,
105 : : rq->duration.emitted));
106 : 0 : }
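
/*
 * duration() above feeds the measured emit-to-completion time of the final
 * kernel-context request into an exponentially weighted moving average of
 * engine latency. A toy standalone EWMA with a 1/weight smoothing factor is
 * sketched below; toy_ewma and toy_ewma_add are hypothetical names, not the
 * DECLARE_EWMA machinery the driver actually uses.
 */

#include <stdio.h>

struct toy_ewma {
	unsigned long avg;
	unsigned int weight;	/* each new sample contributes 1/weight */
};

static void toy_ewma_add(struct toy_ewma *e, unsigned long sample)
{
	if (!e->avg)
		e->avg = sample;	/* seed with the first sample */
	else
		e->avg = e->avg - e->avg / e->weight + sample / e->weight;
}

int main(void)
{
	struct toy_ewma latency = { .avg = 0, .weight = 8 };
	unsigned long samples_us[] = { 100, 120, 90, 400, 110 };
	unsigned int i;

	for (i = 0; i < sizeof(samples_us) / sizeof(samples_us[0]); i++) {
		toy_ewma_add(&latency, samples_us[i]);
		printf("sample %4lu us -> ewma %lu us\n", samples_us[i], latency.avg);
	}
	return 0;
}
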
107 : :
108 : : static void
109 : 0 : __queue_and_release_pm(struct i915_request *rq,
110 : : struct intel_timeline *tl,
111 : : struct intel_engine_cs *engine)
112 : : {
113 : 0 : struct intel_gt_timelines *timelines = &engine->gt->timelines;
114 : :
115 : 0 : ENGINE_TRACE(engine, "\n");
116 : :
117 : : /*
118 : : * We have to serialise all potential retirement paths with our
119 : : * submission, as we don't want to underflow either the
120 : : * engine->wakeref.count or our timeline->active_count.
121 : : *
122 : : * Equally, we cannot allow a new submission to start until
123 : : * after we finish queueing, nor could we allow that submitter
124 : : * to retire us before we are ready!
125 : : */
126 : 0 : spin_lock(&timelines->lock);
127 : :
128 : : /* Let intel_gt_retire_requests() retire us (acquired under lock) */
129 [ # # ]: 0 : if (!atomic_fetch_inc(&tl->active_count))
130 : 0 : list_add_tail(&tl->link, &timelines->active_list);
131 : :
132 : : /* Hand the request over to HW (and so to engine_retire()) */
133 : 0 : __i915_request_queue(rq, NULL);
134 : :
135 : : /* Let new submissions commence (and maybe retire this timeline) */
136 : 0 : __intel_wakeref_defer_park(&engine->wakeref);
137 : :
138 : 0 : spin_unlock(&timelines->lock);
139 : 0 : }
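
/*
 * __queue_and_release_pm() above relies on atomic_fetch_inc() returning the
 * old counter value, so only the 0 -> 1 transition of tl->active_count puts
 * the timeline on the active list (and it does so under timelines->lock).
 * The standalone sketch below shows just that first-activation pattern with
 * C11 atomics; activate() and active_count are hypothetical names and the
 * locking around the list is deliberately omitted.
 */

#include <stdatomic.h>
#include <stdio.h>

static atomic_int active_count;

static void activate(const char *who)
{
	/* atomic_fetch_add() returns the previous value, just as the kernel's
	 * atomic_fetch_inc() does, so only the first activator publishes. */
	if (atomic_fetch_add(&active_count, 1) == 0)
		printf("%s: 0 -> 1, add to the active list\n", who);
	else
		printf("%s: already active, nothing to publish\n", who);
}

int main(void)
{
	activate("first submitter");
	activate("second submitter");
	return 0;
}
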
140 : :
141 : 0 : static bool switch_to_kernel_context(struct intel_engine_cs *engine)
142 : : {
143 : 0 : struct intel_context *ce = engine->kernel_context;
144 : 0 : struct i915_request *rq;
145 : 0 : unsigned long flags;
146 : 0 : bool result = true;
147 : :
148 : : /* GPU is pointing to the void, as good as in the kernel context. */
149 [ # # ]: 0 : if (intel_gt_is_wedged(engine->gt))
150 : : return true;
151 : :
152 : 0 : GEM_BUG_ON(!intel_context_is_barrier(ce));
153 : :
154 : : /* Already inside the kernel context, safe to power down. */
155 [ # # ]: 0 : if (engine->wakeref_serial == engine->serial)
156 : : return true;
157 : :
158 : : /*
159 : : * Note, we do this without taking the timeline->mutex. We cannot
160 : : * as we may be called while retiring the kernel context and so
161 : : * already underneath the timeline->mutex. Instead we rely on the
162 : : * exclusive property of the __engine_park that prevents anyone
163 : : * else from creating a request on this engine. This also requires
164 : : * that the ring is empty and we avoid any waits while constructing
165 : : * the context, as they assume protection by the timeline->mutex.
166 : : * This should hold true as we can only park the engine after
167 : : * retiring the last request, thus all rings should be empty and
168 : : * all timelines idle.
169 : : *
170 : : * For unlocking, there are 2 other parties and the GPU who have a
171 : : * stake here.
172 : : *
173 : : * A new GPU user will be waiting on the engine-pm to start their
174 : : * engine_unpark. New waiters are predicated on engine->wakeref.count
175 : : * and so intel_wakeref_defer_park() acts like a mutex_unlock of the
176 : : * engine->wakeref.
177 : : *
178 : : * The other party is intel_gt_retire_requests(), which is walking the
179 : : * list of active timelines looking for completions. Meanwhile as soon
180 : : * as we call __i915_request_queue(), the GPU may complete our request.
181 : : * Ergo, if we put ourselves on the timelines.active_list
182 : : * (see intel_timeline_enter()) before we increment the
183 : : * engine->wakeref.count, we may see the request completion and retire
184 : : * it, causing an underflow of the engine->wakeref.
185 : : */
186 : 0 : flags = __timeline_mark_lock(ce);
187 : 0 : GEM_BUG_ON(atomic_read(&ce->timeline->active_count) < 0);
188 : :
189 : 0 : rq = __i915_request_create(ce, GFP_NOWAIT);
190 [ # # ]: 0 : if (IS_ERR(rq))
191 : : /* Context switch failed, hope for the best! Maybe reset? */
192 : 0 : goto out_unlock;
193 : :
194 : : /* Check again on the next retirement. */
195 : 0 : engine->wakeref_serial = engine->serial + 1;
196 : 0 : i915_request_add_active_barriers(rq);
197 : :
198 : : /* Install ourselves as a preemption barrier */
199 : 0 : rq->sched.attr.priority = I915_PRIORITY_BARRIER;
200 [ # # ]: 0 : if (likely(!__i915_request_commit(rq))) { /* engine should be idle! */
201 : : /*
202 : : * Use an interrupt for precise measurement of duration,
203 : : * otherwise we rely on someone else retiring all the requests
204 : : * which may delay the signaling (i.e. we will likely wait
205 : : * until the background request retirement that runs every
206 : : * second or two).
207 : : */
208 : 0 : BUILD_BUG_ON(sizeof(rq->duration) > sizeof(rq->submitq));
209 : 0 : dma_fence_add_callback(&rq->fence, &rq->duration.cb, duration);
210 : 0 : rq->duration.emitted = ktime_get();
211 : : }
212 : :
213 : : /* Expose ourselves to the world */
214 : 0 : __queue_and_release_pm(rq, ce->timeline, engine);
215 : :
216 : 0 : result = false;
217 : : out_unlock:
218 : : __timeline_mark_unlock(ce, flags);
219 : : return result;
220 : : }
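
/*
 * switch_to_kernel_context() above checks the result of
 * __i915_request_create() with IS_ERR(): the kernel encodes a small negative
 * errno inside the pointer itself rather than returning NULL. The standalone
 * userspace model below reimplements that error-pointer convention purely for
 * illustration; create_request() is a hypothetical stand-in, and the real
 * definitions live in the kernel's <linux/err.h>.
 */

#include <errno.h>
#include <stdio.h>

#define MAX_ERRNO 4095

static inline void *ERR_PTR(long error)
{
	return (void *)error;
}

static inline long PTR_ERR(const void *ptr)
{
	return (long)ptr;
}

static inline int IS_ERR(const void *ptr)
{
	return (unsigned long)ptr >= (unsigned long)-MAX_ERRNO;
}

static void *create_request(int fail)
{
	static int the_request;	/* stand-in for a real allocation */

	if (fail)
		return ERR_PTR(-ENOMEM);
	return &the_request;
}

int main(void)
{
	void *rq = create_request(1);

	if (IS_ERR(rq))
		printf("request creation failed: %ld\n", PTR_ERR(rq));
	return 0;
}
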
221 : :
222 : 0 : static void call_idle_barriers(struct intel_engine_cs *engine)
223 : : {
224 : 0 : struct llist_node *node, *next;
225 : :
226 [ # # ]: 0 : llist_for_each_safe(node, next, llist_del_all(&engine->barrier_tasks)) {
227 : 0 : struct dma_fence_cb *cb =
228 : 0 : container_of((struct list_head *)node,
229 : : typeof(*cb), node);
230 : :
231 : 0 : cb->func(ERR_PTR(-EAGAIN), cb);
232 : : }
233 : 0 : }
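
/*
 * call_idle_barriers() above walks a lockless list whose nodes are embedded
 * inside each dma_fence_cb, recovers the callback with container_of() and
 * invokes it with ERR_PTR(-EAGAIN). The sketch below shows the container_of()
 * idiom on its own in userspace; struct callback and its fields are
 * hypothetical, not the i915 types.
 */

#include <stddef.h>
#include <stdio.h>

/* Recover the parent structure from a pointer to one of its members. */
#define container_of(ptr, type, member) \
	((type *)((char *)(ptr) - offsetof(type, member)))

struct node {
	struct node *next;
};

struct callback {
	int id;
	struct node link;	/* embedded list node, as in dma_fence_cb */
};

int main(void)
{
	struct callback cb = { .id = 42 };
	struct node *n = &cb.link;	/* only the embedded node is handed around */
	struct callback *owner = container_of(n, struct callback, link);

	printf("callback id %d\n", owner->id);
	return 0;
}
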
234 : :
235 : 0 : static int __engine_park(struct intel_wakeref *wf)
236 : : {
237 : 0 : struct intel_engine_cs *engine =
238 : 0 : container_of(wf, typeof(*engine), wakeref);
239 : :
240 : 0 : engine->saturated = 0;
241 : :
242 : : /*
243 : : * If one and only one request is completed between pm events,
244 : : * we know that we are inside the kernel context and it is
245 : : * safe to power down. (We are paranoid in case that runtime
246 : : * suspend causes corruption to the active context image, and
247 : : * want to avoid that impacting userspace.)
248 : : */
249 [ # # ]: 0 : if (!switch_to_kernel_context(engine))
250 : : return -EBUSY;
251 : :
252 : 0 : ENGINE_TRACE(engine, "\n");
253 : :
254 : 0 : call_idle_barriers(engine); /* cleanup after wedging */
255 : :
256 : 0 : intel_engine_park_heartbeat(engine);
257 : 0 : intel_engine_disarm_breadcrumbs(engine);
258 : 0 : intel_engine_pool_park(&engine->pool);
259 : :
260 : : /* Must be reset upon idling, or we may miss the busy wakeup. */
261 : 0 : GEM_BUG_ON(engine->execlists.queue_priority_hint != INT_MIN);
262 : :
263 [ # # ]: 0 : if (engine->park)
264 : 0 : engine->park(engine);
265 : :
266 [ # # ]: 0 : if (engine->pinned_default_state) {
267 : 0 : i915_gem_object_unpin_map(engine->default_state);
268 : 0 : engine->pinned_default_state = NULL;
269 : : }
270 : :
271 : 0 : engine->execlists.no_priolist = false;
272 : :
273 : : /* While gt calls i915_vma_parked(), we have to break the lock cycle */
274 : 0 : intel_gt_pm_put_async(engine->gt);
275 : 0 : return 0;
276 : : }
277 : :
278 : : static const struct intel_wakeref_ops wf_ops = {
279 : : .get = __engine_unpark,
280 : : .put = __engine_park,
281 : : };
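
/*
 * wf_ops above hooks __engine_unpark()/__engine_park() into the engine's
 * wakeref, so that taking the first reference wakes the engine and dropping
 * the last reference parks it. The standalone model below captures only that
 * first-get/last-put callback pattern; toy_wakeref and friends are
 * hypothetical names, and the kernel's atomics, mutex and deferred-park
 * handling are deliberately left out.
 */

#include <stdio.h>

struct toy_wakeref_ops {
	void (*get)(void *data);	/* called on the 0 -> 1 transition */
	void (*put)(void *data);	/* called on the 1 -> 0 transition */
};

struct toy_wakeref {
	int count;
	const struct toy_wakeref_ops *ops;
	void *data;
};

static void toy_wakeref_get(struct toy_wakeref *wf)
{
	if (wf->count++ == 0)
		wf->ops->get(wf->data);
}

static void toy_wakeref_put(struct toy_wakeref *wf)
{
	if (--wf->count == 0)
		wf->ops->put(wf->data);
}

static void toy_unpark(void *data) { printf("unpark %s\n", (const char *)data); }
static void toy_park(void *data)   { printf("park %s\n", (const char *)data); }

static const struct toy_wakeref_ops toy_ops = {
	.get = toy_unpark,
	.put = toy_park,
};

int main(void)
{
	struct toy_wakeref wf = { .count = 0, .ops = &toy_ops, .data = "rcs0" };

	toy_wakeref_get(&wf);	/* first user: engine unparks */
	toy_wakeref_get(&wf);	/* nested user: no callback */
	toy_wakeref_put(&wf);
	toy_wakeref_put(&wf);	/* last user: engine parks */
	return 0;
}
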
282 : :
283 : 0 : void intel_engine_init__pm(struct intel_engine_cs *engine)
284 : : {
285 : 0 : struct intel_runtime_pm *rpm = engine->uncore->rpm;
286 : :
287 : 0 : intel_wakeref_init(&engine->wakeref, rpm, &wf_ops);
288 : 0 : intel_engine_init_heartbeat(engine);
289 : 0 : }
290 : :
291 : : #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
292 : : #include "selftest_engine_pm.c"
293 : : #endif