LCOV - code coverage report
Current view: top level - drivers/gpu/drm/i915 - i915_request.c (source / functions) Hit Total Coverage
Test: combined.info Lines: 10 639 1.6 %
Date: 2022-04-01 14:35:51 Functions: 1 45 2.2 %
Branches: 3 300 1.0 %

           Branch data     Line data    Source code
       1                 :            : /*
       2                 :            :  * Copyright © 2008-2015 Intel Corporation
       3                 :            :  *
       4                 :            :  * Permission is hereby granted, free of charge, to any person obtaining a
       5                 :            :  * copy of this software and associated documentation files (the "Software"),
       6                 :            :  * to deal in the Software without restriction, including without limitation
       7                 :            :  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
       8                 :            :  * and/or sell copies of the Software, and to permit persons to whom the
       9                 :            :  * Software is furnished to do so, subject to the following conditions:
      10                 :            :  *
      11                 :            :  * The above copyright notice and this permission notice (including the next
      12                 :            :  * paragraph) shall be included in all copies or substantial portions of the
      13                 :            :  * Software.
      14                 :            :  *
      15                 :            :  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
      16                 :            :  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
      17                 :            :  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
      18                 :            :  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
      19                 :            :  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
      20                 :            :  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
      21                 :            :  * IN THE SOFTWARE.
      22                 :            :  *
      23                 :            :  */
      24                 :            : 
      25                 :            : #include <linux/dma-fence-array.h>
      26                 :            : #include <linux/irq_work.h>
      27                 :            : #include <linux/prefetch.h>
      28                 :            : #include <linux/sched.h>
      29                 :            : #include <linux/sched/clock.h>
      30                 :            : #include <linux/sched/signal.h>
      31                 :            : 
      32                 :            : #include "gem/i915_gem_context.h"
      33                 :            : #include "gt/intel_context.h"
      34                 :            : #include "gt/intel_ring.h"
      35                 :            : #include "gt/intel_rps.h"
      36                 :            : 
      37                 :            : #include "i915_active.h"
      38                 :            : #include "i915_drv.h"
      39                 :            : #include "i915_globals.h"
      40                 :            : #include "i915_trace.h"
      41                 :            : #include "intel_pm.h"
      42                 :            : 
      43                 :            : struct execute_cb {
      44                 :            :         struct list_head link;
      45                 :            :         struct irq_work work;
      46                 :            :         struct i915_sw_fence *fence;
      47                 :            :         void (*hook)(struct i915_request *rq, struct dma_fence *signal);
      48                 :            :         struct i915_request *signal;
      49                 :            : };
      50                 :            : 
      51                 :            : static struct i915_global_request {
      52                 :            :         struct i915_global base;
      53                 :            :         struct kmem_cache *slab_requests;
      54                 :            :         struct kmem_cache *slab_dependencies;
      55                 :            :         struct kmem_cache *slab_execute_cbs;
      56                 :            : } global;
      57                 :            : 
      58                 :          0 : static const char *i915_fence_get_driver_name(struct dma_fence *fence)
      59                 :            : {
      60         [ #  # ]:          0 :         return dev_name(to_request(fence)->i915->drm.dev);
      61                 :            : }
      62                 :            : 
      63                 :          0 : static const char *i915_fence_get_timeline_name(struct dma_fence *fence)
      64                 :            : {
      65                 :          0 :         const struct i915_gem_context *ctx;
      66                 :            : 
      67                 :            :         /*
      68                 :            :          * The timeline struct (as part of the ppgtt underneath a context)
      69                 :            :          * may be freed when the request is no longer in use by the GPU.
      70                 :            :          * We could extend the life of a context to beyond that of all
      71                 :            :          * fences, possibly keeping the hw resource around indefinitely,
      72                 :            :          * or we just give them a false name. Since
      73                 :            :          * dma_fence_ops.get_timeline_name is a debug feature, the occasional
      74                 :            :          * lie seems justifiable.
      75                 :            :          */
      76         [ #  # ]:          0 :         if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags))
      77                 :            :                 return "signaled";
      78                 :            : 
      79         [ #  # ]:          0 :         ctx = i915_request_gem_context(to_request(fence));
      80         [ #  # ]:          0 :         if (!ctx)
      81                 :            :                 return "[" DRIVER_NAME "]";
      82                 :            : 
      83                 :          0 :         return ctx->name;
      84                 :            : }
      85                 :            : 
      86                 :          0 : static bool i915_fence_signaled(struct dma_fence *fence)
      87                 :            : {
      88                 :          0 :         return i915_request_completed(to_request(fence));
      89                 :            : }
      90                 :            : 
      91                 :          0 : static bool i915_fence_enable_signaling(struct dma_fence *fence)
      92                 :            : {
      93                 :          0 :         return i915_request_enable_breadcrumb(to_request(fence));
      94                 :            : }
      95                 :            : 
      96                 :          0 : static signed long i915_fence_wait(struct dma_fence *fence,
      97                 :            :                                    bool interruptible,
      98                 :            :                                    signed long timeout)
      99                 :            : {
     100                 :          0 :         return i915_request_wait(to_request(fence),
     101                 :            :                                  interruptible | I915_WAIT_PRIORITY,
     102                 :            :                                  timeout);
     103                 :            : }
     104                 :            : 
     105                 :          0 : static void i915_fence_release(struct dma_fence *fence)
     106                 :            : {
     107                 :          0 :         struct i915_request *rq = to_request(fence);
     108                 :            : 
     109                 :            :         /*
     110                 :            :          * The request is put onto a RCU freelist (i.e. the address
     111                 :            :          * is immediately reused), mark the fences as being freed now.
     112                 :            :          * Otherwise the debugobjects for the fences are only marked as
     113                 :            :          * freed when the slab cache itself is freed, and so we would get
     114                 :            :          * caught trying to reuse dead objects.
     115                 :            :          */
     116                 :          0 :         i915_sw_fence_fini(&rq->submit);
     117                 :          0 :         i915_sw_fence_fini(&rq->semaphore);
     118                 :            : 
     119                 :          0 :         kmem_cache_free(global.slab_requests, rq);
     120                 :          0 : }
     121                 :            : 
     122                 :            : const struct dma_fence_ops i915_fence_ops = {
     123                 :            :         .get_driver_name = i915_fence_get_driver_name,
     124                 :            :         .get_timeline_name = i915_fence_get_timeline_name,
     125                 :            :         .enable_signaling = i915_fence_enable_signaling,
     126                 :            :         .signaled = i915_fence_signaled,
     127                 :            :         .wait = i915_fence_wait,
     128                 :            :         .release = i915_fence_release,
     129                 :            : };
     130                 :            : 
     131                 :          0 : static void irq_execute_cb(struct irq_work *wrk)
     132                 :            : {
     133                 :          0 :         struct execute_cb *cb = container_of(wrk, typeof(*cb), work);
     134                 :            : 
     135                 :          0 :         i915_sw_fence_complete(cb->fence);
     136                 :          0 :         kmem_cache_free(global.slab_execute_cbs, cb);
     137                 :          0 : }
     138                 :            : 
     139                 :          0 : static void irq_execute_cb_hook(struct irq_work *wrk)
     140                 :            : {
     141                 :          0 :         struct execute_cb *cb = container_of(wrk, typeof(*cb), work);
     142                 :            : 
     143                 :          0 :         cb->hook(container_of(cb->fence, struct i915_request, submit),
     144                 :          0 :                  &cb->signal->fence);
     145                 :          0 :         i915_request_put(cb->signal);
     146                 :            : 
     147                 :          0 :         irq_execute_cb(wrk);
     148                 :          0 : }
     149                 :            : 
     150                 :          0 : static void __notify_execute_cb(struct i915_request *rq)
     151                 :            : {
     152                 :          0 :         struct execute_cb *cb;
     153                 :            : 
     154                 :          0 :         lockdep_assert_held(&rq->lock);
     155                 :            : 
     156                 :          0 :         if (list_empty(&rq->execute_cb))
     157                 :            :                 return;
     158                 :            : 
     159   [ #  #  #  # ]:          0 :         list_for_each_entry(cb, &rq->execute_cb, link)
     160                 :          0 :                 irq_work_queue(&cb->work);
     161                 :            : 
     162                 :            :         /*
     163                 :            :          * XXX Rollback on __i915_request_unsubmit()
     164                 :            :          *
     165                 :            :          * In the future, perhaps when we have an active time-slicing scheduler,
     166                 :            :          * it will be interesting to unsubmit parallel execution and remove
     167                 :            :          * busywaits from the GPU until their master is restarted. This is
     168                 :            :          * quite hairy, we have to carefully rollback the fence and do a
     169                 :            :          * preempt-to-idle cycle on the target engine, all the while the
     170                 :            :          * master execute_cb may refire.
     171                 :            :          */
     172                 :          0 :         INIT_LIST_HEAD(&rq->execute_cb);
     173                 :            : }
     174                 :            : 
     175                 :            : static inline void
     176                 :          0 : remove_from_client(struct i915_request *request)
     177                 :            : {
     178                 :          0 :         struct drm_i915_file_private *file_priv;
     179                 :            : 
     180         [ #  # ]:          0 :         if (!READ_ONCE(request->file_priv))
     181                 :            :                 return;
     182                 :            : 
     183                 :          0 :         rcu_read_lock();
     184                 :          0 :         file_priv = xchg(&request->file_priv, NULL);
     185         [ #  # ]:          0 :         if (file_priv) {
     186                 :          0 :                 spin_lock(&file_priv->mm.lock);
     187                 :          0 :                 list_del(&request->client_link);
     188                 :          0 :                 spin_unlock(&file_priv->mm.lock);
     189                 :            :         }
     190                 :          0 :         rcu_read_unlock();
     191                 :            : }
     192                 :            : 
     193                 :          0 : static void free_capture_list(struct i915_request *request)
     194                 :            : {
     195                 :          0 :         struct i915_capture_list *capture;
     196                 :            : 
     197                 :          0 :         capture = fetch_and_zero(&request->capture_list);
     198         [ #  # ]:          0 :         while (capture) {
     199                 :          0 :                 struct i915_capture_list *next = capture->next;
     200                 :            : 
     201                 :          0 :                 kfree(capture);
     202                 :          0 :                 capture = next;
     203                 :            :         }
     204                 :            : }
     205                 :            : 
     206                 :          0 : static void remove_from_engine(struct i915_request *rq)
     207                 :            : {
     208                 :          0 :         struct intel_engine_cs *engine, *locked;
     209                 :            : 
     210                 :            :         /*
     211                 :            :          * Virtual engines complicate acquiring the engine timeline lock,
     212                 :            :          * as their rq->engine pointer is not stable until under that
     213                 :            :          * engine lock. The simple ploy we use is to take the lock then
     214                 :            :          * check that the rq still belongs to the newly locked engine.
     215                 :            :          */
     216                 :          0 :         locked = READ_ONCE(rq->engine);
     217                 :          0 :         spin_lock_irq(&locked->active.lock);
     218         [ #  # ]:          0 :         while (unlikely(locked != (engine = READ_ONCE(rq->engine)))) {
     219                 :          0 :                 spin_unlock(&locked->active.lock);
     220                 :          0 :                 spin_lock(&engine->active.lock);
     221                 :          0 :                 locked = engine;
     222                 :            :         }
     223                 :          0 :         list_del_init(&rq->sched.link);
     224                 :          0 :         clear_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags);
     225                 :          0 :         clear_bit(I915_FENCE_FLAG_HOLD, &rq->fence.flags);
     226                 :          0 :         spin_unlock_irq(&locked->active.lock);
     227                 :          0 : }
     228                 :            : 
     229                 :          0 : bool i915_request_retire(struct i915_request *rq)
     230                 :            : {
     231         [ #  # ]:          0 :         if (!i915_request_completed(rq))
     232                 :            :                 return false;
     233                 :            : 
     234                 :          0 :         RQ_TRACE(rq, "\n");
     235                 :            : 
     236                 :          0 :         GEM_BUG_ON(!i915_sw_fence_signaled(&rq->submit));
     237                 :          0 :         trace_i915_request_retire(rq);
     238                 :            : 
     239                 :            :         /*
     240                 :            :          * We know the GPU must have read the request to have
     241                 :            :          * sent us the seqno + interrupt, so use the position
     242                 :            :          * of tail of the request to update the last known position
     243                 :            :          * of the GPU head.
     244                 :            :          *
     245                 :            :          * Note this requires that we are always called in request
     246                 :            :          * completion order.
     247                 :            :          */
     248                 :          0 :         GEM_BUG_ON(!list_is_first(&rq->link,
     249                 :            :                                   &i915_request_timeline(rq)->requests));
     250                 :          0 :         rq->ring->head = rq->postfix;
     251                 :            : 
     252                 :            :         /*
     253                 :            :          * We only loosely track inflight requests across preemption,
     254                 :            :          * and so we may find ourselves attempting to retire a _completed_
     255                 :            :          * request that we have removed from the HW and put back on a run
     256                 :            :          * queue.
     257                 :            :          */
     258                 :          0 :         remove_from_engine(rq);
     259                 :            : 
     260                 :          0 :         spin_lock_irq(&rq->lock);
     261                 :          0 :         i915_request_mark_complete(rq);
     262         [ #  # ]:          0 :         if (!i915_request_signaled(rq))
     263                 :          0 :                 dma_fence_signal_locked(&rq->fence);
     264         [ #  # ]:          0 :         if (test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT, &rq->fence.flags))
     265                 :          0 :                 i915_request_cancel_breadcrumb(rq);
     266         [ #  # ]:          0 :         if (i915_request_has_waitboost(rq)) {
     267                 :          0 :                 GEM_BUG_ON(!atomic_read(&rq->engine->gt->rps.num_waiters));
     268                 :          0 :                 atomic_dec(&rq->engine->gt->rps.num_waiters);
     269                 :            :         }
     270         [ #  # ]:          0 :         if (!test_bit(I915_FENCE_FLAG_ACTIVE, &rq->fence.flags)) {
     271                 :          0 :                 set_bit(I915_FENCE_FLAG_ACTIVE, &rq->fence.flags);
     272         [ #  # ]:          0 :                 __notify_execute_cb(rq);
     273                 :            :         }
     274                 :          0 :         GEM_BUG_ON(!list_empty(&rq->execute_cb));
     275                 :          0 :         spin_unlock_irq(&rq->lock);
     276                 :            : 
     277                 :          0 :         remove_from_client(rq);
     278         [ #  # ]:          0 :         list_del_rcu(&rq->link);
     279                 :            : 
     280         [ #  # ]:          0 :         intel_context_exit(rq->context);
     281                 :          0 :         intel_context_unpin(rq->context);
     282                 :            : 
     283                 :          0 :         free_capture_list(rq);
     284                 :          0 :         i915_sched_node_fini(&rq->sched);
     285                 :          0 :         i915_request_put(rq);
     286                 :            : 
     287                 :          0 :         return true;
     288                 :            : }
     289                 :            : 
     290                 :          0 : void i915_request_retire_upto(struct i915_request *rq)
     291                 :            : {
     292                 :          0 :         struct intel_timeline * const tl = i915_request_timeline(rq);
     293                 :          0 :         struct i915_request *tmp;
     294                 :            : 
     295                 :          0 :         RQ_TRACE(rq, "\n");
     296                 :            : 
     297                 :          0 :         GEM_BUG_ON(!i915_request_completed(rq));
     298                 :            : 
     299                 :          0 :         do {
     300                 :          0 :                 tmp = list_first_entry(&tl->requests, typeof(*tmp), link);
     301   [ #  #  #  # ]:          0 :         } while (i915_request_retire(tmp) && tmp != rq);
     302                 :          0 : }
     303                 :            : 
     304                 :            : static int
     305                 :          0 : __await_execution(struct i915_request *rq,
     306                 :            :                   struct i915_request *signal,
     307                 :            :                   void (*hook)(struct i915_request *rq,
     308                 :            :                                struct dma_fence *signal),
     309                 :            :                   gfp_t gfp)
     310                 :            : {
     311                 :          0 :         struct execute_cb *cb;
     312                 :            : 
     313         [ #  # ]:          0 :         if (i915_request_is_active(signal)) {
     314         [ #  # ]:          0 :                 if (hook)
     315                 :          0 :                         hook(rq, &signal->fence);
     316                 :          0 :                 return 0;
     317                 :            :         }
     318                 :            : 
     319                 :          0 :         cb = kmem_cache_alloc(global.slab_execute_cbs, gfp);
     320         [ #  # ]:          0 :         if (!cb)
     321                 :            :                 return -ENOMEM;
     322                 :            : 
     323                 :          0 :         cb->fence = &rq->submit;
     324                 :          0 :         i915_sw_fence_await(cb->fence);
     325                 :          0 :         init_irq_work(&cb->work, irq_execute_cb);
     326                 :            : 
     327         [ #  # ]:          0 :         if (hook) {
     328                 :          0 :                 cb->hook = hook;
     329         [ #  # ]:          0 :                 cb->signal = i915_request_get(signal);
     330                 :          0 :                 cb->work.func = irq_execute_cb_hook;
     331                 :            :         }
     332                 :            : 
     333                 :          0 :         spin_lock_irq(&signal->lock);
     334         [ #  # ]:          0 :         if (i915_request_is_active(signal)) {
     335         [ #  # ]:          0 :                 if (hook) {
     336                 :          0 :                         hook(rq, &signal->fence);
     337                 :          0 :                         i915_request_put(signal);
     338                 :            :                 }
     339                 :          0 :                 i915_sw_fence_complete(cb->fence);
     340                 :          0 :                 kmem_cache_free(global.slab_execute_cbs, cb);
     341                 :            :         } else {
     342                 :          0 :                 list_add_tail(&cb->link, &signal->execute_cb);
     343                 :            :         }
     344                 :          0 :         spin_unlock_irq(&signal->lock);
     345                 :            : 
     346                 :            :         /* Copy across semaphore status as we need the same behaviour */
     347                 :          0 :         rq->sched.flags |= signal->sched.flags;
     348                 :          0 :         return 0;
     349                 :            : }
     350                 :            : 
     351                 :          0 : bool __i915_request_submit(struct i915_request *request)
     352                 :            : {
     353                 :          0 :         struct intel_engine_cs *engine = request->engine;
     354                 :          0 :         bool result = false;
     355                 :            : 
     356                 :          0 :         RQ_TRACE(request, "\n");
     357                 :            : 
     358                 :          0 :         GEM_BUG_ON(!irqs_disabled());
     359                 :          0 :         lockdep_assert_held(&engine->active.lock);
     360                 :            : 
     361                 :            :         /*
     362                 :            :          * With the advent of preempt-to-busy, we frequently encounter
     363                 :            :          * requests that we have unsubmitted from HW, but left running
     364                 :            :          * until the next ack and so have completed in the meantime. On
     365                 :            :          * resubmission of that completed request, we can skip
     366                 :            :          * updating the payload, and execlists can even skip submitting
     367                 :            :          * the request.
     368                 :            :          *
     369                 :            :          * We must remove the request from the caller's priority queue,
     370                 :            :          * and the caller must only call us when the request is in their
     371                 :            :          * priority queue, under the active.lock. This ensures that the
     372                 :            :          * request has *not* yet been retired and we can safely move
     373                 :            :          * the request into the engine->active.list where it will be
     374                 :            :          * dropped upon retiring. (Otherwise if resubmit a *retired*
     375                 :            :          * request, this would be a horrible use-after-free.)
     376                 :            :          */
     377         [ #  # ]:          0 :         if (i915_request_completed(request))
     378                 :          0 :                 goto xfer;
     379                 :            : 
     380         [ #  # ]:          0 :         if (intel_context_is_banned(request->context))
     381                 :          0 :                 i915_request_skip(request, -EIO);
     382                 :            : 
     383                 :            :         /*
     384                 :            :          * Are we using semaphores when the gpu is already saturated?
     385                 :            :          *
     386                 :            :          * Using semaphores incurs a cost in having the GPU poll a
     387                 :            :          * memory location, busywaiting for it to change. The continual
     388                 :            :          * memory reads can have a noticeable impact on the rest of the
     389                 :            :          * system with the extra bus traffic, stalling the cpu as it too
     390                 :            :          * tries to access memory across the bus (perf stat -e bus-cycles).
     391                 :            :          *
     392                 :            :          * If we installed a semaphore on this request and we only submit
     393                 :            :          * the request after the signaler completed, that indicates the
     394                 :            :          * system is overloaded and using semaphores at this time only
     395                 :            :          * increases the amount of work we are doing. If so, we disable
     396                 :            :          * further use of semaphores until we are idle again, whence we
     397                 :            :          * optimistically try again.
     398                 :            :          */
     399   [ #  #  #  # ]:          0 :         if (request->sched.semaphores &&
     400                 :            :             i915_sw_fence_signaled(&request->semaphore))
     401                 :          0 :                 engine->saturated |= request->sched.semaphores;
     402                 :            : 
     403                 :          0 :         engine->emit_fini_breadcrumb(request,
     404                 :          0 :                                      request->ring->vaddr + request->postfix);
     405                 :            : 
     406                 :          0 :         trace_i915_request_execute(request);
     407                 :          0 :         engine->serial++;
     408                 :          0 :         result = true;
     409                 :            : 
     410                 :          0 : xfer:   /* We may be recursing from the signal callback of another i915 fence */
     411                 :          0 :         spin_lock_nested(&request->lock, SINGLE_DEPTH_NESTING);
     412                 :            : 
     413         [ #  # ]:          0 :         if (!test_and_set_bit(I915_FENCE_FLAG_ACTIVE, &request->fence.flags)) {
     414                 :          0 :                 list_move_tail(&request->sched.link, &engine->active.requests);
     415                 :          0 :                 clear_bit(I915_FENCE_FLAG_PQUEUE, &request->fence.flags);
     416                 :            :         }
     417                 :            : 
     418   [ #  #  #  # ]:          0 :         if (test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT, &request->fence.flags) &&
     419         [ #  # ]:          0 :             !test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &request->fence.flags) &&
     420                 :          0 :             !i915_request_enable_breadcrumb(request))
     421                 :          0 :                 intel_engine_signal_breadcrumbs(engine);
     422                 :            : 
     423         [ #  # ]:          0 :         __notify_execute_cb(request);
     424                 :            : 
     425                 :          0 :         spin_unlock(&request->lock);
     426                 :            : 
     427                 :          0 :         return result;
     428                 :            : }
     429                 :            : 
     430                 :          0 : void i915_request_submit(struct i915_request *request)
     431                 :            : {
     432                 :          0 :         struct intel_engine_cs *engine = request->engine;
     433                 :          0 :         unsigned long flags;
     434                 :            : 
     435                 :            :         /* Will be called from irq-context when using foreign fences. */
     436                 :          0 :         spin_lock_irqsave(&engine->active.lock, flags);
     437                 :            : 
     438                 :          0 :         __i915_request_submit(request);
     439                 :            : 
     440                 :          0 :         spin_unlock_irqrestore(&engine->active.lock, flags);
     441                 :          0 : }
     442                 :            : 
     443                 :          0 : void __i915_request_unsubmit(struct i915_request *request)
     444                 :            : {
     445                 :          0 :         struct intel_engine_cs *engine = request->engine;
     446                 :            : 
     447                 :          0 :         RQ_TRACE(request, "\n");
     448                 :            : 
     449                 :          0 :         GEM_BUG_ON(!irqs_disabled());
     450                 :          0 :         lockdep_assert_held(&engine->active.lock);
     451                 :            : 
     452                 :            :         /*
     453                 :            :          * Only unwind in reverse order, required so that the per-context list
     454                 :            :          * is kept in seqno/ring order.
     455                 :            :          */
     456                 :            : 
     457                 :            :         /* We may be recursing from the signal callback of another i915 fence */
     458                 :          0 :         spin_lock_nested(&request->lock, SINGLE_DEPTH_NESTING);
     459                 :            : 
     460         [ #  # ]:          0 :         if (test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT, &request->fence.flags))
     461                 :          0 :                 i915_request_cancel_breadcrumb(request);
     462                 :            : 
     463                 :          0 :         GEM_BUG_ON(!test_bit(I915_FENCE_FLAG_ACTIVE, &request->fence.flags));
     464                 :          0 :         clear_bit(I915_FENCE_FLAG_ACTIVE, &request->fence.flags);
     465                 :            : 
     466                 :          0 :         spin_unlock(&request->lock);
     467                 :            : 
     468                 :            :         /* We've already spun, don't charge on resubmitting. */
     469   [ #  #  #  # ]:          0 :         if (request->sched.semaphores && i915_request_started(request)) {
     470                 :          0 :                 request->sched.attr.priority |= I915_PRIORITY_NOSEMAPHORE;
     471                 :          0 :                 request->sched.semaphores = 0;
     472                 :            :         }
     473                 :            : 
     474                 :            :         /*
     475                 :            :          * We don't need to wake_up any waiters on request->execute, they
     476                 :            :          * will get woken by any other event or us re-adding this request
     477                 :            :          * to the engine timeline (__i915_request_submit()). The waiters
     478                 :            :          * should be quite adept at finding that the request now has a new
     479                 :            :          * global_seqno to the one they went to sleep on.
     480                 :            :          */
     481                 :          0 : }
     482                 :            : 
     483                 :          0 : void i915_request_unsubmit(struct i915_request *request)
     484                 :            : {
     485                 :          0 :         struct intel_engine_cs *engine = request->engine;
     486                 :          0 :         unsigned long flags;
     487                 :            : 
     488                 :            :         /* Will be called from irq-context when using foreign fences. */
     489                 :          0 :         spin_lock_irqsave(&engine->active.lock, flags);
     490                 :            : 
     491                 :          0 :         __i915_request_unsubmit(request);
     492                 :            : 
     493                 :          0 :         spin_unlock_irqrestore(&engine->active.lock, flags);
     494                 :          0 : }
     495                 :            : 
     496                 :            : static int __i915_sw_fence_call
     497                 :          0 : submit_notify(struct i915_sw_fence *fence, enum i915_sw_fence_notify state)
     498                 :            : {
     499                 :          0 :         struct i915_request *request =
     500                 :          0 :                 container_of(fence, typeof(*request), submit);
     501                 :            : 
     502      [ #  #  # ]:          0 :         switch (state) {
     503                 :            :         case FENCE_COMPLETE:
     504         [ #  # ]:          0 :                 trace_i915_request_submit(request);
     505                 :            : 
     506         [ #  # ]:          0 :                 if (unlikely(fence->error))
     507                 :          0 :                         i915_request_skip(request, fence->error);
     508                 :            : 
     509                 :            :                 /*
     510                 :            :                  * We need to serialize use of the submit_request() callback
     511                 :            :                  * with its hotplugging performed during an emergency
     512                 :            :                  * i915_gem_set_wedged().  We use the RCU mechanism to mark the
     513                 :            :                  * critical section in order to force i915_gem_set_wedged() to
     514                 :            :                  * wait until the submit_request() is completed before
     515                 :            :                  * proceeding.
     516                 :            :                  */
     517                 :          0 :                 rcu_read_lock();
     518                 :          0 :                 request->engine->submit_request(request);
     519                 :          0 :                 rcu_read_unlock();
     520                 :            :                 break;
     521                 :            : 
     522                 :            :         case FENCE_FREE:
     523                 :          0 :                 i915_request_put(request);
     524                 :            :                 break;
     525                 :            :         }
     526                 :            : 
     527                 :          0 :         return NOTIFY_DONE;
     528                 :            : }
     529                 :            : 
     530                 :          0 : static void irq_semaphore_cb(struct irq_work *wrk)
     531                 :            : {
     532                 :          0 :         struct i915_request *rq =
     533                 :          0 :                 container_of(wrk, typeof(*rq), semaphore_work);
     534                 :            : 
     535                 :          0 :         i915_schedule_bump_priority(rq, I915_PRIORITY_NOSEMAPHORE);
     536                 :          0 :         i915_request_put(rq);
     537                 :          0 : }
     538                 :            : 
     539                 :            : static int __i915_sw_fence_call
     540                 :          0 : semaphore_notify(struct i915_sw_fence *fence, enum i915_sw_fence_notify state)
     541                 :            : {
     542                 :          0 :         struct i915_request *rq = container_of(fence, typeof(*rq), semaphore);
     543                 :            : 
     544      [ #  #  # ]:          0 :         switch (state) {
     545                 :            :         case FENCE_COMPLETE:
     546         [ #  # ]:          0 :                 if (!(READ_ONCE(rq->sched.attr.priority) & I915_PRIORITY_NOSEMAPHORE)) {
     547         [ #  # ]:          0 :                         i915_request_get(rq);
     548                 :          0 :                         init_irq_work(&rq->semaphore_work, irq_semaphore_cb);
     549                 :          0 :                         irq_work_queue(&rq->semaphore_work);
     550                 :            :                 }
     551                 :            :                 break;
     552                 :            : 
     553                 :            :         case FENCE_FREE:
     554                 :          0 :                 i915_request_put(rq);
     555                 :            :                 break;
     556                 :            :         }
     557                 :            : 
     558                 :          0 :         return NOTIFY_DONE;
     559                 :            : }
     560                 :            : 
     561                 :          0 : static void retire_requests(struct intel_timeline *tl)
     562                 :            : {
     563                 :          0 :         struct i915_request *rq, *rn;
     564                 :            : 
     565         [ #  # ]:          0 :         list_for_each_entry_safe(rq, rn, &tl->requests, link)
     566         [ #  # ]:          0 :                 if (!i915_request_retire(rq))
     567                 :            :                         break;
     568                 :          0 : }
     569                 :            : 
     570                 :            : static noinline struct i915_request *
     571                 :          0 : request_alloc_slow(struct intel_timeline *tl, gfp_t gfp)
     572                 :            : {
     573                 :          0 :         struct i915_request *rq;
     574                 :            : 
     575         [ #  # ]:          0 :         if (list_empty(&tl->requests))
     576                 :          0 :                 goto out;
     577                 :            : 
     578         [ #  # ]:          0 :         if (!gfpflags_allow_blocking(gfp))
     579                 :          0 :                 goto out;
     580                 :            : 
     581                 :            :         /* Move our oldest request to the slab-cache (if not in use!) */
     582                 :          0 :         rq = list_first_entry(&tl->requests, typeof(*rq), link);
     583                 :          0 :         i915_request_retire(rq);
     584                 :            : 
     585                 :          0 :         rq = kmem_cache_alloc(global.slab_requests,
     586                 :            :                               gfp | __GFP_RETRY_MAYFAIL | __GFP_NOWARN);
     587         [ #  # ]:          0 :         if (rq)
     588                 :            :                 return rq;
     589                 :            : 
     590                 :            :         /* Ratelimit ourselves to prevent oom from malicious clients */
     591                 :          0 :         rq = list_last_entry(&tl->requests, typeof(*rq), link);
     592                 :          0 :         cond_synchronize_rcu(rq->rcustate);
     593                 :            : 
     594                 :            :         /* Retire our old requests in the hope that we free some */
     595                 :          0 :         retire_requests(tl);
     596                 :            : 
     597                 :          0 : out:
     598                 :          0 :         return kmem_cache_alloc(global.slab_requests, gfp);
     599                 :            : }
     600                 :            : 
     601                 :          0 : static void __i915_request_ctor(void *arg)
     602                 :            : {
     603                 :          0 :         struct i915_request *rq = arg;
     604                 :            : 
     605                 :          0 :         spin_lock_init(&rq->lock);
     606                 :          0 :         i915_sched_node_init(&rq->sched);
     607                 :          0 :         i915_sw_fence_init(&rq->submit, submit_notify);
     608                 :          0 :         i915_sw_fence_init(&rq->semaphore, semaphore_notify);
     609                 :            : 
     610                 :          0 :         dma_fence_init(&rq->fence, &i915_fence_ops, &rq->lock, 0, 0);
     611                 :            : 
     612                 :          0 :         rq->file_priv = NULL;
     613                 :          0 :         rq->capture_list = NULL;
     614                 :            : 
     615                 :          0 :         INIT_LIST_HEAD(&rq->execute_cb);
     616                 :          0 : }
     617                 :            : 
     618                 :            : struct i915_request *
     619                 :          0 : __i915_request_create(struct intel_context *ce, gfp_t gfp)
     620                 :            : {
     621                 :          0 :         struct intel_timeline *tl = ce->timeline;
     622                 :          0 :         struct i915_request *rq;
     623                 :          0 :         u32 seqno;
     624                 :          0 :         int ret;
     625                 :            : 
     626         [ #  # ]:          0 :         might_sleep_if(gfpflags_allow_blocking(gfp));
     627                 :            : 
     628                 :            :         /* Check that the caller provided an already pinned context */
     629                 :          0 :         __intel_context_pin(ce);
     630                 :            : 
     631                 :            :         /*
     632                 :            :          * Beware: Dragons be flying overhead.
     633                 :            :          *
     634                 :            :          * We use RCU to look up requests in flight. The lookups may
     635                 :            :          * race with the request being allocated from the slab freelist.
     636                 :            :          * That is the request we are writing to here, may be in the process
     637                 :            :          * of being read by __i915_active_request_get_rcu(). As such,
     638                 :            :          * we have to be very careful when overwriting the contents. During
     639                 :            :          * the RCU lookup, we chase the request->engine pointer,
     640                 :            :          * read the request->global_seqno and increment the reference count.
     641                 :            :          *
     642                 :            :          * The reference count is incremented atomically. If it is zero,
     643                 :            :          * the lookup knows the request is unallocated and complete. Otherwise,
     644                 :            :          * it is either still in use, or has been reallocated and reset
     645                 :            :          * with dma_fence_init(). This increment is safe for release as we
     646                 :            :          * check that the request we have a reference to matches the active
     647                 :            :          * request.
     648                 :            :          *
     649                 :            :          * Before we increment the refcount, we chase the request->engine
     650                 :            :          * pointer. We must not call kmem_cache_zalloc() or else we set
     651                 :            :          * that pointer to NULL and cause a crash during the lookup. If
     652                 :            :          * we see the request is completed (based on the value of the
     653                 :            :          * old engine and seqno), the lookup is complete and reports NULL.
     654                 :            :          * If we decide the request is not completed (new engine or seqno),
     655                 :            :          * then we grab a reference and double check that it is still the
     656                 :            :          * active request - which it won't be and restart the lookup.
     657                 :            :          *
     658                 :            :          * Do not use kmem_cache_zalloc() here!
     659                 :            :          */
     660                 :          0 :         rq = kmem_cache_alloc(global.slab_requests,
     661                 :            :                               gfp | __GFP_RETRY_MAYFAIL | __GFP_NOWARN);
     662         [ #  # ]:          0 :         if (unlikely(!rq)) {
     663                 :          0 :                 rq = request_alloc_slow(tl, gfp);
     664         [ #  # ]:          0 :                 if (!rq) {
     665                 :          0 :                         ret = -ENOMEM;
     666                 :          0 :                         goto err_unreserve;
     667                 :            :                 }
     668                 :            :         }
     669                 :            : 
     670                 :          0 :         rq->i915 = ce->engine->i915;
     671                 :          0 :         rq->context = ce;
     672                 :          0 :         rq->engine = ce->engine;
     673                 :          0 :         rq->ring = ce->ring;
     674                 :          0 :         rq->execution_mask = ce->engine->mask;
     675                 :            : 
     676                 :          0 :         kref_init(&rq->fence.refcount);
     677                 :          0 :         rq->fence.flags = 0;
     678                 :          0 :         rq->fence.error = 0;
     679                 :          0 :         INIT_LIST_HEAD(&rq->fence.cb_list);
     680                 :            : 
     681                 :          0 :         ret = intel_timeline_get_seqno(tl, rq, &seqno);
     682         [ #  # ]:          0 :         if (ret)
     683                 :          0 :                 goto err_free;
     684                 :            : 
     685                 :          0 :         rq->fence.context = tl->fence_context;
     686                 :          0 :         rq->fence.seqno = seqno;
     687                 :            : 
     688                 :          0 :         RCU_INIT_POINTER(rq->timeline, tl);
     689                 :          0 :         RCU_INIT_POINTER(rq->hwsp_cacheline, tl->hwsp_cacheline);
     690                 :          0 :         rq->hwsp_seqno = tl->hwsp_seqno;
     691                 :            : 
     692                 :          0 :         rq->rcustate = get_state_synchronize_rcu(); /* acts as smp_mb() */
     693                 :            : 
     694                 :            :         /* We bump the ref for the fence chain */
     695         [ #  # ]:          0 :         i915_sw_fence_reinit(&i915_request_get(rq)->submit);
     696         [ #  # ]:          0 :         i915_sw_fence_reinit(&i915_request_get(rq)->semaphore);
     697                 :            : 
     698                 :          0 :         i915_sched_node_reinit(&rq->sched);
     699                 :            : 
     700                 :            :         /* No zalloc, everything must be cleared after use */
     701                 :          0 :         rq->batch = NULL;
     702                 :          0 :         GEM_BUG_ON(rq->file_priv);
     703                 :          0 :         GEM_BUG_ON(rq->capture_list);
     704                 :          0 :         GEM_BUG_ON(!list_empty(&rq->execute_cb));
     705                 :            : 
     706                 :            :         /*
     707                 :            :          * Reserve space in the ring buffer for all the commands required to
     708                 :            :          * eventually emit this request. This is to guarantee that the
     709                 :            :          * i915_request_add() call can't fail. Note that the reserve may need
     710                 :            :          * to be redone if the request is not actually submitted straight
     711                 :            :          * away, e.g. because a GPU scheduler has deferred it.
     712                 :            :          *
     713                 :            :          * Note that due to how we add reserved_space to intel_ring_begin()
     714                 :            :          * we need to double our request to ensure that if we need to wrap
     715                 :            :          * around inside i915_request_add() there is sufficient space at
     716                 :            :          * the beginning of the ring as well.
     717                 :            :          */
     718                 :          0 :         rq->reserved_space =
     719                 :          0 :                 2 * rq->engine->emit_fini_breadcrumb_dw * sizeof(u32);
     720                 :            : 
     721                 :            :         /*
     722                 :            :          * Record the position of the start of the request so that
     723                 :            :          * should we detect the updated seqno part-way through the
     724                 :            :          * GPU processing the request, we never over-estimate the
     725                 :            :          * position of the head.
     726                 :            :          */
     727                 :          0 :         rq->head = rq->ring->emit;
     728                 :            : 
     729                 :          0 :         ret = rq->engine->request_alloc(rq);
     730         [ #  # ]:          0 :         if (ret)
     731                 :          0 :                 goto err_unwind;
     732                 :            : 
     733                 :          0 :         rq->infix = rq->ring->emit; /* end of header; start of user payload */
     734                 :            : 
     735                 :          0 :         intel_context_mark_active(ce);
     736                 :          0 :         list_add_tail_rcu(&rq->link, &tl->requests);
     737                 :            : 
     738                 :          0 :         return rq;
     739                 :            : 
     740                 :            : err_unwind:
     741                 :          0 :         ce->ring->emit = rq->head;
     742                 :            : 
     743                 :            :         /* Make sure we didn't add ourselves to external state before freeing */
     744                 :          0 :         GEM_BUG_ON(!list_empty(&rq->sched.signalers_list));
     745                 :          0 :         GEM_BUG_ON(!list_empty(&rq->sched.waiters_list));
     746                 :            : 
     747                 :          0 : err_free:
     748                 :          0 :         kmem_cache_free(global.slab_requests, rq);
     749                 :          0 : err_unreserve:
     750                 :          0 :         intel_context_unpin(ce);
     751                 :          0 :         return ERR_PTR(ret);
     752                 :            : }
     753                 :            : 
     754                 :            : struct i915_request *
     755                 :          0 : i915_request_create(struct intel_context *ce)
     756                 :            : {
     757                 :          0 :         struct i915_request *rq;
     758                 :          0 :         struct intel_timeline *tl;
     759                 :            : 
     760                 :          0 :         tl = intel_context_timeline_lock(ce);
     761         [ #  # ]:          0 :         if (IS_ERR(tl))
     762                 :            :                 return ERR_CAST(tl);
     763                 :            : 
     764                 :            :         /* Move our oldest request to the slab-cache (if not in use!) */
     765                 :          0 :         rq = list_first_entry(&tl->requests, typeof(*rq), link);
     766         [ #  # ]:          0 :         if (!list_is_last(&rq->link, &tl->requests))
     767                 :          0 :                 i915_request_retire(rq);
     768                 :            : 
     769         [ #  # ]:          0 :         intel_context_enter(ce);
     770                 :          0 :         rq = __i915_request_create(ce, GFP_KERNEL);
     771         [ #  # ]:          0 :         intel_context_exit(ce); /* active reference transferred to request */
     772         [ #  # ]:          0 :         if (IS_ERR(rq))
     773                 :          0 :                 goto err_unlock;
     774                 :            : 
     775                 :            :         /* Check that we do not interrupt ourselves with a new request */
     776                 :            :         rq->cookie = lockdep_pin_lock(&tl->mutex);
     777                 :            : 
     778                 :            :         return rq;
     779                 :            : 
     780                 :            : err_unlock:
     781                 :          0 :         intel_context_timeline_unlock(tl);
     782                 :          0 :         return rq;
     783                 :            : }
     784                 :            : 
     785                 :            : static int
     786                 :          0 : i915_request_await_start(struct i915_request *rq, struct i915_request *signal)
     787                 :            : {
     788                 :          0 :         struct dma_fence *fence;
     789                 :          0 :         int err;
     790                 :            : 
     791         [ #  # ]:          0 :         if (i915_request_timeline(rq) == rcu_access_pointer(signal->timeline))
     792                 :            :                 return 0;
     793                 :            : 
     794         [ #  # ]:          0 :         if (i915_request_started(signal))
     795                 :            :                 return 0;
     796                 :            : 
     797                 :          0 :         fence = NULL;
     798                 :          0 :         rcu_read_lock();
     799                 :          0 :         spin_lock_irq(&signal->lock);
     800                 :          0 :         do {
     801                 :          0 :                 struct list_head *pos = READ_ONCE(signal->link.prev);
     802                 :          0 :                 struct i915_request *prev;
     803                 :            : 
     804                 :            :                 /* Confirm signal has not been retired, the link is valid */
     805         [ #  # ]:          0 :                 if (unlikely(i915_request_started(signal)))
     806                 :            :                         break;
     807                 :            : 
     808                 :            :                 /* Is signal the earliest request on its timeline? */
     809         [ #  # ]:          0 :                 if (pos == &rcu_dereference(signal->timeline)->requests)
     810                 :            :                         break;
     811                 :            : 
     812                 :            :                 /*
     813                 :            :                  * Peek at the request before us in the timeline. That
     814                 :            :                  * request will only be valid before it is retired, so
     815                 :            :                  * after acquiring a reference to it, confirm that it is
     816                 :            :                  * still part of the signaler's timeline.
     817                 :            :                  */
     818                 :          0 :                 prev = list_entry(pos, typeof(*prev), link);
     819         [ #  # ]:          0 :                 if (!i915_request_get_rcu(prev))
     820                 :            :                         break;
     821                 :            : 
     822                 :            :                 /* After the strong barrier, confirm prev is still attached */
     823         [ #  # ]:          0 :                 if (unlikely(READ_ONCE(prev->link.next) != &signal->link)) {
     824                 :          0 :                         i915_request_put(prev);
     825                 :            :                         break;
     826                 :            :                 }
     827                 :            : 
     828                 :            :                 fence = &prev->fence;
     829                 :          0 :         } while (0);
     830                 :          0 :         spin_unlock_irq(&signal->lock);
     831                 :          0 :         rcu_read_unlock();
     832         [ #  # ]:          0 :         if (!fence)
     833                 :            :                 return 0;
     834                 :            : 
     835                 :          0 :         err = 0;
     836         [ #  # ]:          0 :         if (!intel_timeline_sync_is_later(i915_request_timeline(rq), fence))
     837                 :          0 :                 err = i915_sw_fence_await_dma_fence(&rq->submit,
     838                 :            :                                                     fence, 0,
     839                 :            :                                                     I915_FENCE_GFP);
     840                 :          0 :         dma_fence_put(fence);
     841                 :            : 
     842                 :          0 :         return err;
     843                 :            : }
     844                 :            : 
     845                 :            : static intel_engine_mask_t
     846                 :          0 : already_busywaiting(struct i915_request *rq)
     847                 :            : {
     848                 :            :         /*
     849                 :            :          * Polling a semaphore causes bus traffic, delaying other users of
     850                 :            :          * both the GPU and CPU. We want to limit the impact on others,
     851                 :            :          * while taking advantage of early submission to reduce GPU
     852                 :            :          * latency. Therefore we restrict ourselves to not using more
     853                 :            :          * than one semaphore from each source, and not using a semaphore
     854                 :            :          * if we have detected the engine is saturated (i.e. would not be
     855                 :            :          * submitted early and cause bus traffic reading an already passed
     856                 :            :          * semaphore).
     857                 :            :          *
     858                 :            :          * See the are-we-too-late? check in __i915_request_submit().
     859                 :            :          */
     860                 :          0 :         return rq->sched.semaphores | rq->engine->saturated;
     861                 :            : }
     862                 :            : 
     863                 :            : static int
     864                 :          0 : __emit_semaphore_wait(struct i915_request *to,
     865                 :            :                       struct i915_request *from,
     866                 :            :                       u32 seqno)
     867                 :            : {
     868                 :          0 :         const int has_token = INTEL_GEN(to->i915) >= 12;
     869                 :          0 :         u32 hwsp_offset;
     870                 :          0 :         int len, err;
     871                 :          0 :         u32 *cs;
     872                 :            : 
     873                 :          0 :         GEM_BUG_ON(INTEL_GEN(to->i915) < 8);
     874                 :            : 
     875                 :            :         /* We need to pin the signaler's HWSP until we are finished reading. */
     876                 :          0 :         err = intel_timeline_read_hwsp(from, to, &hwsp_offset);
     877         [ #  # ]:          0 :         if (err)
     878                 :            :                 return err;
     879                 :            : 
     880                 :          0 :         len = 4;
     881         [ #  # ]:          0 :         if (has_token)
     882                 :          0 :                 len += 2;
     883                 :            : 
     884                 :          0 :         cs = intel_ring_begin(to, len);
     885         [ #  # ]:          0 :         if (IS_ERR(cs))
     886                 :          0 :                 return PTR_ERR(cs);
     887                 :            : 
     888                 :            :         /*
     889                 :            :          * Using greater-than-or-equal here means we have to worry
     890                 :            :          * about seqno wraparound. To side step that issue, we swap
     891                 :            :          * the timeline HWSP upon wrapping, so that everyone listening
     892                 :            :          * for the old (pre-wrap) values do not see the much smaller
     893                 :            :          * (post-wrap) values than they were expecting (and so wait
     894                 :            :          * forever).
     895                 :            :          */
     896                 :          0 :         *cs++ = (MI_SEMAPHORE_WAIT |
     897                 :            :                  MI_SEMAPHORE_GLOBAL_GTT |
     898                 :            :                  MI_SEMAPHORE_POLL |
     899                 :          0 :                  MI_SEMAPHORE_SAD_GTE_SDD) +
     900                 :            :                 has_token;
     901                 :          0 :         *cs++ = seqno;
     902                 :          0 :         *cs++ = hwsp_offset;
     903                 :          0 :         *cs++ = 0;
     904         [ #  # ]:          0 :         if (has_token) {
     905                 :          0 :                 *cs++ = 0;
     906                 :          0 :                 *cs++ = MI_NOOP;
     907                 :            :         }
     908                 :            : 
     909                 :            :         intel_ring_advance(to, cs);
     910                 :            :         return 0;
     911                 :            : }
     912                 :            : 
     913                 :            : static int
     914                 :          0 : emit_semaphore_wait(struct i915_request *to,
     915                 :            :                     struct i915_request *from,
     916                 :            :                     gfp_t gfp)
     917                 :            : {
     918                 :            :         /* Just emit the first semaphore we see as request space is limited. */
     919         [ #  # ]:          0 :         if (already_busywaiting(to) & from->engine->mask)
     920                 :          0 :                 goto await_fence;
     921                 :            : 
     922         [ #  # ]:          0 :         if (i915_request_await_start(to, from) < 0)
     923                 :          0 :                 goto await_fence;
     924                 :            : 
     925                 :            :         /* Only submit our spinner after the signaler is running! */
     926         [ #  # ]:          0 :         if (__await_execution(to, from, NULL, gfp))
     927                 :          0 :                 goto await_fence;
     928                 :            : 
     929         [ #  # ]:          0 :         if (__emit_semaphore_wait(to, from, from->fence.seqno))
     930                 :          0 :                 goto await_fence;
     931                 :            : 
     932                 :          0 :         to->sched.semaphores |= from->engine->mask;
     933                 :          0 :         to->sched.flags |= I915_SCHED_HAS_SEMAPHORE_CHAIN;
     934                 :          0 :         return 0;
     935                 :            : 
     936                 :          0 : await_fence:
     937                 :          0 :         return i915_sw_fence_await_dma_fence(&to->submit,
     938                 :            :                                              &from->fence, 0,
     939                 :            :                                              I915_FENCE_GFP);
     940                 :            : }
     941                 :            : 
     942                 :            : static int
     943                 :          0 : i915_request_await_request(struct i915_request *to, struct i915_request *from)
     944                 :            : {
     945                 :          0 :         int ret;
     946                 :            : 
     947                 :          0 :         GEM_BUG_ON(to == from);
     948                 :          0 :         GEM_BUG_ON(to->timeline == from->timeline);
     949                 :            : 
     950         [ #  # ]:          0 :         if (i915_request_completed(from))
     951                 :            :                 return 0;
     952                 :            : 
     953         [ #  # ]:          0 :         if (to->engine->schedule) {
     954                 :          0 :                 ret = i915_sched_node_add_dependency(&to->sched, &from->sched);
     955         [ #  # ]:          0 :                 if (ret < 0)
     956                 :            :                         return ret;
     957                 :            :         }
     958                 :            : 
     959         [ #  # ]:          0 :         if (to->engine == from->engine)
     960                 :          0 :                 ret = i915_sw_fence_await_sw_fence_gfp(&to->submit,
     961                 :            :                                                        &from->submit,
     962                 :            :                                                        I915_FENCE_GFP);
     963         [ #  # ]:          0 :         else if (intel_context_use_semaphores(to->context))
     964                 :          0 :                 ret = emit_semaphore_wait(to, from, I915_FENCE_GFP);
     965                 :            :         else
     966                 :          0 :                 ret = i915_sw_fence_await_dma_fence(&to->submit,
     967                 :            :                                                     &from->fence, 0,
     968                 :            :                                                     I915_FENCE_GFP);
     969         [ #  # ]:          0 :         if (ret < 0)
     970                 :            :                 return ret;
     971                 :            : 
     972         [ #  # ]:          0 :         if (to->sched.flags & I915_SCHED_HAS_SEMAPHORE_CHAIN) {
     973                 :          0 :                 ret = i915_sw_fence_await_dma_fence(&to->semaphore,
     974                 :            :                                                     &from->fence, 0,
     975                 :            :                                                     I915_FENCE_GFP);
     976                 :          0 :                 if (ret < 0)
     977                 :            :                         return ret;
     978                 :            :         }
     979                 :            : 
     980                 :            :         return 0;
     981                 :            : }
     982                 :            : 
     983                 :            : int
     984                 :          0 : i915_request_await_dma_fence(struct i915_request *rq, struct dma_fence *fence)
     985                 :            : {
     986                 :          0 :         struct dma_fence **child = &fence;
     987                 :          0 :         unsigned int nchild = 1;
     988                 :          0 :         int ret;
     989                 :            : 
     990                 :            :         /*
     991                 :            :          * Note that if the fence-array was created in signal-on-any mode,
     992                 :            :          * we should *not* decompose it into its individual fences. However,
     993                 :            :          * we don't currently store which mode the fence-array is operating
     994                 :            :          * in. Fortunately, the only user of signal-on-any is private to
     995                 :            :          * amdgpu and we should not see any incoming fence-array from
     996                 :            :          * sync-file being in signal-on-any mode.
     997                 :            :          */
     998         [ #  # ]:          0 :         if (dma_fence_is_array(fence)) {
     999                 :          0 :                 struct dma_fence_array *array = to_dma_fence_array(fence);
    1000                 :            : 
    1001                 :          0 :                 child = array->fences;
    1002                 :          0 :                 nchild = array->num_fences;
    1003                 :          0 :                 GEM_BUG_ON(!nchild);
    1004                 :            :         }
    1005                 :            : 
    1006                 :          0 :         do {
    1007                 :          0 :                 fence = *child++;
    1008         [ #  # ]:          0 :                 if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags)) {
    1009                 :          0 :                         i915_sw_fence_set_error_once(&rq->submit, fence->error);
    1010                 :          0 :                         continue;
    1011                 :            :                 }
    1012                 :            : 
    1013                 :            :                 /*
    1014                 :            :                  * Requests on the same timeline are explicitly ordered, along
    1015                 :            :                  * with their dependencies, by i915_request_add() which ensures
    1016                 :            :                  * that requests are submitted in-order through each ring.
    1017                 :            :                  */
    1018         [ #  # ]:          0 :                 if (fence->context == rq->fence.context)
    1019                 :          0 :                         continue;
    1020                 :            : 
    1021                 :            :                 /* Squash repeated waits to the same timelines */
    1022   [ #  #  #  # ]:          0 :                 if (fence->context &&
    1023                 :          0 :                     intel_timeline_sync_is_later(i915_request_timeline(rq),
    1024                 :            :                                                  fence))
    1025                 :          0 :                         continue;
    1026                 :            : 
    1027         [ #  # ]:          0 :                 if (dma_fence_is_i915(fence))
    1028                 :          0 :                         ret = i915_request_await_request(rq, to_request(fence));
    1029                 :            :                 else
    1030                 :          0 :                         ret = i915_sw_fence_await_dma_fence(&rq->submit, fence,
    1031         [ #  # ]:          0 :                                                             fence->context ? I915_FENCE_TIMEOUT : 0,
    1032                 :            :                                                             I915_FENCE_GFP);
    1033         [ #  # ]:          0 :                 if (ret < 0)
    1034                 :          0 :                         return ret;
    1035                 :            : 
    1036                 :            :                 /* Record the latest fence used against each timeline */
    1037         [ #  # ]:          0 :                 if (fence->context)
    1038                 :          0 :                         intel_timeline_sync_set(i915_request_timeline(rq),
    1039                 :            :                                                 fence);
    1040         [ #  # ]:          0 :         } while (--nchild);
    1041                 :            : 
    1042                 :            :         return 0;
    1043                 :            : }
    1044                 :            : 
    1045                 :          0 : static bool intel_timeline_sync_has_start(struct intel_timeline *tl,
    1046                 :            :                                           struct dma_fence *fence)
    1047                 :            : {
    1048                 :          0 :         return __intel_timeline_sync_is_later(tl,
    1049                 :            :                                               fence->context,
    1050                 :            :                                               fence->seqno - 1);
    1051                 :            : }
    1052                 :            : 
    1053                 :          0 : static int intel_timeline_sync_set_start(struct intel_timeline *tl,
    1054                 :            :                                          const struct dma_fence *fence)
    1055                 :            : {
    1056                 :          0 :         return __intel_timeline_sync_set(tl, fence->context, fence->seqno - 1);
    1057                 :            : }
    1058                 :            : 
    1059                 :            : static int
    1060                 :          0 : __i915_request_await_execution(struct i915_request *to,
    1061                 :            :                                struct i915_request *from,
    1062                 :            :                                void (*hook)(struct i915_request *rq,
    1063                 :            :                                             struct dma_fence *signal))
    1064                 :            : {
    1065                 :          0 :         int err;
    1066                 :            : 
    1067                 :            :         /* Submit both requests at the same time */
    1068                 :          0 :         err = __await_execution(to, from, hook, I915_FENCE_GFP);
    1069         [ #  # ]:          0 :         if (err)
    1070                 :            :                 return err;
    1071                 :            : 
     1072                 :            :         /* Squash repeated dependencies to the same timelines */
    1073         [ #  # ]:          0 :         if (intel_timeline_sync_has_start(i915_request_timeline(to),
    1074                 :            :                                           &from->fence))
    1075                 :            :                 return 0;
    1076                 :            : 
    1077                 :            :         /* Ensure both start together [after all semaphores in signal] */
    1078         [ #  # ]:          0 :         if (intel_engine_has_semaphores(to->engine))
    1079                 :          0 :                 err = __emit_semaphore_wait(to, from, from->fence.seqno - 1);
    1080                 :            :         else
    1081                 :          0 :                 err = i915_request_await_start(to, from);
    1082         [ #  # ]:          0 :         if (err < 0)
    1083                 :            :                 return err;
    1084                 :            : 
    1085                 :            :         /* Couple the dependency tree for PI on this exposed to->fence */
    1086         [ #  # ]:          0 :         if (to->engine->schedule) {
    1087                 :          0 :                 err = i915_sched_node_add_dependency(&to->sched, &from->sched);
    1088         [ #  # ]:          0 :                 if (err < 0)
    1089                 :            :                         return err;
    1090                 :            :         }
    1091                 :            : 
    1092                 :          0 :         return intel_timeline_sync_set_start(i915_request_timeline(to),
    1093                 :            :                                              &from->fence);
    1094                 :            : }
    1095                 :            : 
    1096                 :            : int
    1097                 :          0 : i915_request_await_execution(struct i915_request *rq,
    1098                 :            :                              struct dma_fence *fence,
    1099                 :            :                              void (*hook)(struct i915_request *rq,
    1100                 :            :                                           struct dma_fence *signal))
    1101                 :            : {
    1102                 :          0 :         struct dma_fence **child = &fence;
    1103                 :          0 :         unsigned int nchild = 1;
    1104                 :          0 :         int ret;
    1105                 :            : 
    1106         [ #  # ]:          0 :         if (dma_fence_is_array(fence)) {
    1107                 :          0 :                 struct dma_fence_array *array = to_dma_fence_array(fence);
    1108                 :            : 
    1109                 :            :                 /* XXX Error for signal-on-any fence arrays */
    1110                 :            : 
    1111                 :          0 :                 child = array->fences;
    1112                 :          0 :                 nchild = array->num_fences;
    1113                 :          0 :                 GEM_BUG_ON(!nchild);
    1114                 :            :         }
    1115                 :            : 
    1116                 :          0 :         do {
    1117                 :          0 :                 fence = *child++;
    1118         [ #  # ]:          0 :                 if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags)) {
    1119                 :          0 :                         i915_sw_fence_set_error_once(&rq->submit, fence->error);
    1120                 :          0 :                         continue;
    1121                 :            :                 }
    1122                 :            : 
    1123                 :            :                 /*
    1124                 :            :                  * We don't squash repeated fence dependencies here as we
    1125                 :            :                  * want to run our callback in all cases.
    1126                 :            :                  */
    1127                 :            : 
    1128         [ #  # ]:          0 :                 if (dma_fence_is_i915(fence))
    1129                 :          0 :                         ret = __i915_request_await_execution(rq,
    1130                 :            :                                                              to_request(fence),
    1131                 :            :                                                              hook);
    1132                 :            :                 else
    1133                 :          0 :                         ret = i915_sw_fence_await_dma_fence(&rq->submit, fence,
    1134                 :            :                                                             I915_FENCE_TIMEOUT,
    1135                 :            :                                                             GFP_KERNEL);
    1136         [ #  # ]:          0 :                 if (ret < 0)
    1137                 :          0 :                         return ret;
    1138         [ #  # ]:          0 :         } while (--nchild);
    1139                 :            : 
    1140                 :            :         return 0;
    1141                 :            : }
    1142                 :            : 
    1143                 :            : /**
    1144                 :            :  * i915_request_await_object - set this request to (async) wait upon a bo
    1145                 :            :  * @to: request we are wishing to use
    1146                 :            :  * @obj: object which may be in use on another ring.
    1147                 :            :  * @write: whether the wait is on behalf of a writer
    1148                 :            :  *
    1149                 :            :  * This code is meant to abstract object synchronization with the GPU.
    1150                 :            :  * Conceptually we serialise writes between engines inside the GPU.
    1151                 :            :  * We only allow one engine to write into a buffer at any time, but
    1152                 :            :  * multiple readers. To ensure each has a coherent view of memory, we must:
    1153                 :            :  *
    1154                 :            :  * - If there is an outstanding write request to the object, the new
    1155                 :            :  *   request must wait for it to complete (either CPU or in hw, requests
    1156                 :            :  *   on the same ring will be naturally ordered).
    1157                 :            :  *
     1158                 :            :  * - If we are a write request (@write is true), the new
    1159                 :            :  *   request must wait for outstanding read requests to complete.
    1160                 :            :  *
    1161                 :            :  * Returns 0 if successful, else propagates up the lower layer error.
    1162                 :            :  */
    1163                 :            : int
    1164                 :          0 : i915_request_await_object(struct i915_request *to,
    1165                 :            :                           struct drm_i915_gem_object *obj,
    1166                 :            :                           bool write)
    1167                 :            : {
    1168                 :          0 :         struct dma_fence *excl;
    1169                 :          0 :         int ret = 0;
    1170                 :            : 
    1171         [ #  # ]:          0 :         if (write) {
    1172                 :          0 :                 struct dma_fence **shared;
    1173                 :          0 :                 unsigned int count, i;
    1174                 :            : 
    1175                 :          0 :                 ret = dma_resv_get_fences_rcu(obj->base.resv,
    1176                 :            :                                                         &excl, &count, &shared);
    1177         [ #  # ]:          0 :                 if (ret)
    1178                 :          0 :                         return ret;
    1179                 :            : 
    1180         [ #  # ]:          0 :                 for (i = 0; i < count; i++) {
    1181                 :          0 :                         ret = i915_request_await_dma_fence(to, shared[i]);
    1182         [ #  # ]:          0 :                         if (ret)
    1183                 :            :                                 break;
    1184                 :            : 
    1185                 :          0 :                         dma_fence_put(shared[i]);
    1186                 :            :                 }
    1187                 :            : 
    1188         [ #  # ]:          0 :                 for (; i < count; i++)
    1189                 :          0 :                         dma_fence_put(shared[i]);
    1190                 :          0 :                 kfree(shared);
    1191                 :            :         } else {
    1192         [ #  # ]:          0 :                 excl = dma_resv_get_excl_rcu(obj->base.resv);
    1193                 :            :         }
    1194                 :            : 
    1195         [ #  # ]:          0 :         if (excl) {
    1196         [ #  # ]:          0 :                 if (ret == 0)
    1197                 :          0 :                         ret = i915_request_await_dma_fence(to, excl);
    1198                 :            : 
    1199                 :          0 :                 dma_fence_put(excl);
    1200                 :            :         }
    1201                 :            : 
    1202                 :            :         return ret;
    1203                 :            : }
    1204                 :            : 
    1205                 :          0 : void i915_request_skip(struct i915_request *rq, int error)
    1206                 :            : {
    1207                 :          0 :         void *vaddr = rq->ring->vaddr;
    1208                 :          0 :         u32 head;
    1209                 :            : 
    1210                 :          0 :         GEM_BUG_ON(!IS_ERR_VALUE((long)error));
    1211                 :          0 :         dma_fence_set_error(&rq->fence, error);
    1212                 :            : 
    1213         [ #  # ]:          0 :         if (rq->infix == rq->postfix)
    1214                 :            :                 return;
    1215                 :            : 
    1216                 :            :         /*
    1217                 :            :          * As this request likely depends on state from the lost
    1218                 :            :          * context, clear out all the user operations leaving the
    1219                 :            :          * breadcrumb at the end (so we get the fence notifications).
    1220                 :            :          */
    1221                 :          0 :         head = rq->infix;
    1222         [ #  # ]:          0 :         if (rq->postfix < head) {
    1223                 :          0 :                 memset(vaddr + head, 0, rq->ring->size - head);
    1224                 :          0 :                 head = 0;
    1225                 :            :         }
    1226                 :          0 :         memset(vaddr + head, 0, rq->postfix - head);
    1227                 :          0 :         rq->infix = rq->postfix;
    1228                 :            : }
    1229                 :            : 
    1230                 :            : static struct i915_request *
    1231                 :          0 : __i915_request_add_to_timeline(struct i915_request *rq)
    1232                 :            : {
    1233                 :          0 :         struct intel_timeline *timeline = i915_request_timeline(rq);
    1234                 :          0 :         struct i915_request *prev;
    1235                 :            : 
    1236                 :            :         /*
    1237                 :            :          * Dependency tracking and request ordering along the timeline
    1238                 :            :          * is special cased so that we can eliminate redundant ordering
    1239                 :            :          * operations while building the request (we know that the timeline
    1240                 :            :          * itself is ordered, and here we guarantee it).
    1241                 :            :          *
    1242                 :            :          * As we know we will need to emit tracking along the timeline,
    1243                 :            :          * we embed the hooks into our request struct -- at the cost of
    1244                 :            :          * having to have specialised no-allocation interfaces (which will
    1245                 :            :          * be beneficial elsewhere).
    1246                 :            :          *
    1247                 :            :          * A second benefit to open-coding i915_request_await_request is
    1248                 :            :          * that we can apply a slight variant of the rules specialised
    1249                 :            :          * for timelines that jump between engines (such as virtual engines).
    1250                 :            :          * If we consider the case of virtual engine, we must emit a dma-fence
    1251                 :            :          * to prevent scheduling of the second request until the first is
    1252                 :            :          * complete (to maximise our greedy late load balancing) and this
     1253                 :            :          * precludes optimising to use semaphore serialisation of a single
    1254                 :            :          * timeline across engines.
    1255                 :            :          */
    1256                 :          0 :         prev = to_request(__i915_active_fence_set(&timeline->last_request,
    1257                 :            :                                                   &rq->fence));
    1258   [ #  #  #  # ]:          0 :         if (prev && !i915_request_completed(prev)) {
    1259   [ #  #  #  # ]:          0 :                 if (is_power_of_2(prev->engine->mask | rq->engine->mask))
    1260                 :          0 :                         i915_sw_fence_await_sw_fence(&rq->submit,
    1261                 :            :                                                      &prev->submit,
    1262                 :            :                                                      &rq->submitq);
    1263                 :            :                 else
    1264                 :          0 :                         __i915_sw_fence_await_dma_fence(&rq->submit,
    1265                 :            :                                                         &prev->fence,
    1266                 :            :                                                         &rq->dmaq);
    1267         [ #  # ]:          0 :                 if (rq->engine->schedule)
    1268                 :          0 :                         __i915_sched_node_add_dependency(&rq->sched,
    1269                 :            :                                                          &prev->sched,
    1270                 :            :                                                          &rq->dep,
    1271                 :            :                                                          0);
    1272                 :            :         }
    1273                 :            : 
    1274                 :            :         /*
    1275                 :            :          * Make sure that no request gazumped us - if it was allocated after
    1276                 :            :          * our i915_request_alloc() and called __i915_request_add() before
    1277                 :            :          * us, the timeline will hold its seqno which is later than ours.
    1278                 :            :          */
    1279                 :          0 :         GEM_BUG_ON(timeline->seqno != rq->fence.seqno);
    1280                 :            : 
    1281                 :          0 :         return prev;
    1282                 :            : }
    1283                 :            : 
    1284                 :            : /*
    1285                 :            :  * NB: This function is not allowed to fail. Doing so would mean that the
    1286                 :            :  * request is not being tracked for completion but the work itself is
    1287                 :            :  * going to happen on the hardware. This would be a Bad Thing(tm).
    1288                 :            :  */
    1289                 :          0 : struct i915_request *__i915_request_commit(struct i915_request *rq)
    1290                 :            : {
    1291                 :          0 :         struct intel_engine_cs *engine = rq->engine;
    1292                 :          0 :         struct intel_ring *ring = rq->ring;
    1293                 :          0 :         u32 *cs;
    1294                 :            : 
    1295                 :          0 :         RQ_TRACE(rq, "\n");
    1296                 :            : 
    1297                 :            :         /*
    1298                 :            :          * To ensure that this call will not fail, space for its emissions
    1299                 :            :          * should already have been reserved in the ring buffer. Let the ring
    1300                 :            :          * know that it is time to use that space up.
    1301                 :            :          */
    1302                 :          0 :         GEM_BUG_ON(rq->reserved_space > ring->space);
    1303                 :          0 :         rq->reserved_space = 0;
    1304                 :          0 :         rq->emitted_jiffies = jiffies;
    1305                 :            : 
    1306                 :            :         /*
    1307                 :            :          * Record the position of the start of the breadcrumb so that
    1308                 :            :          * should we detect the updated seqno part-way through the
    1309                 :            :          * GPU processing the request, we never over-estimate the
    1310                 :            :          * position of the ring's HEAD.
    1311                 :            :          */
    1312                 :          0 :         cs = intel_ring_begin(rq, engine->emit_fini_breadcrumb_dw);
    1313                 :          0 :         GEM_BUG_ON(IS_ERR(cs));
    1314                 :          0 :         rq->postfix = intel_ring_offset(rq, cs);
    1315                 :            : 
    1316                 :          0 :         return __i915_request_add_to_timeline(rq);
    1317                 :            : }
    1318                 :            : 
    1319                 :          0 : void __i915_request_queue(struct i915_request *rq,
    1320                 :            :                           const struct i915_sched_attr *attr)
    1321                 :            : {
    1322                 :            :         /*
    1323                 :            :          * Let the backend know a new request has arrived that may need
    1324                 :            :          * to adjust the existing execution schedule due to a high priority
    1325                 :            :          * request - i.e. we may want to preempt the current request in order
    1326                 :            :          * to run a high priority dependency chain *before* we can execute this
    1327                 :            :          * request.
    1328                 :            :          *
    1329                 :            :          * This is called before the request is ready to run so that we can
    1330                 :            :          * decide whether to preempt the entire chain so that it is ready to
    1331                 :            :          * run at the earliest possible convenience.
    1332                 :            :          */
    1333   [ #  #  #  # ]:          0 :         if (attr && rq->engine->schedule)
    1334                 :          0 :                 rq->engine->schedule(rq, attr);
    1335                 :          0 :         i915_sw_fence_commit(&rq->semaphore);
    1336                 :          0 :         i915_sw_fence_commit(&rq->submit);
    1337                 :          0 : }
    1338                 :            : 
    1339                 :          0 : void i915_request_add(struct i915_request *rq)
    1340                 :            : {
    1341                 :          0 :         struct intel_timeline * const tl = i915_request_timeline(rq);
    1342                 :          0 :         struct i915_sched_attr attr = {};
    1343                 :          0 :         struct i915_request *prev;
    1344                 :            : 
    1345                 :          0 :         lockdep_assert_held(&tl->mutex);
    1346                 :          0 :         lockdep_unpin_lock(&tl->mutex, rq->cookie);
    1347                 :            : 
    1348                 :          0 :         trace_i915_request_add(rq);
    1349                 :            : 
    1350                 :          0 :         prev = __i915_request_commit(rq);
    1351                 :            : 
    1352         [ #  # ]:          0 :         if (rcu_access_pointer(rq->context->gem_context))
    1353                 :          0 :                 attr = i915_request_gem_context(rq)->sched;
    1354                 :            : 
    1355                 :            :         /*
    1356                 :            :          * Boost actual workloads past semaphores!
    1357                 :            :          *
    1358                 :            :          * With semaphores we spin on one engine waiting for another,
    1359                 :            :          * simply to reduce the latency of starting our work when
    1360                 :            :          * the signaler completes. However, if there is any other
    1361                 :            :          * work that we could be doing on this engine instead, that
    1362                 :            :          * is better utilisation and will reduce the overall duration
    1363                 :            :          * of the current work. To avoid PI boosting a semaphore
    1364                 :            :          * far in the distance past over useful work, we keep a history
    1365                 :            :          * of any semaphore use along our dependency chain.
    1366                 :            :          */
    1367         [ #  # ]:          0 :         if (!(rq->sched.flags & I915_SCHED_HAS_SEMAPHORE_CHAIN))
    1368                 :          0 :                 attr.priority |= I915_PRIORITY_NOSEMAPHORE;
    1369                 :            : 
    1370                 :            :         /*
    1371                 :            :          * Boost priorities to new clients (new request flows).
    1372                 :            :          *
    1373                 :            :          * Allow interactive/synchronous clients to jump ahead of
    1374                 :            :          * the bulk clients. (FQ_CODEL)
    1375                 :            :          */
    1376         [ #  # ]:          0 :         if (list_empty(&rq->sched.signalers_list))
    1377                 :          0 :                 attr.priority |= I915_PRIORITY_WAIT;
    1378                 :            : 
    1379                 :          0 :         local_bh_disable();
    1380                 :          0 :         __i915_request_queue(rq, &attr);
    1381                 :          0 :         local_bh_enable(); /* Kick the execlists tasklet if just scheduled */
    1382                 :            : 
    1383                 :            :         /*
    1384                 :            :          * In typical scenarios, we do not expect the previous request on
    1385                 :            :          * the timeline to be still tracked by timeline->last_request if it
    1386                 :            :          * has been completed. If the completed request is still here, that
    1387                 :            :          * implies that request retirement is a long way behind submission,
    1388                 :            :          * suggesting that we haven't been retiring frequently enough from
    1389                 :            :          * the combination of retire-before-alloc, waiters and the background
    1390                 :            :          * retirement worker. So if the last request on this timeline was
    1391                 :            :          * already completed, do a catch up pass, flushing the retirement queue
    1392                 :            :          * up to this client. Since we have now moved the heaviest operations
    1393                 :            :          * during retirement onto secondary workers, such as freeing objects
    1394                 :            :          * or contexts, retiring a bunch of requests is mostly list management
    1395                 :            :          * (and cache misses), and so we should not be overly penalizing this
    1396                 :            :          * client by performing excess work, though we may still be performing
    1397                 :            :          * work on behalf of others -- but instead we should benefit from
    1398                 :            :          * improved resource management. (Well, that's the theory at least.)
    1399                 :            :          */
    1400   [ #  #  #  # ]:          0 :         if (prev &&
    1401         [ #  # ]:          0 :             i915_request_completed(prev) &&
    1402         [ #  # ]:          0 :             rcu_access_pointer(prev->timeline) == tl)
    1403                 :          0 :                 i915_request_retire_upto(prev);
    1404                 :            : 
    1405                 :          0 :         mutex_unlock(&tl->mutex);
    1406                 :          0 : }
    1407                 :            : 
    1408                 :          0 : static unsigned long local_clock_us(unsigned int *cpu)
    1409                 :            : {
    1410                 :          0 :         unsigned long t;
    1411                 :            : 
    1412                 :            :         /*
    1413                 :            :          * Cheaply and approximately convert from nanoseconds to microseconds.
    1414                 :            :          * The result and subsequent calculations are also defined in the same
    1415                 :            :          * approximate microseconds units. The principal source of timing
    1416                 :            :          * error here is from the simple truncation.
    1417                 :            :          *
    1418                 :            :          * Note that local_clock() is only defined wrt the current CPU;
    1419                 :            :          * the comparisons are no longer valid if we switch CPUs. Instead of
    1420                 :            :          * blocking preemption for the entire busywait, we can detect the CPU
    1421                 :            :          * switch and use that as indicator of system load and a reason to
    1422                 :            :          * stop busywaiting, see busywait_stop().
    1423                 :            :          */
    1424                 :          0 :         *cpu = get_cpu();
    1425                 :          0 :         t = local_clock() >> 10;
    1426                 :          0 :         put_cpu();
    1427                 :            : 
    1428                 :          0 :         return t;
    1429                 :            : }
    1430                 :            : 
    1431                 :          0 : static bool busywait_stop(unsigned long timeout, unsigned int cpu)
    1432                 :            : {
    1433                 :          0 :         unsigned int this_cpu;
    1434                 :            : 
    1435         [ #  # ]:          0 :         if (time_after(local_clock_us(&this_cpu), timeout))
    1436                 :            :                 return true;
    1437                 :            : 
    1438                 :          0 :         return this_cpu != cpu;
    1439                 :            : }
    1440                 :            : 
    1441                 :          0 : static bool __i915_spin_request(const struct i915_request * const rq,
    1442                 :            :                                 int state, unsigned long timeout_us)
    1443                 :            : {
    1444                 :          0 :         unsigned int cpu;
    1445                 :            : 
    1446                 :            :         /*
    1447                 :            :          * Only wait for the request if we know it is likely to complete.
    1448                 :            :          *
    1449                 :            :          * We don't track the timestamps around requests, nor the average
    1450                 :            :          * request length, so we do not have a good indicator that this
    1451                 :            :          * request will complete within the timeout. What we do know is the
    1452                 :            :          * order in which requests are executed by the context and so we can
    1453                 :            :          * tell if the request has been started. If the request is not even
    1454                 :            :          * running yet, it is a fair assumption that it will not complete
    1455                 :            :          * within our relatively short timeout.
    1456                 :            :          */
    1457         [ #  # ]:          0 :         if (!i915_request_is_running(rq))
    1458                 :            :                 return false;
    1459                 :            : 
    1460                 :            :         /*
    1461                 :            :          * When waiting for high frequency requests, e.g. during synchronous
    1462                 :            :          * rendering split between the CPU and GPU, the finite amount of time
    1463                 :            :          * required to set up the irq and wait upon it limits the response
    1464                 :            :          * rate. By busywaiting on the request completion for a short while we
    1465                 :            :          * can service the high frequency waits as quickly as possible. However,
    1466                 :            :          * if it is a slow request, we want to sleep as quickly as possible.
    1467                 :            :          * The tradeoff between waiting and sleeping is roughly the time it
    1468                 :            :          * takes to sleep on a request, on the order of a microsecond.
    1469                 :            :          */
    1470                 :            : 
    1471                 :          0 :         timeout_us += local_clock_us(&cpu);
    1472                 :          0 :         do {
    1473         [ #  # ]:          0 :                 if (i915_request_completed(rq))
    1474                 :            :                         return true;
    1475                 :            : 
    1476         [ #  # ]:          0 :                 if (signal_pending_state(state, current))
    1477                 :            :                         break;
    1478                 :            : 
    1479         [ #  # ]:          0 :                 if (busywait_stop(timeout_us, cpu))
    1480                 :            :                         break;
    1481                 :            : 
    1482                 :          0 :                 cpu_relax();
    1483         [ #  # ]:          0 :         } while (!need_resched());
    1484                 :            : 
    1485                 :            :         return false;
    1486                 :            : }
    1487                 :            : 
    1488                 :            : struct request_wait {
    1489                 :            :         struct dma_fence_cb cb;
    1490                 :            :         struct task_struct *tsk;
    1491                 :            : };
    1492                 :            : 
    1493                 :          0 : static void request_wait_wake(struct dma_fence *fence, struct dma_fence_cb *cb)
    1494                 :            : {
    1495                 :          0 :         struct request_wait *wait = container_of(cb, typeof(*wait), cb);
    1496                 :            : 
    1497                 :          0 :         wake_up_process(wait->tsk);
    1498                 :          0 : }
    1499                 :            : 
    1500                 :            : /**
    1501                 :            :  * i915_request_wait - wait until execution of request has finished
    1502                 :            :  * @rq: the request to wait upon
    1503                 :            :  * @flags: how to wait
    1504                 :            :  * @timeout: how long to wait in jiffies
    1505                 :            :  *
    1506                 :            :  * i915_request_wait() waits for the request to be completed, for a
    1507                 :            :  * maximum of @timeout jiffies (with MAX_SCHEDULE_TIMEOUT implying an
    1508                 :            :  * unbounded wait).
    1509                 :            :  *
    1510                 :            :  * Returns the remaining time (in jiffies) if the request completed, which may
    1511                 :            :  * be zero, or -ETIME if the request is unfinished after the timeout expires.
    1512                 :            :  * May return -ERESTARTSYS if called with I915_WAIT_INTERRUPTIBLE and a signal
    1513                 :            :  * is pending before the request completes.
    1514                 :            :  */
    1515                 :          0 : long i915_request_wait(struct i915_request *rq,
    1516                 :            :                        unsigned int flags,
    1517                 :            :                        long timeout)
    1518                 :            : {
    1519                 :          0 :         const int state = flags & I915_WAIT_INTERRUPTIBLE ?
    1520                 :          0 :                 TASK_INTERRUPTIBLE : TASK_UNINTERRUPTIBLE;
    1521                 :          0 :         struct request_wait wait;
    1522                 :            : 
    1523                 :          0 :         might_sleep();
    1524                 :          0 :         GEM_BUG_ON(timeout < 0);
    1525                 :            : 
    1526         [ #  # ]:          0 :         if (dma_fence_is_signaled(&rq->fence))
    1527                 :            :                 return timeout;
    1528                 :            : 
    1529         [ #  # ]:          0 :         if (!timeout)
    1530                 :            :                 return -ETIME;
    1531                 :            : 
    1532                 :          0 :         trace_i915_request_wait_begin(rq, flags);
    1533                 :            : 
    1534                 :            :         /*
    1535                 :            :          * We must never wait on the GPU while holding a lock as we
    1536                 :            :          * may need to perform a GPU reset. So while we don't need to
    1537                 :            :          * serialise wait/reset with an explicit lock, we do want
    1538                 :            :          * lockdep to detect potential dependency cycles.
    1539                 :            :          */
    1540                 :          0 :         mutex_acquire(&rq->engine->gt->reset.mutex.dep_map, 0, 0, _THIS_IP_);
    1541                 :            : 
    1542                 :            :         /*
    1543                 :            :          * Optimistic spin before touching IRQs.
    1544                 :            :          *
    1545                 :            :          * We may use a rather large value here to offset the penalty of
    1546                 :            :          * switching away from the active task. Frequently, the client will
    1547                 :            :          * wait upon an old swapbuffer to throttle itself to remain within a
    1548                 :            :          * frame of the gpu. If the client is running in lockstep with the gpu,
    1549                 :            :          * then it should not be waiting long at all, and a sleep now will incur
    1550                 :            :          * extra scheduler latency in producing the next frame. To try to
    1551                 :            :          * avoid adding the cost of enabling/disabling the interrupt to the
    1552                 :            :          * short wait, we first spin to see if the request would have completed
    1553                 :            :          * in the time taken to setup the interrupt.
    1554                 :            :          *
    1555                 :            :          * We need up to 5us to enable the irq, and up to 20us to hide the
    1556                 :            :          * scheduler latency of a context switch, ignoring the secondary
    1557                 :            :          * impacts from a context switch such as cache eviction.
    1558                 :            :          *
    1559                 :            :          * The scheme used for low-latency IO is called "hybrid interrupt
    1560                 :            :          * polling". The suggestion there is to sleep until just before you
    1561                 :            :          * expect to be woken by the device interrupt and then poll for its
    1562                 :            :          * completion. That requires having a good predictor for the request
    1563                 :            :          * duration, which we currently lack.
    1564                 :            :          */
    1565         [ #  # ]:          0 :         if (IS_ACTIVE(CONFIG_DRM_I915_SPIN_REQUEST) &&
    1566                 :          0 :             __i915_spin_request(rq, state, CONFIG_DRM_I915_SPIN_REQUEST)) {
    1567                 :          0 :                 dma_fence_signal(&rq->fence);
    1568                 :          0 :                 goto out;
    1569                 :            :         }
    1570                 :            : 
    1571                 :            :         /*
    1572                 :            :          * This client is about to stall waiting for the GPU. In many cases
    1573                 :            :          * this is undesirable and limits the throughput of the system, as
    1574                 :            :          * many clients cannot continue processing user input/output whilst
    1575                 :            :          * blocked. RPS autotuning may take tens of milliseconds to respond
    1576                 :            :          * to the GPU load and thus incurs additional latency for the client.
    1577                 :            :          * We can circumvent that by promoting the GPU frequency to maximum
    1578                 :            :          * before we sleep. This makes the GPU throttle up much more quickly
    1579                 :            :          * (good for benchmarks and user experience, e.g. window animations),
    1580                 :            :          * but at a cost of spending more power processing the workload
    1581                 :            :          * (bad for battery).
    1582                 :            :          */
    1583         [ #  # ]:          0 :         if (flags & I915_WAIT_PRIORITY) {
    1584   [ #  #  #  # ]:          0 :                 if (!i915_request_started(rq) && INTEL_GEN(rq->i915) >= 6)
    1585                 :          0 :                         intel_rps_boost(rq);
    1586                 :          0 :                 i915_schedule_bump_priority(rq, I915_PRIORITY_WAIT);
    1587                 :            :         }
    1588                 :            : 
    1589                 :          0 :         wait.tsk = current;
    1590         [ #  # ]:          0 :         if (dma_fence_add_callback(&rq->fence, &wait.cb, request_wait_wake))
    1591                 :          0 :                 goto out;
    1592                 :            : 
    1593                 :          0 :         for (;;) {
    1594                 :          0 :                 set_current_state(state);
    1595                 :            : 
    1596         [ #  # ]:          0 :                 if (i915_request_completed(rq)) {
    1597                 :          0 :                         dma_fence_signal(&rq->fence);
    1598                 :          0 :                         break;
    1599                 :            :                 }
    1600                 :            : 
    1601         [ #  # ]:          0 :                 if (signal_pending_state(state, current)) {
    1602                 :            :                         timeout = -ERESTARTSYS;
    1603                 :            :                         break;
    1604                 :            :                 }
    1605                 :            : 
    1606         [ #  # ]:          0 :                 if (!timeout) {
    1607                 :            :                         timeout = -ETIME;
    1608                 :            :                         break;
    1609                 :            :                 }
    1610                 :            : 
    1611                 :          0 :                 intel_engine_flush_submission(rq->engine);
    1612                 :          0 :                 timeout = io_schedule_timeout(timeout);
    1613                 :            :         }
    1614                 :          0 :         __set_current_state(TASK_RUNNING);
    1615                 :            : 
    1616                 :          0 :         dma_fence_remove_callback(&rq->fence, &wait.cb);
    1617                 :            : 
    1618                 :          0 : out:
    1619                 :          0 :         mutex_release(&rq->engine->gt->reset.mutex.dep_map, _THIS_IP_);
    1620                 :          0 :         trace_i915_request_wait_end(rq);
    1621                 :          0 :         return timeout;
    1622                 :            : }
    1623                 :            : 
    1624                 :            : #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
    1625                 :            : #include "selftests/mock_request.c"
    1626                 :            : #include "selftests/i915_request.c"
    1627                 :            : #endif
    1628                 :            : 
    1629                 :          0 : static void i915_global_request_shrink(void)
    1630                 :            : {
    1631                 :          0 :         kmem_cache_shrink(global.slab_dependencies);
    1632                 :          0 :         kmem_cache_shrink(global.slab_execute_cbs);
    1633                 :          0 :         kmem_cache_shrink(global.slab_requests);
    1634                 :          0 : }
    1635                 :            : 
    1636                 :          0 : static void i915_global_request_exit(void)
    1637                 :            : {
    1638                 :          0 :         kmem_cache_destroy(global.slab_dependencies);
    1639                 :          0 :         kmem_cache_destroy(global.slab_execute_cbs);
    1640                 :          0 :         kmem_cache_destroy(global.slab_requests);
    1641                 :          0 : }
    1642                 :            : 
    1643                 :            : static struct i915_global_request global = { {
    1644                 :            :         .shrink = i915_global_request_shrink,
    1645                 :            :         .exit = i915_global_request_exit,
    1646                 :            : } };
    1647                 :            : 
    1648                 :         21 : int __init i915_global_request_init(void)
    1649                 :            : {
    1650                 :         42 :         global.slab_requests =
    1651                 :         21 :                 kmem_cache_create("i915_request",
    1652                 :            :                                   sizeof(struct i915_request),
    1653                 :            :                                   __alignof__(struct i915_request),
    1654                 :            :                                   SLAB_HWCACHE_ALIGN |
    1655                 :            :                                   SLAB_RECLAIM_ACCOUNT |
    1656                 :            :                                   SLAB_TYPESAFE_BY_RCU,
    1657                 :            :                                   __i915_request_ctor);
    1658         [ +  - ]:         21 :         if (!global.slab_requests)
    1659                 :            :                 return -ENOMEM;
    1660                 :            : 
    1661                 :         21 :         global.slab_execute_cbs = KMEM_CACHE(execute_cb,
    1662                 :            :                                              SLAB_HWCACHE_ALIGN |
    1663                 :            :                                              SLAB_RECLAIM_ACCOUNT |
    1664                 :            :                                              SLAB_TYPESAFE_BY_RCU);
    1665         [ -  + ]:         21 :         if (!global.slab_execute_cbs)
    1666                 :          0 :                 goto err_requests;
    1667                 :            : 
    1668                 :         21 :         global.slab_dependencies = KMEM_CACHE(i915_dependency,
    1669                 :            :                                               SLAB_HWCACHE_ALIGN |
    1670                 :            :                                               SLAB_RECLAIM_ACCOUNT);
    1671         [ -  + ]:         21 :         if (!global.slab_dependencies)
    1672                 :          0 :                 goto err_execute_cbs;
    1673                 :            : 
    1674                 :         21 :         i915_global_register(&global.base);
    1675                 :         21 :         return 0;
    1676                 :            : 
    1677                 :            : err_execute_cbs:
    1678                 :          0 :         kmem_cache_destroy(global.slab_execute_cbs);
    1679                 :          0 : err_requests:
    1680                 :          0 :         kmem_cache_destroy(global.slab_requests);
    1681                 :          0 :         return -ENOMEM;
    1682                 :            : }

Generated by: LCOV version 1.14